transformers | Tensor size mismatch

Error log

Token indices sequence length is longer than the specified maximum sequence length for this model (24 > 16). Running this sequence through the model will result in indexing errors

test_web_vision.py:28 (test_image)
def test_image():
        driver = webdriver.Chrome()
        driver.get('https://www.baidu.com')
        driver.maximize_window()
        image_driver = ImageDriver(driver)
        search_input = image_driver.find_by_labels('搜索框')
        search_input.send_keys("hogwarts")
        driver.save_screenshot('./输入结果.png')
>       search_button = image_driver.find_by_labels(['百度一下搜索按钮'])

test_web_vision.py:40: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
image_driver.py:49: in find_by_labels
    predictions = object_detector(
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\zero_shot_object_detection.py:135: in __call__
    results = super().__call__(inputs, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\base.py:1198: in __call__
    return next(
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\pt_utils.py:124: in __next__
    item = next(self.iterator)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\pt_utils.py:266: in __next__
    processed = self.infer(next(self.iterator), **self.params)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\base.py:1112: in forward
    model_outputs = self._forward(model_inputs, **forward_params)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\zero_shot_object_detection.py:172: in _forward
    outputs = self.model(**model_inputs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1511: in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1520: in _call_impl
    return forward_call(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:1726: in forward
    query_embeds, feature_map, outputs = self.image_text_embedder(
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:1424: in image_text_embedder
    outputs = self.owlv2(
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1511: in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1520: in _call_impl
    return forward_call(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:1177: in forward
    text_outputs = self.text_model(
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1511: in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1520: in _call_impl
    return forward_call(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:808: in forward
    hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1511: in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1520: in _call_impl
    return forward_call(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = Owlv2TextEmbeddings(
  (token_embedding): Embedding(49408, 512)
  (position_embedding): Embedding(16, 512)
)
input_ids = tensor([[49406,   163,   247,   122,   161,   118,    99, 19759,   222, 19759,
           233,   162,   238,   250,   163,   112,    95,   162,   234,   231,
           165,   240,   362, 49407]])
position_ids = tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15]])
inputs_embeds = tensor([[[ 0.0004,  0.0005, -0.0039,  ...,  0.0001,  0.0024,  0.0052],
         [-0.0061, -0.0062,  0.0115,  ..., -0.0...19,  0.0051,  ..., -0.0189,  0.0023,  0.0012],
         [ 0.0032, -0.0076, -0.0188,  ..., -0.0006,  0.0070,  0.0028]]])

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
    ) -> torch.Tensor:
        seq_length = input_ids.shape[-1] if input_ids is not None else inputs_embeds.shape[-2]
    
        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]
    
        if inputs_embeds is None:
            inputs_embeds = self.token_embedding(input_ids)
    
        position_embeddings = self.position_embedding(position_ids)
>       embeddings = inputs_embeds + position_embeddings
E       RuntimeError: The size of tensor a (24) must match the size of tensor b (16) at non-singleton dimension 1

..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:338: RuntimeError


============================= 1 failed in 49.10s ==============================

Process finished with exit code 1
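
Root cause

The stack trace ends in Owlv2TextEmbeddings: the OWLv2 text encoder has only 16 position embeddings (the Embedding(16, 512) shown above), while the label '百度一下搜索按钮' tokenizes to 24 ids under CLIP's byte-level BPE, where a Chinese character typically expands to two or three tokens. The tokenizer only warns (the "24 > 16" message at the top) instead of truncating, so inputs_embeds ends up with length 24 against 16 position embeddings, and the addition at modeling_owlv2.py:338 fails. A label can be checked before it reaches the pipeline; a minimal sketch, assuming the checkpoint is "google/owlv2-base-patch16-ensemble" (substitute whatever model ImageDriver actually loads):

from transformers import AutoTokenizer

# Checkpoint name is an assumption -- use the one ImageDriver loads.
tokenizer = AutoTokenizer.from_pretrained("google/owlv2-base-patch16-ensemble")

label = '百度一下搜索按钮'
ids = tokenizer(label)["input_ids"]  # includes BOS (49406) and EOS (49407)
print(len(ids))  # 24 for this label -- over the 16-position limit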

Solution

Shorten the prompt (the label text) so that it tokenizes to at most 16 tokens, i.e. within the text encoder's position-embedding capacity.
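
For example, a shorter label for the same element stays within the 16-token budget (a sketch reusing the tokenizer and image_driver from above; the exact wording of the shorter label is illustrative):

# Fewer characters -> fewer BPE tokens; verify before calling the pipeline.
short_label = '搜索按钮'
assert len(tokenizer(short_label)["input_ids"]) <= 16

search_button = image_driver.find_by_labels([short_label])

English labels are even cheaper: CLIP's vocabulary is English-centric, so a label like "search button" costs only a handful of tokens.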