Error record
Token indices sequence length is longer than the specified maximum sequence length for this model (24 > 16). Running this sequence through the model will result in indexing errors
test_web_vision.py:28 (test_image)
def test_image():
driver = webdriver.Chrome()
driver.get('https://www.baidu.com')
driver.maximize_window()
image_driver = ImageDriver(driver)
search_input = image_driver.find_by_labels('搜索框')
search_input.send_keys("hogwarts")
driver.save_screenshot('./输入结果.png')
> search_button = image_driver.find_by_labels(['百度一下搜索按钮'])
test_web_vision.py:40:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
image_driver.py:49: in find_by_labels
predictions = object_detector(
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\zero_shot_object_detection.py:135: in __call__
results = super().__call__(inputs, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\base.py:1198: in __call__
return next(
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\pt_utils.py:124: in __next__
item = next(self.iterator)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\pt_utils.py:266: in __next__
processed = self.infer(next(self.iterator), **self.params)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\base.py:1112: in forward
model_outputs = self._forward(model_inputs, **forward_params)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\pipelines\zero_shot_object_detection.py:172: in _forward
outputs = self.model(**model_inputs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1511: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1520: in _call_impl
return forward_call(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:1726: in forward
query_embeds, feature_map, outputs = self.image_text_embedder(
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:1424: in image_text_embedder
outputs = self.owlv2(
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1511: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1520: in _call_impl
return forward_call(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:1177: in forward
text_outputs = self.text_model(
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1511: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1520: in _call_impl
return forward_call(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:808: in forward
hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1511: in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
..\..\..\.venv\visual_ai\lib\site-packages\torch\nn\modules\module.py:1520: in _call_impl
return forward_call(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Owlv2TextEmbeddings(
(token_embedding): Embedding(49408, 512)
(position_embedding): Embedding(16, 512)
)
input_ids = tensor([[49406, 163, 247, 122, 161, 118, 99, 19759, 222, 19759,
233, 162, 238, 250, 163, 112, 95, 162, 234, 231,
165, 240, 362, 49407]])
position_ids = tensor([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]])
inputs_embeds = tensor([[[ 0.0004, 0.0005, -0.0039, ..., 0.0001, 0.0024, 0.0052],
[-0.0061, -0.0062, 0.0115, ..., -0.0...19, 0.0051, ..., -0.0189, 0.0023, 0.0012],
[ 0.0032, -0.0076, -0.0188, ..., -0.0006, 0.0070, 0.0028]]])
def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
position_ids: Optional[torch.LongTensor] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
) -> torch.Tensor:
seq_length = input_ids.shape[-1] if input_ids is not None else inputs_embeds.shape[-2]
if position_ids is None:
position_ids = self.position_ids[:, :seq_length]
if inputs_embeds is None:
inputs_embeds = self.token_embedding(input_ids)
position_embeddings = self.position_embedding(position_ids)
> embeddings = inputs_embeds + position_embeddings
E RuntimeError: The size of tensor a (24) must match the size of tensor b (16) at non-singleton dimension 1
..\..\..\.venv\visual_ai\lib\site-packages\transformers\models\owlv2\modeling_owlv2.py:338: RuntimeError
============================= 1 failed in 49.10s ==============================
Process finished with exit code 1
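This is a prompt-length failure, not a Selenium one. OWLv2's text encoder allocates only 16 position embeddings (the Embedding(16, 512) in the locals above), but the candidate label '百度一下搜索按钮' tokenizes to 24 ids (the 24-element input_ids tensor, BOS 49406 and EOS 49407 included), so adding token and position embeddings fails with mismatched shapes. A label can be checked before it ever reaches the pipeline; a minimal sketch, assuming the pipeline was built from the google/owlv2-base-patch16-ensemble checkpoint (the log does not show which weights were actually loaded):

from transformers import AutoProcessor

# Assumed checkpoint: substitute whatever OWLv2 weights the pipeline uses.
processor = AutoProcessor.from_pretrained("google/owlv2-base-patch16-ensemble")

ids = processor.tokenizer('百度一下搜索按钮').input_ids
print(len(ids))  # 24, including the BOS/EOS tokens the tokenizer adds
# The text tower only has 16 position slots (Embedding(16, 512) above),
# so any label longer than 16 tokens reproduces this RuntimeError.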
Solution
Shorten the prompt. Every candidate label must tokenize to at most 16 tokens (BOS and EOS included), since that is the full size of OWLv2's position-embedding table; the sketch below shows one way to enforce this before calling the pipeline.
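A minimal sketch of the fix, reusing the processor from the check above and the image_driver from the failing test; the shorter label '搜索按钮' is an illustrative guess and should be confirmed with the same token count:

def fits_text_tower(label, max_len=16):
    # 16 matches the position-embedding table in the traceback;
    # the count already includes the BOS/EOS tokens.
    return len(processor.tokenizer(label).input_ids) <= max_len

label = '搜索按钮'  # hypothetical shorter phrase for the same button
assert fits_text_tower(label), f"{label!r} still exceeds 16 tokens"
search_button = image_driver.find_by_labels([label])

Dropping the redundant '百度一下' prefix keeps the label's meaning while cutting its token count, since each Chinese character expands to roughly two to three tokens under the byte-level CLIP tokenizer.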