Update processing_minicpmv.py
Browse files- processing_minicpmv.py +8 -0
processing_minicpmv.py
CHANGED
|
@@ -164,6 +164,14 @@ class MiniCPMVProcessor(ProcessorMixin):
|
|
| 164 |
return input_ids.unsqueeze(0), image_bounds
|
| 165 |
|
| 166 |
def _convert_images_texts_to_inputs(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
if not len(images):
|
| 168 |
model_inputs = self.tokenizer(texts, return_tensors=return_tensors, padding=do_pad, truncation=truncation, max_length=max_length)
|
| 169 |
return MiniCPMVBatchFeature(data={**model_inputs})
|
|
|
|
| 164 |
return input_ids.unsqueeze(0), image_bounds
|
| 165 |
|
| 166 |
def _convert_images_texts_to_inputs(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
|
| 167 |
+
assert len(images) == len(texts)
|
| 168 |
+
batch = []
|
| 169 |
+
for ind in range(len(images)):
|
| 170 |
+
result = _convert_images_texts_to_inputs2(self, images[ind], texts[ind], do_pad, truncation, max_length, return_tensors)
|
| 171 |
+
batch.append(result)
|
| 172 |
+
return batch
|
| 173 |
+
|
| 174 |
+
def _convert_images_texts_to_inputs2(self, images, texts, do_pad=False, truncation=None, max_length=None, return_tensors=None):
|
| 175 |
if not len(images):
|
| 176 |
model_inputs = self.tokenizer(texts, return_tensors=return_tensors, padding=do_pad, truncation=truncation, max_length=max_length)
|
| 177 |
return MiniCPMVBatchFeature(data={**model_inputs})
|