Update modeling_minicpmv.py
Browse files- modeling_minicpmv.py +24 -4
modeling_minicpmv.py
CHANGED
|
@@ -197,17 +197,27 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
|
|
| 197 |
result_text.append(tokenizer.decode(result).strip())
|
| 198 |
return result_text
|
| 199 |
|
| 200 |
-
def _decode(self, inputs_embeds, tokenizer, decode_text=False, **kwargs):
|
| 201 |
terminators = [
|
| 202 |
tokenizer.eos_token_id,
|
| 203 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
| 204 |
]
|
| 205 |
-
output =
|
|
|
|
|
|
|
| 206 |
inputs_embeds=inputs_embeds,
|
|
|
|
| 207 |
pad_token_id=0,
|
| 208 |
eos_token_id=terminators,
|
| 209 |
**kwargs
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
if decode_text:
|
| 212 |
return self._decode_text(output, tokenizer)
|
| 213 |
return output
|
|
@@ -277,6 +287,16 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
|
|
| 277 |
max_x = max(tensor.shape[1] for tensor in batch)
|
| 278 |
|
| 279 |
# Step 2: Automatically pad each tensor to have the same length (L) in the last dimension
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
padded_tensors = [torch.nn.functional.pad(tensor, (0, 0, 0, max_x - tensor.shape[1])) for tensor in batch]
|
| 281 |
|
| 282 |
# Step 3: Stack the padded tensors into a single batch
|
|
@@ -289,7 +309,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
|
|
| 289 |
kwargs.pop("decode_text")
|
| 290 |
result = self._decode_stream(batch, tokenizer, **kwargs)
|
| 291 |
else:
|
| 292 |
-
result = self._decode(batch, tokenizer, **kwargs)
|
| 293 |
|
| 294 |
return result
|
| 295 |
|
|
|
|
| 197 |
result_text.append(tokenizer.decode(result).strip())
|
| 198 |
return result_text
|
| 199 |
|
| 200 |
+
def _decode(self, inputs_embeds, tokenizer, attention_mask=None, decode_text=False, **kwargs):
    """Run LLM generation from precomputed input embeddings.

    Args:
        inputs_embeds: embedding tensor fed directly to ``self.llm.generate``
            (shape presumably (batch, seq, hidden) — TODO confirm against caller).
        tokenizer: tokenizer supplying ``eos_token_id`` / ``convert_tokens_to_ids``
            and, via ``_decode_text``, token-to-text decoding.
        attention_mask: optional padding mask for batched inputs; forwarded to
            ``generate`` only when provided, so single-sequence callers are
            unaffected.
        decode_text: when True, return decoded strings via ``_decode_text``
            instead of the raw generation output.
        **kwargs: extra generation options passed through to ``generate``.

    Returns:
        The raw output of ``self.llm.generate``, or a list of decoded strings
        when ``decode_text`` is True.
    """
    # Stop on either the model EOS or the llama-3 style end-of-turn token.
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    # NOTE: a tensor must be tested with `is not None`; `attention_mask != None`
    # is an elementwise comparison on torch tensors and makes `if` raise
    # "Boolean value of Tensor with more than one element is ambiguous".
    # Build the kwargs once instead of duplicating the generate() call per branch.
    generate_kwargs = dict(
        inputs_embeds=inputs_embeds,
        pad_token_id=0,
        eos_token_id=terminators,
        **kwargs,
    )
    if attention_mask is not None:
        generate_kwargs["attention_mask"] = attention_mask
    output = self.llm.generate(**generate_kwargs)
    if decode_text:
        return self._decode_text(output, tokenizer)
    return output
|
|
|
|
| 287 |
max_x = max(tensor.shape[1] for tensor in batch)
|
| 288 |
|
| 289 |
# Step 2: Automatically pad each tensor to have the same length (L) in the last dimension
|
| 290 |
+
attention_mask = []
|
| 291 |
+
for tensor in batch:
|
| 292 |
+
to_add = []
|
| 293 |
+
for pl in range(tensor.shape[1]):
|
| 294 |
+
to_add.append(1)
|
| 295 |
+
for pl in range(tensor.shape[1], max_x):
|
| 296 |
+
to_add.append(0)
|
| 297 |
+
attention_mask.append(to_add)
|
| 298 |
+
attention_mask = torch.tensor(attention_mask)
|
| 299 |
+
|
| 300 |
padded_tensors = [torch.nn.functional.pad(tensor, (0, 0, 0, max_x - tensor.shape[1])) for tensor in batch]
|
| 301 |
|
| 302 |
# Step 3: Stack the padded tensors into a single batch
|
|
|
|
| 309 |
kwargs.pop("decode_text")
|
| 310 |
result = self._decode_stream(batch, tokenizer, **kwargs)
|
| 311 |
else:
|
| 312 |
+
result = self._decode(batch, tokenizer, attention_mask=attention_mask, **kwargs)
|
| 313 |
|
| 314 |
return result
|
| 315 |
|