andrewqian123 committed on
Commit
34b9361
·
verified ·
1 Parent(s): 609abef

Update modeling_minicpmv.py

Browse files
Files changed (1) hide show
  1. modeling_minicpmv.py +24 -4
modeling_minicpmv.py CHANGED
@@ -197,17 +197,27 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
197
  result_text.append(tokenizer.decode(result).strip())
198
  return result_text
199
 
200
- def _decode(self, inputs_embeds, tokenizer, decode_text=False, **kwargs):
201
  terminators = [
202
  tokenizer.eos_token_id,
203
  tokenizer.convert_tokens_to_ids("<|eot_id|>")
204
  ]
205
- output = self.llm.generate(
 
 
206
  inputs_embeds=inputs_embeds,
 
207
  pad_token_id=0,
208
  eos_token_id=terminators,
209
  **kwargs
210
- )
 
 
 
 
 
 
 
211
  if decode_text:
212
  return self._decode_text(output, tokenizer)
213
  return output
@@ -277,6 +287,16 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
277
  max_x = max(tensor.shape[1] for tensor in batch)
278
 
279
  # Step 2: Automatically pad each tensor to have the same length (L) in the last dimension
 
 
 
 
 
 
 
 
 
 
280
  padded_tensors = [torch.nn.functional.pad(tensor, (0, 0, 0, max_x - tensor.shape[1])) for tensor in batch]
281
 
282
  # Step 3: Stack the padded tensors into a single batch
@@ -289,7 +309,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
289
  kwargs.pop("decode_text")
290
  result = self._decode_stream(batch, tokenizer, **kwargs)
291
  else:
292
- result = self._decode(batch, tokenizer, **kwargs)
293
 
294
  return result
295
 
 
197
  result_text.append(tokenizer.decode(result).strip())
198
  return result_text
199
 
200
+ def _decode(self, inputs_embeds, tokenizer, attention_mask=None, decode_text=False, **kwargs):
201
  terminators = [
202
  tokenizer.eos_token_id,
203
  tokenizer.convert_tokens_to_ids("<|eot_id|>")
204
  ]
205
+ output = None
206
+ if (attention_mask != None):
207
+ output = self.llm.generate(
208
  inputs_embeds=inputs_embeds,
209
+ attention_mask=attention_mask,
210
  pad_token_id=0,
211
  eos_token_id=terminators,
212
  **kwargs
213
+ )
214
+ else:
215
+ output = self.llm.generate(
216
+ inputs_embeds=inputs_embeds,
217
+ pad_token_id=0,
218
+ eos_token_id=terminators,
219
+ **kwargs
220
+ )
221
  if decode_text:
222
  return self._decode_text(output, tokenizer)
223
  return output
 
287
  max_x = max(tensor.shape[1] for tensor in batch)
288
 
289
  # Step 2: Automatically pad each tensor to have the same length (L) in the last dimension
290
# Build a (batch, max_x) attention mask in one pass: 1 for real positions,
# 0 for the right-padding added to each tensor below. The comprehension
# replaces the original hand-rolled nested append loops.
# NOTE(review): created on the batch tensors' device so `generate` is not
# handed a CPU mask alongside GPU embeddings — the original always built it
# on CPU; confirm against GPU callers.
attention_mask = torch.tensor(
    [[1] * t.shape[1] + [0] * (max_x - t.shape[1]) for t in batch],
    device=batch[0].device,
)
299
+
300
  padded_tensors = [torch.nn.functional.pad(tensor, (0, 0, 0, max_x - tensor.shape[1])) for tensor in batch]
301
 
302
  # Step 3: Stack the padded tensors into a single batch
 
309
  kwargs.pop("decode_text")
310
  result = self._decode_stream(batch, tokenizer, **kwargs)
311
  else:
312
+ result = self._decode(batch, tokenizer, attention_mask=attention_mask, **kwargs)
313
 
314
  return result
315