openbmb
/

MiniCPM-o-4_5

feature-extraction

Model card Files Files and versions

fix: make chat return early when stream is enabled

#16

by airlsyn - opened Mar 4

base: refs/heads/main

←

from: refs/pr/16

Discussion Files changed

Files changed (1) hide show

modeling_minicpmo.py +5 -0

modeling_minicpmo.py CHANGED Viewed

@@ -1176,6 +1176,8 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
             max_length=max_inp_length,
         ).to(self.device)
         generation_config = self.prepare_generation_config(
             do_sample=do_sample, max_new_tokens=max_new_tokens, min_new_tokens=min_new_tokens, **kwargs
         )
@@ -1194,6 +1196,9 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
                 **generation_config,
             )
         # spk bound and tts bound
         tts_bos_token = self.processor.tokenizer.convert_tokens_to_ids("<|tts_bos|>")
         tts_eos_token = self.processor.tokenizer.convert_tokens_to_ids("<|tts_eos|>")

             max_length=max_inp_length,
         ).to(self.device)
+        if stream:
+            kwargs["num_beams"] = 1
         generation_config = self.prepare_generation_config(
             do_sample=do_sample, max_new_tokens=max_new_tokens, min_new_tokens=min_new_tokens, **kwargs
         )
                 **generation_config,
             )
+        if stream:
+            return res
         # spk bound and tts bound
         tts_bos_token = self.processor.tokenizer.convert_tokens_to_ids("<|tts_bos|>")
         tts_eos_token = self.processor.tokenizer.convert_tokens_to_ids("<|tts_eos|>")