Spaces (status: Runtime error)

kz209 committed · Commit f276c92 · 1 Parent(s): 9dfac6e

update

Files changed:
- utils/model.py +32 -17
- utils/multiple_stream.py +7 -7
utils/model.py CHANGED

@@ -60,27 +60,42 @@ class Model(torch.nn.Module):
         input_ids = self.tokenizer(content_list, return_tensors="pt", padding=True, truncation=True).input_ids.to(self.model.device)

         if streaming:
-            # ... (15 removed lines: the previous streaming implementation)
+            # Process each input separately
+            for single_input_ids in input_ids:
+                # Set up the initial generation parameters
+                gen_kwargs = {
+                    "input_ids": single_input_ids.unsqueeze(0),
+                    "max_new_tokens": max_length,
+                    "do_sample": True,
+                    "temperature": temp,
+                    "eos_token_id": self.tokenizer.eos_token_id,
+                }
+
+                # Generate and yield tokens one by one
+                unfinished_sequences = single_input_ids.unsqueeze(0)
+                while unfinished_sequences.shape[1] < gen_kwargs["max_new_tokens"]:
+                    with torch.no_grad():
+                        output = self.model.generate(**gen_kwargs, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
+
+                    next_token_logits = output.scores[0][0]
+                    next_token = torch.argmax(next_token_logits, dim=-1).unsqueeze(0)
+                    unfinished_sequences = torch.cat([unfinished_sequences, next_token], dim=-1)
+
+                    # Yield the newly generated token
+                    yield self.tokenizer.decode(next_token[0], skip_special_tokens=True)
+
+                    if next_token.item() == self.tokenizer.eos_token_id:
+                        break
+
+                    # Update input_ids for the next iteration
+                    gen_kwargs["input_ids"] = unfinished_sequences
         else:
+            # Non-streaming generation (unchanged)
             outputs = self.model.generate(
                 input_ids,
                 max_new_tokens=max_length,
                 do_sample=True,
                 temperature=temp,
-                eos_token_id=self.tokenizer.eos_token_id
+                eos_token_id=self.tokenizer.eos_token_id,
             )
-            return
+            return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
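As committed, the streaming branch would fail at runtime, which matches the Space's status badge. `gen_kwargs` already contains `"max_new_tokens"`, so `self.model.generate(**gen_kwargs, max_new_tokens=1, ...)` raises a TypeError for a duplicated keyword argument. Even with that fixed, `next_token` leaves `argmax(...).unsqueeze(0)` with shape `[1]` while `unfinished_sequences` has shape `[1, seq_len]`, so the `torch.cat` call fails on mismatched dimensions; the `while` condition also compares total sequence length (prompt included) against `max_new_tokens` instead of counting newly generated tokens. Below is a minimal corrected sketch of the same token-by-token idea; `stream_one` is a hypothetical helper, not code from this repository, and it assumes `model` is a Hugging Face causal LM with its paired `tokenizer`:

# Hypothetical corrected sketch -- not part of this commit.
import torch

def stream_one(model, tokenizer, single_input_ids, max_new_tokens, temp):
    """Yield decoded tokens one at a time for a single 1-D prompt tensor."""
    generated = single_input_ids.unsqueeze(0)        # shape [1, prompt_len]
    for _ in range(max_new_tokens):                  # count new tokens, not total length
        with torch.no_grad():
            logits = model(generated).logits[:, -1, :]        # next-token logits, [1, vocab]
        probs = torch.softmax(logits / temp, dim=-1)          # temperature sampling,
        next_token = torch.multinomial(probs, num_samples=1)  # consistent with do_sample=True; [1, 1]
        generated = torch.cat([generated, next_token], dim=-1)  # dimensions now agree
        yield tokenizer.decode(next_token[0], skip_special_tokens=True)
        if next_token.item() == tokenizer.eos_token_id:
            break

Note that this sketch, like the commit, re-runs the forward pass over the whole sequence on every step; reusing past_key_values, or wrapping a single generate call with transformers' TextIteratorStreamer, is the usual way to avoid that cost.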
utils/multiple_stream.py CHANGED

@@ -26,13 +26,13 @@ def stream_data(content_list, model):
     # Use the gen method to handle batch generation
     while True:
         updated = False
-        for i, content in enumerate(content_list):
-            ... (6 removed lines: body of the previous per-input update loop)
+        #for i, content in enumerate(content_list):
+        try:
+            words = next(model.gen(content_list, streaming=True)) # Wrap content in a list to match expected input type
+            outputs = [outputs[i].append(f" {words[i]}") for i in range(len(content_list))]
+            updated = True
+        except StopIteration:
+            pass

         if not updated:
             break
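The consumer in stream_data has matching problems. `model.gen(content_list, streaming=True)` is called inside the `while` loop, so every pass constructs a fresh generator and `next()` returns the first token over and over; `gen` yields a single decoded string rather than a per-input list, so `words[i]` indexes characters of that string; and `list.append` returns None, so the comprehension rebinds `outputs` to a list of Nones. A sketch of a working consumer follows; it is hypothetical rather than the committed code, and it assumes `gen` were changed to yield `(input_index, token)` pairs so tokens can be routed to the right slot:

# Hypothetical sketch -- assumes Model.gen yields (input_index, token) pairs,
# which the committed gen does not do.
def stream_data(content_list, model):
    outputs = ["" for _ in content_list]
    stream = model.gen(content_list, streaming=True)  # build the generator once
    for i, token in stream:
        outputs[i] += f" {token}"  # string concatenation; list.append would return None
        yield list(outputs)        # hand the caller a snapshot after each token

Yielding the index alongside the token is one way for a single generator to feed several output streams; without it, the consumer cannot tell where one input's tokens end and the next input's begin.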