Update custom model files, README, and requirements
asr_modeling.py  +12 -1
@@ -731,10 +731,13 @@ class ASRModel(PreTrainedModel):
         Stream generation by using the working generate() method with a TextIteratorStreamer.
         """
         # Set up the streamer
+        # Note: skip_prompt=True means it won't output the prompt tokens
+        # This should start streaming from the first NEW generated token
         streamer = TextIteratorStreamer(
             self.tokenizer,
             skip_prompt=True,
-            skip_special_tokens=True
+            skip_special_tokens=True,
+            timeout=30.0  # Add timeout to prevent hanging
         )

         # Count prompt length for stats
@@ -781,6 +784,14 @@ class ASRModel(PreTrainedModel):
         if future.exception():
             raise future.exception()

+        # Debug: If no chunks were yielded, check what was generated
+        if output_token_count == 0:
+            import sys
+            result = future.result()
+            if result is not None:
+                decoded = self.tokenizer.decode(result[0], skip_special_tokens=True)
+                print(f"DEBUG: No chunks yielded but generated: {decoded}", file=sys.stderr)
+
         # For stats, estimate input tokens (we can't easily get exact count without duplicating work)
         # Rough estimate: prompt is about 20 tokens + 750 audio tokens
         estimated_input_tokens = 770
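For context, the streamer configured in the first hunk is drained by a consumer loop while generate() runs in a worker thread; that is where the `future` and `output_token_count` names in the second hunk come from. Below is a minimal sketch of that pattern, reconstructed from the names in the diff rather than copied from the repo: TextIteratorStreamer, its skip_prompt/skip_special_tokens/timeout parameters, and generate(streamer=...) are standard transformers API, but the function name stream_generate, the `inputs` dict, and the max_new_tokens default are assumptions for illustration.

from concurrent.futures import ThreadPoolExecutor

from transformers import TextIteratorStreamer


def stream_generate(model, tokenizer, inputs, max_new_tokens=256):
    """Yield decoded text chunks while generate() runs in a worker thread."""
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,          # yield only newly generated tokens
        skip_special_tokens=True,  # forwarded to tokenizer.decode()
        timeout=30.0,              # raise queue.Empty instead of hanging forever
    )
    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(
            model.generate,
            **inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
        )
        output_token_count = 0
        for chunk in streamer:       # blocks up to 30 s waiting for each chunk
            output_token_count += 1  # approximate: the streamer flushes per word
            yield chunk
        if future.exception():
            raise future.exception()

The timeout added in the first hunk guards against exactly the failure mode the debug hunk probes: if generate() dies before emitting a token, the streamer's internal queue never receives its end signal, and iterating it would otherwise block indefinitely; with timeout=30.0 it raises queue.Empty instead.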