Update custom model files, README, and requirements
asr_modeling.py  +12 -1
@@ -731,10 +731,13 @@ class ASRModel(PreTrainedModel):
         Stream generation by using the working generate() method with a TextIteratorStreamer.
         """
         # Set up the streamer
+        # Note: skip_prompt=True means it won't output the prompt tokens
+        # This should start streaming from the first NEW generated token
         streamer = TextIteratorStreamer(
             self.tokenizer,
             skip_prompt=True,
-            skip_special_tokens=True
+            skip_special_tokens=True,
+            timeout=30.0  # Add timeout to prevent hanging
         )

         # Count prompt length for stats
@@ -781,6 +784,14 @@ class ASRModel(PreTrainedModel):
         if future.exception():
             raise future.exception()

+        # Debug: If no chunks were yielded, check what was generated
+        if output_token_count == 0:
+            import sys
+            result = future.result()
+            if result is not None:
+                decoded = self.tokenizer.decode(result[0], skip_special_tokens=True)
+                print(f"DEBUG: No chunks yielded but generated: {decoded}", file=sys.stderr)
+
         # For stats, estimate input tokens (we can't easily get exact count without duplicating work)
         # Rough estimate: prompt is about 20 tokens + 750 audio tokens
         estimated_input_tokens = 770
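For context, the streamer configured in the first hunk is drained by a consumer loop while generate() runs in a worker thread; that is where the `future` and `output_token_count` names in the second hunk come from. Below is a minimal sketch of that pattern, reconstructed from the names in the diff rather than copied from the repo: TextIteratorStreamer, its skip_prompt/skip_special_tokens/timeout parameters, and generate(streamer=...) are standard transformers API, but the function name stream_generate, the `inputs` dict, and the max_new_tokens default are assumptions for illustration.

from concurrent.futures import ThreadPoolExecutor

from transformers import TextIteratorStreamer


def stream_generate(model, tokenizer, inputs, max_new_tokens=256):
    """Yield decoded text chunks while generate() runs in a worker thread."""
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,          # yield only newly generated tokens
        skip_special_tokens=True,  # forwarded to tokenizer.decode()
        timeout=30.0,              # raise queue.Empty instead of hanging forever
    )
    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(
            model.generate,
            **inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
        )
        output_token_count = 0
        for chunk in streamer:       # blocks up to 30 s waiting for each chunk
            output_token_count += 1  # approximate: the streamer flushes per word
            yield chunk
        if future.exception():
            raise future.exception()

The timeout added in the first hunk guards against exactly the failure mode the debug hunk probes: if generate() dies before emitting a token, the streamer's internal queue never receives its end signal, and iterating it would otherwise block indefinitely; with timeout=30.0 it raises queue.Empty instead.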