Update custom model files, README, and requirements
Browse files
- asr_modeling.py +12 -0
asr_modeling.py
CHANGED
|
@@ -840,6 +840,18 @@ class ASRModel(PreTrainedModel):
|
|
| 840 |
print(f"DEBUG generate_stream: num_audio_tokens={num_audio_tokens}", file=sys.stderr)
|
| 841 |
print(f"DEBUG generate_stream: generate_kwargs={generate_kwargs}", file=sys.stderr)
|
| 842 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 843 |
# Set up the streamer
|
| 844 |
streamer = TextIteratorStreamer(
|
| 845 |
self.tokenizer,
|
|
|
|
| 840 |
print(f"DEBUG generate_stream: num_audio_tokens={num_audio_tokens}", file=sys.stderr)
|
| 841 |
print(f"DEBUG generate_stream: generate_kwargs={generate_kwargs}", file=sys.stderr)
|
| 842 |
|
| 843 |
+
# Test: Try without threading first to see if that's the issue
|
| 844 |
+
print(f"DEBUG: Testing non-threaded generation first", file=sys.stderr)
|
| 845 |
+
test_output = self.decoder.generate(
|
| 846 |
+
input_ids=expanded_prompt_ids,
|
| 847 |
+
inputs_embeds=inputs_embeds,
|
| 848 |
+
attention_mask=attention_mask,
|
| 849 |
+
max_new_tokens=10, # Just generate a few tokens to test
|
| 850 |
+
**{k: v for k, v in generate_kwargs.items() if k != 'max_new_tokens'}
|
| 851 |
+
)
|
| 852 |
+
test_text = self.tokenizer.decode(test_output[0, input_token_count:], skip_special_tokens=True)
|
| 853 |
+
print(f"DEBUG: Non-threaded test output: {test_text}", file=sys.stderr)
|
| 854 |
+
|
| 855 |
# Set up the streamer
|
| 856 |
streamer = TextIteratorStreamer(
|
| 857 |
self.tokenizer,
|