Spaces:

mic3333
/

asr

Sleeping

michaeltangz committed on Dec 8, 2025

Commit

574825e

1 Parent(s): 6cd2d8c

fix app.py to correct dtype parameter usage in model initialization and pipeline; remove redundant torch_dtype argument

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,12 +9,12 @@ import numpy as np
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
 device = "cuda" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.float16
 MODEL_NAME = "openai/whisper-large-v3-turbo"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch_dtype,
     low_cpu_mem_usage=True,
     use_safetensors=True,
     attn_implementation="sdpa"
@@ -30,7 +30,6 @@ pipe = pipeline(
     tokenizer=tokenizer,
     feature_extractor=processor.feature_extractor,
     chunk_length_s=10,
-    torch_dtype=torch_dtype,
     device=device,
     ignore_warning=True,
 )
@@ -100,8 +99,7 @@ with gr.Blocks() as microphone:
         input_audio_microphone.stream(
             stream_transcribe,
             inputs=[state, input_audio_microphone],
-            outputs=[state, output, latency_textbox],
-            stream_every=2
         )
         clear_button.click(clear_state, outputs=[state]).then(clear, outputs=[output])

 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
 device = "cuda" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 MODEL_NAME = "openai/whisper-large-v3-turbo"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
     MODEL_NAME,
+    dtype=torch_dtype,
     low_cpu_mem_usage=True,
     use_safetensors=True,
     attn_implementation="sdpa"
     tokenizer=tokenizer,
     feature_extractor=processor.feature_extractor,
     chunk_length_s=10,
     device=device,
     ignore_warning=True,
 )
         input_audio_microphone.stream(
             stream_transcribe,
             inputs=[state, input_audio_microphone],
+            outputs=[state, output, latency_textbox]
         )
         clear_button.click(clear_state, outputs=[state]).then(clear, outputs=[output])