michaeltangz committed on
Commit
078e579
·
1 Parent(s): 8f2a46b

refactor app.py to move flash attention installation into a try-except block; ensure proper fallback to sdpa if installation fails

Browse files
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -8,19 +8,30 @@ import scipy.io.wavfile
8
  import time
9
  import numpy as np
10
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
11
- import subprocess
12
- subprocess.run(
13
- "pip install flash-attn --no-build-isolation",
14
- env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
15
- shell=True,
16
- )
17
 
18
  device = "cuda" if torch.cuda.is_available() else "cpu"
19
  torch_dtype = torch.float16
20
  MODEL_NAME = "openai/whisper-large-v3-turbo"
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
23
- MODEL_NAME, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation="flash_attention_2"
 
 
 
 
24
  )
25
  model.to(device)
26
 
 
8
  import time
9
  import numpy as np
10
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
 
 
 
 
 
 
11
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
  torch_dtype = torch.float16
14
  MODEL_NAME = "openai/whisper-large-v3-turbo"
15
 
16
+ # Try to use flash attention, fall back to sdpa if not available
17
+ try:
18
+ import subprocess
19
+ subprocess.run(
20
+ "pip install flash-attn --no-build-isolation",
21
+ env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
22
+ shell=True,
23
+ )
24
+ from flash_attn import flash_attn_func
25
+ attn_implementation = "flash_attention_2"
26
+ except Exception:
27
+ attn_implementation = "sdpa" # Use PyTorch's scaled dot product attention
28
+
29
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
30
+ MODEL_NAME,
31
+ torch_dtype=torch_dtype,
32
+ low_cpu_mem_usage=True,
33
+ use_safetensors=True,
34
+ attn_implementation=attn_implementation
35
  )
36
  model.to(device)
37