Qwen3-TTS-Rulate

Runtime error

littlebird13 committed on Jan 22

Commit

6453bfe

verified ·

1 Parent(s): 9921fc6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # Qwen3-TTS Gradio Demo for HuggingFace Spaces with Zero GPU
 # Supports: Voice Design, Voice Clone (Base), TTS (CustomVoice)
 import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', shell=True)
 import os
 import spaces
 import gradio as gr
@@ -38,7 +38,7 @@ def get_model(model_type: str, model_size: str):
             device_map="cuda",
             dtype=torch.bfloat16,
             token=HF_TOKEN,
-            attn_implementation="flash_attention_2",
         )
     return loaded_models[key]

 # Qwen3-TTS Gradio Demo for HuggingFace Spaces with Zero GPU
 # Supports: Voice Design, Voice Clone (Base), TTS (CustomVoice)
 import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 import os
 import spaces
 import gradio as gr
             device_map="cuda",
             dtype=torch.bfloat16,
             token=HF_TOKEN,
+#           attn_implementation="flash_attention_2",
         )
     return loaded_models[key]