Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
# Qwen3-TTS Gradio Demo for HuggingFace Spaces with Zero GPU
|
| 3 |
# Supports: Voice Design, Voice Clone (Base), TTS (CustomVoice)
|
| 4 |
-
|
|
|
|
| 5 |
import os
|
| 6 |
import spaces
|
| 7 |
import gradio as gr
|
|
@@ -37,6 +38,7 @@ def get_model(model_type: str, model_size: str):
|
|
| 37 |
device_map="cuda",
|
| 38 |
dtype=torch.bfloat16,
|
| 39 |
token=HF_TOKEN,
|
|
|
|
| 40 |
)
|
| 41 |
return loaded_models[key]
|
| 42 |
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
# Qwen3-TTS Gradio Demo for HuggingFace Spaces with Zero GPU
|
| 3 |
# Supports: Voice Design, Voice Clone (Base), TTS (CustomVoice)
|
| 4 |
+
import subprocess
|
| 5 |
+
# Install flash-attn at startup, before the model code needs it.
# FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE is passed to the installer through the
# child environment. The variable is *added* to a copy of the current
# environment: passing a bare dict as `env` would replace the whole
# environment and drop PATH, HOME, proxy and virtualenv settings.
# An argument list with `sys.executable -m pip` avoids going through a shell
# and guarantees the package is installed for the interpreter running this app.
import os
import sys
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,  # best-effort, as before: an install failure does not raise here
)
|
| 6 |
import os
|
| 7 |
import spaces
|
| 8 |
import gradio as gr
|
|
|
|
| 38 |
device_map="cuda",
|
| 39 |
dtype=torch.bfloat16,
|
| 40 |
token=HF_TOKEN,
|
| 41 |
+
attn_implementation="flash_attention_2",
|
| 42 |
)
|
| 43 |
return loaded_models[key]
|
| 44 |
|