# speech / app.py
# Hugging Face Space by CryptoCreeper — "Update app.py" (commit c885f0d, verified)
import os

import gradio as gr
import soundfile as sf
import torch
from langdetect import LangDetectException, detect

from qwen_tts import Qwen3TTSModel
# ---------------------------------------------------------------------------
# Runtime configuration: pick the best available device and load the model.
# ---------------------------------------------------------------------------
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"

# Speaker IDs accepted by this particular checkpoint.
SUPPORTED_VOICES = [
    'aiden', 'dylan', 'eric', 'ono_anna',
    'ryan', 'serena', 'sohee', 'uncle_fu', 'vivian'
]

print(f"Loading Qwen3-TTS to {device}...")

# bfloat16 halves GPU memory; CPU inference needs full float32.
_dtype = torch.bfloat16 if device == "cuda" else torch.float32
model = Qwen3TTSModel.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype=_dtype,
)
def smart_tts(text, voice, instructions, auto_detect):
    """Synthesize speech for *text* with the selected speaker.

    Args:
        text: The text to speak.
        voice: Speaker ID; must be one of SUPPORTED_VOICES.
        instructions: Free-form style/emotion instruction passed to the model.
        auto_detect: When True, guess the language of *text* with langdetect.

    Returns:
        (audio_path, status_message) on success, or (None, error_message)
        on any failure — the Gradio UI shows the message either way.
    """
    try:
        # Guard clauses: reject unusable input before touching the model.
        if not text or not text.strip():
            return None, "Error: Please enter some text to speak."
        if voice not in SUPPORTED_VOICES:
            return None, f"Error: Voice '{voice}' is not in the supported list."

        # Map langdetect's ISO 639-1 codes to the names the model expects.
        # BUGFIX: langdetect reports Japanese as 'ja' — the original 'jp'
        # key could never match, so Japanese text always fell back to
        # English. 'jp' is kept as a harmless alias.
        lang_map = {
            'zh': 'Chinese', 'en': 'English', 'ja': 'Japanese', 'jp': 'Japanese',
            'ko': 'Korean', 'de': 'German', 'fr': 'French',
            'ru': 'Russian', 'pt': 'Portuguese', 'es': 'Spanish', 'it': 'Italian'
        }
        detected_lang = "English"
        if auto_detect:
            try:
                # detect() may return region-tagged codes such as 'zh-cn';
                # keep only the base language code.
                raw_lang = detect(text).split('-')[0]
                detected_lang = lang_map.get(raw_lang, "English")
            except LangDetectException:
                # Detection fails on very short / ambiguous text; fall back
                # to English rather than aborting the whole request.
                pass

        # Generate audio using the specific speaker ID.
        wavs, sr = model.generate_custom_voice(
            language=detected_lang,
            speaker=voice,
            instruct=instructions,
            text=text
        )
        output_path = "output.wav"
        sf.write(output_path, wavs[0], sr)
        return output_path, f"Language: {detected_lang} | Speaker: {voice}"
    except Exception as e:
        # Top-level boundary: surface any failure to the UI instead of crashing.
        return None, f"System Error: {str(e)}"
# ---------------------------------------------------------------------------
# Gradio interface layout
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🗣️ Qwen3-TTS Smart Studio")
    gr.Markdown(f"Optimized for **{model_id}** on Hugging Face Free Tier.")

    with gr.Row():
        # Left-hand column: all user inputs.
        with gr.Column():
            text_in = gr.Textbox(
                label="Text to Speak",
                placeholder="Enter text here...",
                lines=4,
            )
            with gr.Row():
                speaker_dd = gr.Dropdown(
                    choices=SUPPORTED_VOICES,
                    value="vivian",
                    label="Select Speaker",
                )
                detect_cb = gr.Checkbox(label="Auto-detect Language", value=True)
            instruct_tb = gr.Textbox(
                label="Style/Emotion Instruction",
                placeholder="e.g. Speak with a professional tone, Whisper, or Excitedly",
                value="Speak naturally",
            )
            synth_btn = gr.Button("Generate Audio", variant="primary")

        # Right-hand column: generated audio and status read-out.
        with gr.Column():
            result_audio = gr.Audio(label="Result", type="filepath")
            result_meta = gr.Label(label="Metadata")

    # Route a button press through smart_tts and back into the outputs.
    synth_btn.click(
        fn=smart_tts,
        inputs=[text_in, speaker_dd, instruct_tb, detect_cb],
        outputs=[result_audio, result_meta],
    )

# Launch the web UI only when executed as a script.
if __name__ == "__main__":
    demo.launch()