"""Gradio demo for Qwen3-TTS CustomVoice with automatic language detection."""

import os

import gradio as gr
import soundfile as sf
import torch
from langdetect import LangDetectException, detect
from qwen_tts import Qwen3TTSModel

device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"

# Supported voices for this specific model
SUPPORTED_VOICES = [
    'aiden', 'dylan', 'eric', 'ono_anna', 'ryan',
    'serena', 'sohee', 'uncle_fu', 'vivian'
]

# langdetect emits ISO 639-1 codes ('ja', 'ko', 'zh-cn', ...); map them to the
# language names the model expects.  BUGFIX: the original keyed Japanese as
# 'jp', which langdetect never returns -- the correct 639-1 code is 'ja'.
# Hoisted to module level so the dict is not rebuilt on every request.
LANG_MAP = {
    'zh': 'Chinese', 'en': 'English', 'ja': 'Japanese', 'ko': 'Korean',
    'de': 'German', 'fr': 'French', 'ru': 'Russian', 'pt': 'Portuguese',
    'es': 'Spanish', 'it': 'Italian'
}

print(f"Loading Qwen3-TTS to {device}...")
model = Qwen3TTSModel.from_pretrained(
    model_id,
    device_map=device,
    # bf16 halves GPU memory; fall back to fp32 on CPU where bf16 kernels
    # may be slow or unsupported.
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32
)


def smart_tts(text, voice, instructions, auto_detect):
    """Synthesize *text* with the chosen speaker and optional language auto-detection.

    Args:
        text: The text to synthesize.
        voice: Speaker ID; must be one of SUPPORTED_VOICES.
        instructions: Free-form style/emotion instruction passed to the model.
        auto_detect: When True, guess the text's language via langdetect.

    Returns:
        (audio_path, status_message) tuple; audio_path is None on any error,
        with the error described in status_message.
    """
    try:
        if voice not in SUPPORTED_VOICES:
            return None, f"Error: Voice '{voice}' is not in the supported list."

        detected_lang = "English"
        if auto_detect:
            try:
                # Normalize regional variants ('zh-cn'/'zh-tw' -> 'zh');
                # unknown codes fall back to English.
                raw_lang = detect(text).split('-')[0]
                detected_lang = LANG_MAP.get(raw_lang, "English")
            except LangDetectException:
                # detect() raises on empty or undecidable text; keep the
                # English default rather than failing the request.
                pass

        # Generate Audio using the specific speaker ID
        wavs, sr = model.generate_custom_voice(
            language=detected_lang,
            speaker=voice,
            instruct=instructions,
            text=text
        )

        output_path = "output.wav"
        sf.write(output_path, wavs[0], sr)

        return output_path, f"Language: {detected_lang} | Speaker: {voice}"
    except Exception as e:
        # UI boundary: report the failure in the status label instead of
        # crashing the Gradio worker.
        return None, f"System Error: {str(e)}"


# UI Layout
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🗣️ Qwen3-TTS Smart Studio")
    gr.Markdown(f"Optimized for **{model_id}** on Hugging Face Free Tier.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Text to Speak",
                placeholder="Enter text here...",
                lines=4
            )
            with gr.Row():
                voice_select = gr.Dropdown(
                    choices=SUPPORTED_VOICES,
                    value="vivian",
                    label="Select Speaker"
                )
                auto_lang = gr.Checkbox(label="Auto-detect Language", value=True)
            style_instruct = gr.Textbox(
                label="Style/Emotion Instruction",
                placeholder="e.g. Speak with a professional tone, Whisper, or Excitedly",
                value="Speak naturally"
            )
            generate_btn = gr.Button("Generate Audio", variant="primary")
        with gr.Column():
            audio_output = gr.Audio(label="Result", type="filepath")
            status_info = gr.Label(label="Metadata")

    generate_btn.click(
        fn=smart_tts,
        inputs=[input_text, voice_select, style_instruct, auto_lang],
        outputs=[audio_output, status_info]
    )

if __name__ == "__main__":
    demo.launch()