Spaces:

HedronCreeper
/

speech

Sleeping

App Files Community

CryptoCreeper committed on Feb 28

Commit

c885f0d

verified ·

1 Parent(s): a75a81d

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -20

app.py CHANGED Viewed

@@ -5,11 +5,16 @@ from qwen_tts import Qwen3TTSModel
 from langdetect import detect
 import os
-# Load model - optimized for BF16 to save memory
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
-print(f"Loading model to {device}...")
 model = Qwen3TTSModel.from_pretrained(
     model_id,
     device_map=device,
@@ -18,14 +23,17 @@ model = Qwen3TTSModel.from_pretrained(
 def smart_tts(text, voice, instructions, auto_detect):
     try:
-        # Smart Language Detection
         lang_map = {
             'zh': 'Chinese', 'en': 'English', 'jp': 'Japanese',
             'ko': 'Korean', 'de': 'German', 'fr': 'French',
             'ru': 'Russian', 'pt': 'Portuguese', 'es': 'Spanish', 'it': 'Italian'
         }
-        detected_lang = "English" # Default
         if auto_detect:
             try:
                 raw_lang = detect(text).split('-')[0]
@@ -33,8 +41,7 @@ def smart_tts(text, voice, instructions, auto_detect):
             except:
                 pass
-        # Generate Audio
-        # The CustomVoice model uses instructions for style/emotion
         wavs, sr = model.generate_custom_voice(
             language=detected_lang,
             speaker=voice,
@@ -44,43 +51,43 @@ def smart_tts(text, voice, instructions, auto_detect):
         output_path = "output.wav"
         sf.write(output_path, wavs[0], sr)
-        return output_path, f"Detected Language: {detected_lang}"
     except Exception as e:
-        return None, str(e)
 # UI Layout
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown(f"# 🗣️ Qwen3-TTS Smart Studio")
-    gr.Markdown("Experience natural speech with style control using Qwen3-TTS-12Hz.")
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(
-                label="Input Text",
-                placeholder="Type something here...",
                 lines=4
             )
             with gr.Row():
                 voice_select = gr.Dropdown(
-                    choices=["Vivian", "Ryan", "Bella", "Daisy", "George"],
-                    value="Vivian",
-                    label="Speaker"
                 )
                 auto_lang = gr.Checkbox(label="Auto-detect Language", value=True)
             style_instruct = gr.Textbox(
-                label="Style Instruction (e.g., 'Speak with a happy tone')",
-                placeholder="Angry, Sad, Excited, Whisper...",
                 value="Speak naturally"
             )
-            generate_btn = gr.Button("Generate Speech", variant="primary")
         with gr.Column():
-            audio_output = gr.Audio(label="Generated Audio", type="filepath")
-            status_info = gr.Label(label="System Status")
     generate_btn.click(
         fn=smart_tts,

 from langdetect import detect
 import os
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
+# Supported voices for this specific model
+SUPPORTED_VOICES = [
+    'aiden', 'dylan', 'eric', 'ono_anna',
+    'ryan', 'serena', 'sohee', 'uncle_fu', 'vivian'
+]
+print(f"Loading Qwen3-TTS to {device}...")
 model = Qwen3TTSModel.from_pretrained(
     model_id,
     device_map=device,
 def smart_tts(text, voice, instructions, auto_detect):
     try:
+        if voice not in SUPPORTED_VOICES:
+            return None, f"Error: Voice '{voice}' is not in the supported list."
+        # Smart Language Detection Mapping
         lang_map = {
             'zh': 'Chinese', 'en': 'English', 'jp': 'Japanese',
             'ko': 'Korean', 'de': 'German', 'fr': 'French',
             'ru': 'Russian', 'pt': 'Portuguese', 'es': 'Spanish', 'it': 'Italian'
         }
+        detected_lang = "English"
         if auto_detect:
             try:
                 raw_lang = detect(text).split('-')[0]
             except:
                 pass
+        # Generate Audio using the specific speaker ID
         wavs, sr = model.generate_custom_voice(
             language=detected_lang,
             speaker=voice,
         output_path = "output.wav"
         sf.write(output_path, wavs[0], sr)
+        return output_path, f"Language: {detected_lang} | Speaker: {voice}"
     except Exception as e:
+        return None, f"System Error: {str(e)}"
 # UI Layout
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🗣️ Qwen3-TTS Smart Studio")
+    gr.Markdown(f"Optimized for **{model_id}** on Hugging Face Free Tier.")
     with gr.Row():
         with gr.Column():
             input_text = gr.Textbox(
+                label="Text to Speak",
+                placeholder="Enter text here...",
                 lines=4
             )
             with gr.Row():
                 voice_select = gr.Dropdown(
+                    choices=SUPPORTED_VOICES,
+                    value="vivian",
+                    label="Select Speaker"
                 )
                 auto_lang = gr.Checkbox(label="Auto-detect Language", value=True)
             style_instruct = gr.Textbox(
+                label="Style/Emotion Instruction",
+                placeholder="e.g. Speak with a professional tone, Whisper, or Excitedly",
                 value="Speak naturally"
             )
+            generate_btn = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
+            audio_output = gr.Audio(label="Result", type="filepath")
+            status_info = gr.Label(label="Metadata")
     generate_btn.click(
         fn=smart_tts,