# NOTE: this file was captured from a Hugging Face Space page whose status
# banner read "Build error".
"""
Qwen3-TTS Web UI for Hugging Face Spaces
=========================================
CPU-only mode for maximum compatibility.
"""
import os
import tempfile
import traceback

# NOTE(review): third-party imports keep their original relative order
# (`spaces` ahead of `torch`) in case import order matters on Spaces — confirm.
import spaces
import gradio as gr
import numpy as np
import torch
import soundfile as sf
from huggingface_hub import snapshot_download
from qwen_tts import Qwen3TTSModel
# ─────────────────────────────────────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────────────────────────────────────
# Model variant; interpolated into the Hugging Face repo id below.
MODEL_SIZE = "1.7B"  # Full quality model
# Speaker names offered in the UI's voice dropdown.
ENGLISH_SPEAKERS = ["Ryan", "Aiden"]

# Load the model on CPU once at import time; generate_speech() reuses this
# module-level instance for every request.
print(f"📦 Loading {MODEL_SIZE} model on CPU...")
model_path = snapshot_download(f"Qwen/Qwen3-TTS-12Hz-{MODEL_SIZE}-CustomVoice")
model = Qwen3TTSModel.from_pretrained(
    model_path,
    device_map="cpu",
    dtype=torch.float32,
)
print("✅ Model loaded!")
# ─────────────────────────────────────────────────────────────────────────────
# TTS Generation Function
# ─────────────────────────────────────────────────────────────────────────────
def generate_speech(text, speaker, voice_style):
    """Generate speech from text and save it as a WAV file.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is rejected with a warning status instead of calling the model.
        speaker: Speaker name forwarded to the model as ``speaker``.
        voice_style: Optional free-text style description forwarded to the
            model as ``instruct``; a falsy value is sent as ``""``.

    Returns:
        A ``(audio_path, status)`` tuple. ``audio_path`` is the path of the
        written ``.wav`` file, or ``None`` when the input was rejected or
        generation failed. ``status`` is a human-readable message.
    """
    # Guard against None as well as blank strings so .strip() cannot raise.
    if not text or not text.strip():
        return None, "⚠️ Please enter some text."

    try:
        wavs, sr = model.generate_custom_voice(
            text=text,
            language="Auto",
            speaker=speaker,
            instruct=voice_style or "",
        )
        audio = wavs[0]

        # Reserve a unique path, then close the handle right away so no file
        # descriptor leaks and the writer is not competing with an open
        # handle. delete=False keeps the file on disk after this function
        # returns; nothing here removes it afterwards.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name
        sf.write(wav_path, audio, sr)

        duration = len(audio) / sr
        return wav_path, f"✅ Generated {duration:.1f}s of audio"
    except Exception as e:
        # UI boundary: log the full traceback server-side and surface a
        # short message to the user instead of crashing the request.
        traceback.print_exc()
        return None, f"❌ Error: {e}"
# ─────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ─────────────────────────────────────────────────────────────────────────────
# NOTE(review): the original indentation was lost; the nesting of rows and
# columns below is reconstructed from statement order — confirm the layout.
with gr.Blocks(title="Qwen Voice Assistant") as demo:
    gr.Markdown(
        "# 🎙️ Qwen Voice Assistant\n"
        "### Text-to-Speech powered by Qwen3-TTS\n\n"
        "⏱️ Generation takes ~30-60 seconds (CPU mode)\n"
    )

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Text to Speak",
                placeholder="Enter the text you want to convert to speech...",
                lines=4,
                max_lines=10,
            )
            with gr.Row():
                speaker_dropdown = gr.Dropdown(
                    choices=ENGLISH_SPEAKERS,
                    value=ENGLISH_SPEAKERS[0],
                    label="Voice",
                    info="Select a speaker voice",
                )
                voice_style = gr.Textbox(
                    label="Voice Style (Optional)",
                    placeholder="e.g., happy, slow, whisper...",
                    info="Describe the tone or emotion",
                )
            # NOTE(review): the button glyph was unreadable in the source;
            # the speaker emoji is a best guess.
            generate_btn = gr.Button(
                "🔊 Generate Speech", variant="primary", size="lg"
            )

        # Right column: outputs written by generate_speech().
        with gr.Column(scale=1):
            audio_output = gr.Audio(
                label="Generated Audio",
                type="filepath",
                interactive=False,
            )
            status_output = gr.Textbox(
                label="Status",
                interactive=False,
            )

    # Voice style examples
    gr.Markdown("### 💡 Voice Style Examples")
    gr.Examples(
        examples=[
            ["Hello! How are you today?", "Ryan", "friendly and warm"],
            ["Breaking news: Scientists discover water on Mars!", "Aiden", "excited news anchor"],
            ["Once upon a time, in a land far away...", "Ryan", "storytelling, slow and dramatic"],
            ["Warning! System overload detected.", "Aiden", "urgent and serious"],
            ["I love you with all my heart.", "Ryan", "soft and emotional"],
        ],
        inputs=[text_input, speaker_dropdown, voice_style],
        label="Click an example to try it",
    )

    # Footer is derived from MODEL_SIZE so it cannot drift from the model
    # that is actually loaded.
    gr.Markdown(
        f"---\n**Model:** [Qwen3-TTS-12Hz-{MODEL_SIZE}-CustomVoice]"
        f"(https://huggingface.co/Qwen/Qwen3-TTS-12Hz-{MODEL_SIZE}-CustomVoice)"
        " (Apache 2.0)"
    )

    # Connect button
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, speaker_dropdown, voice_style],
        outputs=[audio_output, status_output],
    )
# ─────────────────────────────────────────────────────────────────────────────
# Launch
# ─────────────────────────────────────────────────────────────────────────────
# Start the Gradio server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)