CryptoCreeper committed on
Commit
c885f0d
·
verified ·
1 Parent(s): a75a81d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -20
app.py CHANGED
@@ -5,11 +5,16 @@ from qwen_tts import Qwen3TTSModel
5
  from langdetect import detect
6
  import os
7
 
8
- # Load model - optimized for BF16 to save memory
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
  model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
11
 
12
- print(f"Loading model to {device}...")
 
 
 
 
 
 
13
  model = Qwen3TTSModel.from_pretrained(
14
  model_id,
15
  device_map=device,
@@ -18,14 +23,17 @@ model = Qwen3TTSModel.from_pretrained(
18
 
19
  def smart_tts(text, voice, instructions, auto_detect):
20
  try:
21
- # Smart Language Detection
 
 
 
22
  lang_map = {
23
  'zh': 'Chinese', 'en': 'English', 'jp': 'Japanese',
24
  'ko': 'Korean', 'de': 'German', 'fr': 'French',
25
  'ru': 'Russian', 'pt': 'Portuguese', 'es': 'Spanish', 'it': 'Italian'
26
  }
27
 
28
- detected_lang = "English" # Default
29
  if auto_detect:
30
  try:
31
  raw_lang = detect(text).split('-')[0]
@@ -33,8 +41,7 @@ def smart_tts(text, voice, instructions, auto_detect):
33
  except:
34
  pass
35
 
36
- # Generate Audio
37
- # The CustomVoice model uses instructions for style/emotion
38
  wavs, sr = model.generate_custom_voice(
39
  language=detected_lang,
40
  speaker=voice,
@@ -44,43 +51,43 @@ def smart_tts(text, voice, instructions, auto_detect):
44
 
45
  output_path = "output.wav"
46
  sf.write(output_path, wavs[0], sr)
47
- return output_path, f"Detected Language: {detected_lang}"
48
 
49
  except Exception as e:
50
- return None, str(e)
51
 
52
  # UI Layout
53
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
54
- gr.Markdown(f"# 🗣️ Qwen3-TTS Smart Studio")
55
- gr.Markdown("Experience natural speech with style control using Qwen3-TTS-12Hz.")
56
 
57
  with gr.Row():
58
  with gr.Column():
59
  input_text = gr.Textbox(
60
- label="Input Text",
61
- placeholder="Type something here...",
62
  lines=4
63
  )
64
 
65
  with gr.Row():
66
  voice_select = gr.Dropdown(
67
- choices=["Vivian", "Ryan", "Bella", "Daisy", "George"],
68
- value="Vivian",
69
- label="Speaker"
70
  )
71
  auto_lang = gr.Checkbox(label="Auto-detect Language", value=True)
72
 
73
  style_instruct = gr.Textbox(
74
- label="Style Instruction (e.g., 'Speak with a happy tone')",
75
- placeholder="Angry, Sad, Excited, Whisper...",
76
  value="Speak naturally"
77
  )
78
 
79
- generate_btn = gr.Button("Generate Speech", variant="primary")
80
 
81
  with gr.Column():
82
- audio_output = gr.Audio(label="Generated Audio", type="filepath")
83
- status_info = gr.Label(label="System Status")
84
 
85
  generate_btn.click(
86
  fn=smart_tts,
 
5
  from langdetect import detect
6
  import os
7
 
 
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
  model_id = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
10
 
11
+ # Supported voices for this specific model
12
+ SUPPORTED_VOICES = [
13
+ 'aiden', 'dylan', 'eric', 'ono_anna',
14
+ 'ryan', 'serena', 'sohee', 'uncle_fu', 'vivian'
15
+ ]
16
+
17
+ print(f"Loading Qwen3-TTS to {device}...")
18
  model = Qwen3TTSModel.from_pretrained(
19
  model_id,
20
  device_map=device,
 
23
 
24
  def smart_tts(text, voice, instructions, auto_detect):
25
  try:
26
+ if voice not in SUPPORTED_VOICES:
27
+ return None, f"Error: Voice '{voice}' is not in the supported list."
28
+
29
+ # Smart Language Detection Mapping
30
  lang_map = {
31
  'zh': 'Chinese', 'en': 'English', 'jp': 'Japanese',
32
  'ko': 'Korean', 'de': 'German', 'fr': 'French',
33
  'ru': 'Russian', 'pt': 'Portuguese', 'es': 'Spanish', 'it': 'Italian'
34
  }
35
 
36
+ detected_lang = "English"
37
  if auto_detect:
38
  try:
39
  raw_lang = detect(text).split('-')[0]
 
41
  except:
42
  pass
43
 
44
+ # Generate Audio using the specific speaker ID
 
45
  wavs, sr = model.generate_custom_voice(
46
  language=detected_lang,
47
  speaker=voice,
 
51
 
52
  output_path = "output.wav"
53
  sf.write(output_path, wavs[0], sr)
54
+ return output_path, f"Language: {detected_lang} | Speaker: {voice}"
55
 
56
  except Exception as e:
57
+ return None, f"System Error: {str(e)}"
58
 
59
  # UI Layout
60
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
61
+ gr.Markdown("# 🗣️ Qwen3-TTS Smart Studio")
62
+ gr.Markdown(f"Optimized for **{model_id}** on Hugging Face Free Tier.")
63
 
64
  with gr.Row():
65
  with gr.Column():
66
  input_text = gr.Textbox(
67
+ label="Text to Speak",
68
+ placeholder="Enter text here...",
69
  lines=4
70
  )
71
 
72
  with gr.Row():
73
  voice_select = gr.Dropdown(
74
+ choices=SUPPORTED_VOICES,
75
+ value="vivian",
76
+ label="Select Speaker"
77
  )
78
  auto_lang = gr.Checkbox(label="Auto-detect Language", value=True)
79
 
80
  style_instruct = gr.Textbox(
81
+ label="Style/Emotion Instruction",
82
+ placeholder="e.g. Speak with a professional tone, Whisper, or Excitedly",
83
  value="Speak naturally"
84
  )
85
 
86
+ generate_btn = gr.Button("Generate Audio", variant="primary")
87
 
88
  with gr.Column():
89
+ audio_output = gr.Audio(label="Result", type="filepath")
90
+ status_info = gr.Label(label="Metadata")
91
 
92
  generate_btn.click(
93
  fn=smart_tts,