rahul7star committed on
Commit
167c3f4
·
verified ·
1 Parent(s): ee61b54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -114
app.py CHANGED
@@ -22,34 +22,12 @@ torch.load = _cpu_only_torch_load
22
 
23
 
24
  LANGUAGE_CONFIG = {
25
- "ar": {
26
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/ar_f/ar_prompts2.flac",
27
- "text": "في الشهر الماضي، وصلنا إلى معلم جديد بمليارين من المشاهدات على قناتنا على يوتيوب."
28
- },
29
- "da": {
30
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/da_m1.flac",
31
- "text": "Sidste måned nåede vi en ny milepæl med to milliarder visninger på vores YouTube-kanal."
32
- },
33
- "de": {
34
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/de_f1.flac",
35
- "text": "Letzten Monat haben wir einen neuen Meilenstein erreicht: zwei Milliarden Aufrufe auf unserem YouTube-Kanal."
36
- },
37
- "el": {
38
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/el_m.flac",
39
- "text": "Τον περασμένο μήνα, φτάσαμε σε ένα νέο ορόσημο με δύο δισεκατομμύρια προβολές στο κανάλι μας στο YouTube."
40
- },
41
  "en": {
42
  "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/en_f1.flac",
43
  "text": "Last month, we reached a new milestone with two billion views on our YouTube channel."
44
  },
45
- "es": {
46
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/es_f1.flac",
47
- "text": "El mes pasado alcanzamos un nuevo hito: dos mil millones de visualizaciones en nuestro canal de YouTube."
48
- },
49
- "fi": {
50
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/fi_m.flac",
51
- "text": "Viime kuussa saavutimme uuden virstanpylvään kahden miljardin katselukerran kanssa YouTube-kanavallamme."
52
- },
53
  "fr": {
54
  "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/fr_f1.flac",
55
  "text": "Le mois dernier, nous avons atteint un nouveau jalon avec deux milliards de vues sur notre chaîne YouTube."
@@ -62,58 +40,7 @@ LANGUAGE_CONFIG = {
62
  "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/hi_f1.flac",
63
  "text": "पिछले महीने हमने एक नया मील का पत्थर छुआ: हमारे YouTube चैनल पर दो अरब व्यूज़।"
64
  },
65
- "it": {
66
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/it_m1.flac",
67
- "text": "Il mese scorso abbiamo raggiunto un nuovo traguardo: due miliardi di visualizzazioni sul nostro canale YouTube."
68
- },
69
- "ja": {
70
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/ja/ja_prompts1.flac",
71
- "text": "先月、私たちのYouTubeチャンネルで二十億回の再生回数という新たなマイルストーンに到達しました。"
72
- },
73
- "ko": {
74
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/ko_f.flac",
75
- "text": "지난달 우리는 유튜브 채널에서 이십억 조회수라는 새로운 이정표에 도달했습니다."
76
- },
77
- "ms": {
78
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/ms_f.flac",
79
- "text": "Bulan lepas, kami mencapai pencapaian baru dengan dua bilion tontonan di saluran YouTube kami."
80
- },
81
- "nl": {
82
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/nl_m.flac",
83
- "text": "Vorige maand bereikten we een nieuwe mijlpaal met twee miljard weergaven op ons YouTube-kanaal."
84
- },
85
- "no": {
86
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/no_f1.flac",
87
- "text": "Forrige måned nådde vi en ny milepæl med to milliarder visninger på YouTube-kanalen vår."
88
- },
89
- "pl": {
90
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/pl_m.flac",
91
- "text": "W zeszłym miesiącu osiągnęliśmy nowy kamień milowy z dwoma miliardami wyświetleń na naszym kanale YouTube."
92
- },
93
- "pt": {
94
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/pt_m1.flac",
95
- "text": "No mês passado, alcançámos um novo marco: dois mil milhões de visualizações no nosso canal do YouTube."
96
- },
97
- "ru": {
98
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/ru_m.flac",
99
- "text": "В прошлом месяце мы достигли нового рубежа: два миллиарда просмотров на нашем YouTube-канале."
100
- },
101
- "sv": {
102
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/sv_f.flac",
103
- "text": "Förra månaden nådde vi en ny milstolpe med två miljarder visningar på vår YouTube-kanal."
104
- },
105
- "sw": {
106
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/sw_m.flac",
107
- "text": "Mwezi uliopita, tulifika hatua mpya ya maoni ya bilioni mbili kweny kituo chetu cha YouTube."
108
- },
109
- "tr": {
110
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/tr_m.flac",
111
- "text": "Geçen ay YouTube kanalımızda iki milyar görüntüleme ile yeni bir dönüm noktasına ulaştık."
112
- },
113
- "zh": {
114
- "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/zh_f2.flac",
115
- "text": "上个月,我们达到了一个新的里程碑。 我们的YouTube频道观看次数达到了二十亿次,这绝对令人难以置信。"
116
- },
117
  }
118
 
119
  # --- UI Helpers ---
@@ -143,29 +70,6 @@ def get_supported_languages_display() -> str:
143
  {line2}
144
  """
145
 
146
- def format_for_singing(lyrics: str) -> str:
147
- """
148
- Encode melody directly into text for Chatterbox.
149
- NO instructions. ONLY singable text.
150
- """
151
- lines = []
152
- for line in lyrics.splitlines():
153
- line = line.strip()
154
- if not line:
155
- continue
156
-
157
- # simple vowel stretching
158
- line = (
159
- line.replace("a", "aa")
160
- .replace("e", "ee")
161
- .replace("i", "ii")
162
- .replace("o", "oo")
163
- .replace("u", "uu")
164
- )
165
-
166
- lines.append(f"{line} ♪ ...")
167
-
168
- return "\n".join(lines)
169
 
170
 
171
  DEVICE = "cpu"
@@ -228,6 +132,39 @@ def resolve_audio_prompt(language_id: str, provided_path: str | None) -> str | N
228
 
229
 
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  def generate_tts_audio(
232
  text_input: str,
233
  lyrics_input: str,
@@ -244,7 +181,7 @@ def generate_tts_audio(
244
  if current_model is None:
245
  raise RuntimeError("TTS model is not loaded.")
246
 
247
- if seed_num_input != 0:
248
  set_seed(int(seed_num_input))
249
 
250
  chosen_prompt = audio_prompt_path_input or default_audio_for_ui(language_id)
@@ -258,13 +195,21 @@ def generate_tts_audio(
258
  if chosen_prompt:
259
  generate_kwargs["audio_prompt_path"] = chosen_prompt
260
 
261
- # 🔀 Choose Speak vs Sing text
262
- if mode == "Sing 🎵" and lyrics_input.strip():
 
 
 
 
263
  final_text = format_for_singing(lyrics_input)
264
  else:
 
 
265
  final_text = text_input
266
 
267
- # 🔒 CPU-safe inference
 
 
268
  with torch.no_grad():
269
  wav = current_model.generate(
270
  final_text[:300],
@@ -276,11 +221,14 @@ def generate_tts_audio(
276
  return current_model.sr, wav
277
 
278
 
 
 
 
279
  with gr.Blocks() as demo:
280
  gr.Markdown(
281
  """
282
  # Chatterbox Multilingual Demo
283
- Generate high-quality multilingual speech from text or lyrics (sing mode).
284
  """
285
  )
286
 
@@ -298,13 +246,13 @@ with gr.Blocks() as demo:
298
 
299
  text = gr.Textbox(
300
  value=default_text_for_ui(initial_lang),
301
- label="Text (Speak mode)",
302
  max_lines=4
303
  )
304
 
305
  lyrics = gr.Textbox(
306
- label="Lyrics (Sing mode)",
307
- placeholder="Paste lyrics here (one line per verse)",
308
  max_lines=10
309
  )
310
 
@@ -322,36 +270,52 @@ with gr.Blocks() as demo:
322
  )
323
 
324
  exaggeration = gr.Slider(
325
- 0.25, 2, step=0.05,
326
  label="Exaggeration",
327
  value=0.5
328
  )
329
 
330
  cfg_weight = gr.Slider(
331
- 0.2, 1, step=0.05,
332
  label="CFG / Pace",
333
  value=0.5
334
  )
335
 
336
  with gr.Accordion("More options", open=False):
337
  seed_num = gr.Number(value=0, label="Random seed (0 = random)")
338
- temp = gr.Slider(0.05, 5, step=0.05, label="Temperature", value=0.8)
339
 
340
  run_btn = gr.Button("Generate", variant="primary")
341
 
342
  with gr.Column():
343
  audio_output = gr.Audio(label="Output Audio")
344
 
345
- # 🎛️ Auto-tune sliders for Sing mode
 
 
 
346
  def on_mode_change(mode):
347
  if mode == "Sing 🎵":
348
- return 1.25, 1.0, 0.45
349
- return 0.5, 0.8, 0.5
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
  mode.change(
352
  fn=on_mode_change,
353
  inputs=mode,
354
- outputs=[exaggeration, temp, cfg_weight],
355
  show_progress=False
356
  )
357
 
 
22
 
23
 
24
  LANGUAGE_CONFIG = {
25
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "en": {
27
  "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/en_f1.flac",
28
  "text": "Last month, we reached a new milestone with two billion views on our YouTube channel."
29
  },
30
+
 
 
 
 
 
 
 
31
  "fr": {
32
  "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/fr_f1.flac",
33
  "text": "Le mois dernier, nous avons atteint un nouveau jalon avec deux milliards de vues sur notre chaîne YouTube."
 
40
  "audio": "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts/hi_f1.flac",
41
  "text": "पिछले महीने हमने एक नया मील का पत्थर छुआ: हमारे YouTube चैनल पर दो अरब व्यूज़।"
42
  },
43
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  }
45
 
46
  # --- UI Helpers ---
 
70
  {line2}
71
  """
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
 
75
  DEVICE = "cpu"
 
132
 
133
 
134
 
135
+ # ===============================
136
+ # Singing formatter (TEXT ONLY)
137
+ # ===============================
138
def format_for_singing(lyrics: str) -> str:
    """Turn raw lyrics into stretched, pause-marked text for the TTS model.

    Each non-blank line of ``lyrics`` is stripped of surrounding whitespace,
    every lowercase ``a``/``e``/``i``/``o``/``u`` in it is doubled, and the
    marker " ♪ ..." is appended. Blank lines are dropped. Uppercase and
    accented vowels are left exactly as they are.

    Returns the processed lines joined with a newline; input that contains
    no non-blank lines yields an empty string.
    """
    # A single translation pass is equivalent to chaining five
    # ``str.replace`` calls here, because no replacement ever produces a
    # different vowel that a later step would touch.
    stretch = str.maketrans({vowel: vowel * 2 for vowel in "aeiou"})
    phrases = (raw.strip() for raw in lyrics.splitlines())
    return "\n".join(
        f"{phrase.translate(stretch)} ♪ ..." for phrase in phrases if phrase
    )
163
+
164
+
165
+ # ===============================
166
+ # TTS generator (FIXED)
167
+ # ===============================
168
  def generate_tts_audio(
169
  text_input: str,
170
  lyrics_input: str,
 
181
  if current_model is None:
182
  raise RuntimeError("TTS model is not loaded.")
183
 
184
+ if seed_num_input and seed_num_input != 0:
185
  set_seed(int(seed_num_input))
186
 
187
  chosen_prompt = audio_prompt_path_input or default_audio_for_ui(language_id)
 
195
  if chosen_prompt:
196
  generate_kwargs["audio_prompt_path"] = chosen_prompt
197
 
198
+ # ===============================
199
+ # STRICT MODE TOGGLE (IMPORTANT)
200
+ # ===============================
201
+ if mode == "Sing 🎵":
202
+ if not lyrics_input.strip():
203
+ raise gr.Error("Please enter lyrics for Sing mode.")
204
  final_text = format_for_singing(lyrics_input)
205
  else:
206
+ if not text_input.strip():
207
+ raise gr.Error("Please enter text for Speak mode.")
208
  final_text = text_input
209
 
210
+ # ===============================
211
+ # CPU-safe inference
212
+ # ===============================
213
  with torch.no_grad():
214
  wav = current_model.generate(
215
  final_text[:300],
 
221
  return current_model.sr, wav
222
 
223
 
224
+ # ===============================
225
+ # GRADIO UI
226
+ # ===============================
227
  with gr.Blocks() as demo:
228
  gr.Markdown(
229
  """
230
  # Chatterbox Multilingual Demo
231
+ Speak or sing text using Chatterbox (CPU-only).
232
  """
233
  )
234
 
 
246
 
247
  text = gr.Textbox(
248
  value=default_text_for_ui(initial_lang),
249
+ label="Text (Speak mode only)",
250
  max_lines=4
251
  )
252
 
253
  lyrics = gr.Textbox(
254
+ label="Lyrics (Sing mode only)",
255
+ placeholder="Paste singable lyrics (one line per phrase)",
256
  max_lines=10
257
  )
258
 
 
270
  )
271
 
272
  exaggeration = gr.Slider(
273
+ 0.25, 2.0, step=0.05,
274
  label="Exaggeration",
275
  value=0.5
276
  )
277
 
278
  cfg_weight = gr.Slider(
279
+ 0.2, 1.0, step=0.05,
280
  label="CFG / Pace",
281
  value=0.5
282
  )
283
 
284
  with gr.Accordion("More options", open=False):
285
  seed_num = gr.Number(value=0, label="Random seed (0 = random)")
286
+ temp = gr.Slider(0.05, 5.0, step=0.05, label="Temperature", value=0.8)
287
 
288
  run_btn = gr.Button("Generate", variant="primary")
289
 
290
  with gr.Column():
291
  audio_output = gr.Audio(label="Output Audio")
292
 
293
+
294
+ # ===============================
295
+ # AUTO-TUNE FOR SING MODE
296
+ # ===============================
297
  def on_mode_change(mode):
298
  if mode == "Sing 🎵":
299
+ return (
300
+ gr.update(visible=False), # hide text
301
+ gr.update(visible=True), # show lyrics
302
+ 1.3, # exaggeration
303
+ 1.0, # temperature
304
+ 0.45 # cfg
305
+ )
306
+ else:
307
+ return (
308
+ gr.update(visible=True),
309
+ gr.update(visible=False),
310
+ 0.5,
311
+ 0.8,
312
+ 0.5
313
+ )
314
 
315
  mode.change(
316
  fn=on_mode_change,
317
  inputs=mode,
318
+ outputs=[text, lyrics, exaggeration, temp, cfg_weight],
319
  show_progress=False
320
  )
321