Michael Hu committed on
Commit
1561c93
·
1 Parent(s): a9213f0

refactor: rename generate_speech to generate_chatterbox_speech and add generation kwargs

Browse files

- Rename function for clarity and consistency with Chatterbox TTS
- Add exaggeration, temperature, and cfg_weight parameters to model.generate calls
- Update all references to use the new function name

Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -83,7 +83,7 @@ voices_by_lang = scan_piper_voices()
83
 
84
  # No global piper_voice, load dynamically
85
 
86
- def generate_speech(text, language, audio_prompt=None):
87
  """
88
  Generate speech from text using Chatterbox multilingual TTS with optional audio prompt
89
 
@@ -102,14 +102,21 @@ def generate_speech(text, language, audio_prompt=None):
102
  }
103
 
104
  language_id = language_map.get(language, "en")
 
 
 
 
 
 
 
105
 
106
  # Generate speech using Chatterbox
107
  if audio_prompt and os.path.exists(audio_prompt):
108
  # Use audio prompt for voice cloning
109
- wav = model.generate(text, language_id=language_id, audio_prompt_path=audio_prompt)
110
  else:
111
  # Generate without audio prompt (default voice)
112
- wav = model.generate(text, language_id=language_id)
113
 
114
  # Save to a temporary file
115
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
@@ -280,13 +287,13 @@ with gr.Blocks(css=custom_css, title="🎙️ TTS Model Gallery", theme=gr.theme
280
  ],
281
  inputs=[text_input, language_selection, audio_prompt],
282
  outputs=audio_output,
283
- fn=generate_speech,
284
  cache_examples=False
285
  )
286
 
287
  # Connect the generate button to the function
288
  generate_btn.click(
289
- fn=generate_speech,
290
  inputs=[text_input, language_selection, audio_prompt],
291
  outputs=audio_output
292
  )
 
83
 
84
  # No global piper_voice, load dynamically
85
 
86
+ def generate_chatterbox_speech(text, language, audio_prompt=None):
87
  """
88
  Generate speech from text using Chatterbox multilingual TTS with optional audio prompt
89
 
 
102
  }
103
 
104
  language_id = language_map.get(language, "en")
105
+
106
+ # https://huggingface.co/spaces/ResembleAI/Chatterbox/blob/main/app.py#L64-L67
107
+ generate_kwargs = {
108
+ "exaggeration": 0.5,
109
+ "temperature": 0.8,
110
+ "cfg_weight": 0.3,
111
+ }
112
 
113
  # Generate speech using Chatterbox
114
  if audio_prompt and os.path.exists(audio_prompt):
115
  # Use audio prompt for voice cloning
116
+ wav = model.generate(text, language_id=language_id, audio_prompt_path=audio_prompt, **generate_kwargs)
117
  else:
118
  # Generate without audio prompt (default voice)
119
+ wav = model.generate(text, language_id=language_id, **generate_kwargs)
120
 
121
  # Save to a temporary file
122
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
 
287
  ],
288
  inputs=[text_input, language_selection, audio_prompt],
289
  outputs=audio_output,
290
+ fn=generate_chatterbox_speech,
291
  cache_examples=False
292
  )
293
 
294
  # Connect the generate button to the function
295
  generate_btn.click(
296
+ fn=generate_chatterbox_speech,
297
  inputs=[text_input, language_selection, audio_prompt],
298
  outputs=audio_output
299
  )