NLPV committed on
Commit
47ccc28
·
verified ·
1 Parent(s): c3e4ce0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -14
app.py CHANGED
@@ -4,8 +4,9 @@ import os
4
  from TTS.api import TTS
5
  from moviepy.editor import VideoFileClip
6
 
7
- # Initialize the TTS model that supports voice cloning.
8
- tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=True, gpu=False)
 
9
 
10
  def convert_mp4_to_wav(mp4_file):
11
  """
@@ -18,35 +19,43 @@ def convert_mp4_to_wav(mp4_file):
18
  video.audio.write_audiofile(wav_filename, verbose=False, logger=None)
19
  return wav_filename
20
 
21
- def text_to_speech_with_voice(text, voice_sample):
22
  """
23
- Converts input text to speech using the voice characteristics extracted from the uploaded voice sample.
24
- If the voice sample is an MP4, its audio is extracted first.
 
25
  """
26
- # Check if the uploaded voice sample is an MP4 file.
27
  file_ext = os.path.splitext(voice_sample)[1].lower()
28
  if file_ext == ".mp4":
29
  voice_sample = convert_mp4_to_wav(voice_sample)
30
 
31
- # Create a temporary file to store the generated audio (WAV format)
32
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
33
  output_file = fp.name
 
 
 
 
 
 
 
 
34
 
35
- # Generate speech using the voice sample for speaker conditioning,
36
- # while specifying the language as Hindi ("hi") for multi-lingual support.
37
- tts.tts_to_file(text=text, speaker_wav=voice_sample, file_path=output_file, language="hi")
38
  return output_file
39
 
40
- # Create the Gradio interface with a textbox and an audio uploader.
41
  iface = gr.Interface(
42
- fn=text_to_speech_with_voice,
43
  inputs=[
44
  gr.Textbox(lines=5, placeholder="हिंदी में टेक्स्ट दर्ज करें...", label="Text"),
45
  gr.Audio(type="filepath", label="Voice Sample")
46
  ],
47
  outputs=gr.Audio(type="filepath", label="Generated Speech"),
48
- title="Voice Cloning Text-to-Speech",
49
- description="Generate speech in the voice of your sample. Provide a voice sample (audio or MP4) and text, and the model will synthesize speech in that voice."
 
 
 
 
50
  )
51
 
52
  iface.launch()
 
4
  from TTS.api import TTS
5
  from moviepy.editor import VideoFileClip
6
 
7
# Load the Hindi Tacotron2-DDC model once at import time, on CPU.
# Caveat: monolingual models such as this one may not support voice cloning.
tts = TTS(
    model_name="tts_models/hi/tacotron2-DDC",
    progress_bar=True,
    gpu=False,
)
10
 
11
  def convert_mp4_to_wav(mp4_file):
12
  """
 
19
  video.audio.write_audiofile(wav_filename, verbose=False, logger=None)
20
  return wav_filename
21
 
22
def text_to_speech(text, voice_sample):
    """
    Convert input Hindi text to speech and return the path of the WAV output.

    Args:
        text: The text to synthesize.
        voice_sample: Path to an optional reference recording (audio or MP4),
            or None / empty when no sample was supplied.

    Returns:
        Path to a temporary ``.wav`` file holding the generated speech. The
        file is created with ``delete=False``, so it is not removed here.

    If a voice sample is provided, voice cloning is attempted with it. If
    that attempt fails (for example because the model does not support
    cloning), or if no sample was provided, the default voice is used.
    """
    # The sample is optional. Guard before touching the path, because
    # os.path.splitext(None) raises TypeError.
    if voice_sample:
        # If the uploaded voice sample is an MP4, extract the audio.
        file_ext = os.path.splitext(voice_sample)[1].lower()
        if file_ext == ".mp4":
            voice_sample = convert_mp4_to_wav(voice_sample)

    # Only the file name is needed; the TTS call writes the audio itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
        output_file = fp.name

    if voice_sample:
        try:
            # Attempt to generate speech using the provided voice sample.
            tts.tts_to_file(text=text, speaker_wav=voice_sample, file_path=output_file)
            return output_file
        except Exception as e:
            # Deliberate best-effort: any cloning failure falls through to
            # the default voice below instead of failing the request.
            print("Voice cloning not supported, using default voice. Error:", e)

    tts.tts_to_file(text=text, file_path=output_file)
    return output_file
45
 
 
46
# Build the Gradio UI: a text box plus a voice-sample upload go in,
# and the path of the generated audio file comes out.
iface = gr.Interface(
    title="Hindi Text-to-Speech",
    description=(
        "Generate Hindi speech from text. If a voice sample (audio or MP4) is provided, "
        "the app will attempt voice cloning. Note that the Hindi model might not support "
        "voice cloning, in which case the default voice is used."
    ),
    fn=text_to_speech,
    inputs=[
        gr.Textbox(lines=5, placeholder="हिंदी में टेक्स्ट दर्ज करें...", label="Text"),
        gr.Audio(type="filepath", label="Voice Sample"),
    ],
    outputs=gr.Audio(type="filepath", label="Generated Speech"),
)

# Start the web server for the interface.
iface.launch()