Spaces:

NLPV
/

TTS_Hindi

Runtime error

App Files Community

NLPV committed on Mar 22, 2025

Commit

47ccc28

verified ·

1 Parent(s): c3e4ce0

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -14

app.py CHANGED Viewed

@@ -4,8 +4,9 @@ import os
 from TTS.api import TTS
 from moviepy.editor import VideoFileClip
-# Initialize the TTS model that supports voice cloning.
-tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=True, gpu=False)
 def convert_mp4_to_wav(mp4_file):
     """
@@ -18,35 +19,43 @@ def convert_mp4_to_wav(mp4_file):
         video.audio.write_audiofile(wav_filename, verbose=False, logger=None)
     return wav_filename
-def text_to_speech_with_voice(text, voice_sample):
     """
-    Converts input text to speech using the voice characteristics extracted from the uploaded voice sample.
-    If the voice sample is an MP4, its audio is extracted first.
     """
-    # Check if the uploaded voice sample is an MP4 file.
     file_ext = os.path.splitext(voice_sample)[1].lower()
     if file_ext == ".mp4":
         voice_sample = convert_mp4_to_wav(voice_sample)
-    # Create a temporary file to store the generated audio (WAV format)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
         output_file = fp.name
-    # Generate speech using the voice sample for speaker conditioning,
-    # while specifying the language as Hindi ("hi") for multi-lingual support.
-    tts.tts_to_file(text=text, speaker_wav=voice_sample, file_path=output_file, language="hi")
     return output_file
-# Create the Gradio interface with a textbox and an audio uploader.
 iface = gr.Interface(
-    fn=text_to_speech_with_voice,
     inputs=[
         gr.Textbox(lines=5, placeholder="हिंदी में टेक्स्ट दर्ज करें...", label="Text"),
         gr.Audio(type="filepath", label="Voice Sample")
     ],
     outputs=gr.Audio(type="filepath", label="Generated Speech"),
-    title="Voice Cloning Text-to-Speech",
-    description="Generate speech in the voice of your sample. Provide a voice sample (audio or MP4) and text, and the model will synthesize speech in that voice."
 )
 iface.launch()

 from TTS.api import TTS
 from moviepy.editor import VideoFileClip
+# Use a Hindi TTS model.
+# Note: Many monolingual models (such as this one) may not support voice cloning.
+tts = TTS(model_name="tts_models/hi/tacotron2-DDC", progress_bar=True, gpu=False)
 def convert_mp4_to_wav(mp4_file):
     """
         video.audio.write_audiofile(wav_filename, verbose=False, logger=None)
     return wav_filename
+def text_to_speech(text, voice_sample):
     """
+    Converts input Hindi text to speech.
+    If a voice sample (audio or MP4) is provided, it attempts to use it for voice cloning.
+    If voice cloning is not supported by the model, it falls back to the default voice.
     """
+    # If the uploaded voice sample is an MP4, extract the audio.
     file_ext = os.path.splitext(voice_sample)[1].lower()
     if file_ext == ".mp4":
         voice_sample = convert_mp4_to_wav(voice_sample)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
         output_file = fp.name
+    try:
+        # Attempt to generate speech using the provided voice sample.
+        tts.tts_to_file(text=text, speaker_wav=voice_sample, file_path=output_file)
+    except Exception as e:
+        # If voice cloning isn't supported, fall back to the default voice.
+        print("Voice cloning not supported, using default voice. Error:", e)
+        tts.tts_to_file(text=text, file_path=output_file)
     return output_file
 iface = gr.Interface(
+    fn=text_to_speech,
     inputs=[
         gr.Textbox(lines=5, placeholder="हिंदी में टेक्स्ट दर्ज करें...", label="Text"),
         gr.Audio(type="filepath", label="Voice Sample")
     ],
     outputs=gr.Audio(type="filepath", label="Generated Speech"),
+    title="Hindi Text-to-Speech",
+    description=(
+        "Generate Hindi speech from text. If a voice sample (audio or MP4) is provided, "
+        "the app will attempt voice cloning. Note that the Hindi model might not support "
+        "voice cloning, in which case the default voice is used."
+    )
 )
 iface.launch()