NLPV committed on
Commit
3a12dbe
·
verified ·
1 Parent(s): c6df900

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -50
app.py CHANGED
@@ -1,57 +1,26 @@
1
  import gradio as gr
2
- import librosa
3
- import numpy as np
4
 
5
- # Try to import a helper function to load the TTS model.
6
- try:
7
- from parler_tts import load_tts
8
- except ImportError:
9
- raise ImportError(
10
- "The function 'load_tts' could not be imported from parler_tts. "
11
- "Please check the documentation or the installed package structure for the correct API."
12
- )
13
-
14
- # Initialize the TTS model for Hindi with voice cloning enabled.
15
- # (Parameters may vary depending on the actual API.)
16
- tts_model = load_tts(language="hi", voice_cloning=True)
17
-
18
- def extract_speaker_embedding(voice_sample_path):
19
- """
20
- Extract a speaker embedding from an uploaded Hindi voice sample.
21
- This function loads the audio file, resamples to 16 kHz, and extracts the speaker embedding.
22
- """
23
- wav, sr = librosa.load(voice_sample_path, sr=16000)
24
- # Assuming the tts_model provides a method for embedding extraction.
25
- speaker_embedding = tts_model.extract_embedding(wav)
26
- return speaker_embedding
27
-
28
- def synthesize_voice_with_cloning(voice_sample_path, hindi_text):
29
- """
30
- Synthesize Hindi speech from text, cloning the voice characteristics from the uploaded sample.
31
- """
32
- # Extract the speaker embedding.
33
- speaker_embedding = extract_speaker_embedding(voice_sample_path)
34
-
35
- # Synthesize speech using the provided text and speaker embedding.
36
- audio_waveform = tts_model.synthesize(text=hindi_text, speaker_embedding=speaker_embedding)
37
-
38
- # Convert output to a numpy array if necessary.
39
- if not isinstance(audio_waveform, np.ndarray):
40
- audio_waveform = np.array(audio_waveform)
41
 
42
- return audio_waveform
 
 
 
 
 
43
 
44
- # Create a Gradio interface.
45
  iface = gr.Interface(
46
- fn=synthesize_voice_with_cloning,
47
- inputs=[
48
- gr.Audio(source="upload", type="filepath", label="Upload Hindi Voice Sample"),
49
- gr.Textbox(lines=3, placeholder="Enter Hindi text here...", label="Hindi Text")
50
- ],
51
- outputs=gr.Audio(label="Generated Speech"),
52
- title="Hindi TTS with Voice Cloning",
53
- description="Upload a Hindi voice sample and enter Hindi text to generate cloned speech."
54
  )
55
 
56
- if __name__ == "__main__":
57
- iface.launch()
 
1
  import gradio as gr
2
+ from gtts import gTTS
3
+ import tempfile
4
 
5
+ def text_to_speech(text):
6
+ # Generate speech using gTTS with Hindi language ('hi')
7
+ tts = gTTS(text=text, lang='hi')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ # Save the audio to a temporary file
10
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
11
+ tts.save(fp.name)
12
+ audio_file = fp.name
13
+
14
+ return audio_file
15
 
16
+ # Create the Gradio interface
17
  iface = gr.Interface(
18
+ fn=text_to_speech,
19
+ inputs=gr.Textbox(lines=5, placeholder="हिंदी में टेक्स्ट दर्ज करें...", label="Enter Hindi Text"),
20
+ outputs=gr.Audio(type="file", label="Generated Speech"),
21
+ title="Hindi Text-to-Speech",
22
+ description="Convert Hindi text into speech using gTTS."
 
 
 
23
  )
24
 
25
+ # Launch the app
26
+ iface.launch()