rishidahiya committed on
Commit
2d146cc
·
verified ·
1 Parent(s): 80f2c88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -28
app.py CHANGED
@@ -5,24 +5,40 @@ from vocoder import inference as vocoder_inference
5
  import librosa
6
  import soundfile as sf
7
  import numpy as np
8
- from io import BytesIO
9
  import os
10
 
11
  # Load models at startup
12
  print("Loading models...")
13
- encoder_inference.load_model("saved_models/encoder.pt")
14
- synthesizer = Synthesizer("saved_models/synthesizer.pt")
15
- vocoder_inference.load_model("saved_models/vocoder.pt")
16
- print("✓ Models loaded!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def clone_voice(voice_sample, text):
19
  """Clone voice and generate speech"""
20
  try:
21
  if voice_sample is None:
22
- return None, "Error: No voice sample provided"
23
 
24
  if not text or len(text.strip()) == 0:
25
- return None, "Error: No text provided"
 
 
26
 
27
  # Extract audio data and sample rate
28
  if isinstance(voice_sample, tuple):
@@ -53,39 +69,41 @@ def clone_voice(voice_sample, text):
53
  wav_generated = vocoder_inference.vocoder(mels[0])
54
  print(f"Generated audio: {wav_generated.shape}")
55
 
56
- return (22050, (wav_generated * 32768).astype(np.int16)), " Success!"
57
 
58
  except Exception as e:
59
  print(f"Error: {e}")
60
  import traceback
61
  traceback.print_exc()
62
- return None, f"Error: {str(e)}"
63
 
64
  # Create Gradio interface
65
- with gr.Blocks(title="Voice Cloning - Real-Time Test") as demo:
66
- gr.Markdown("# 🎤 Voice Cloning Test")
67
- gr.Markdown("Record your voice, enter text, and hear it synthesized in your voice!")
 
68
 
69
  with gr.Row():
70
  with gr.Column():
71
- gr.Markdown("### Step 1: Record Your Voice")
 
72
  voice_input = gr.Audio(
73
- label="Record or Upload Voice Sample (5-10 seconds)",
74
  type="numpy",
75
  sources=["microphone", "upload"]
76
  )
77
 
78
- gr.Markdown("### Step 2: Enter Text")
79
  text_input = gr.Textbox(
80
- label="Text to Synthesize (Hindi or Kannada)",
81
  placeholder="नमस्ते, यह एक परीक्षण है",
82
  lines=3
83
  )
84
 
85
  with gr.Column():
86
- gr.Markdown("### Step 3: Generated Speech")
87
- audio_output = gr.Audio(label="Cloned Voice Output", type="numpy")
88
- status_output = gr.Textbox(label="Status", interactive=False)
89
 
90
  clone_button = gr.Button("🎯 Clone Voice & Generate Speech", variant="primary", size="lg")
91
  clone_button.click(
@@ -95,18 +113,27 @@ with gr.Blocks(title="Voice Cloning - Real-Time Test") as demo:
95
  )
96
 
97
  gr.Markdown("""
98
- ### Instructions:
99
- 1. **Record your voice** using the microphone (5-10 seconds in Hindi/Kannada) OR upload a WAV/OGG file
100
- 2. **Enter text** you want to generate in your voice (Hindi or Kannada)
 
 
 
101
  3. **Click "Clone Voice & Generate Speech"**
102
  4. **Wait** (10-30 seconds on CPU) and hear the result!
103
 
104
- ### Tips:
105
- - Clearer voice samples = better results
106
- - Longer samples (10 seconds) = better voice cloning
107
- - Same language as input voice works best
108
- - Be patient - CPU processing takes time!
 
 
 
 
 
 
109
  """)
110
 
111
  if __name__ == "__main__":
112
- demo.launch(share=True)
 
5
  import librosa
6
  import soundfile as sf
7
  import numpy as np
 
8
  import os
9
 
10
  # Load models at startup
11
  print("Loading models...")
12
+ try:
13
+ encoder_inference.load_model("saved_models/encoder.pt")
14
+ print("✓ Encoder loaded!")
15
+ except Exception as e:
16
+ print(f"Encoder load error: {e}")
17
+
18
+ try:
19
+ synthesizer = Synthesizer("saved_models/synthesizer.pt")
20
+ print("✓ Synthesizer loaded!")
21
+ except Exception as e:
22
+ print(f"Synthesizer load error: {e}")
23
+
24
+ try:
25
+ vocoder_inference.load_model("saved_models/vocoder.pt")
26
+ print("✓ Vocoder loaded!")
27
+ except Exception as e:
28
+ print(f"Vocoder load error: {e}")
29
+
30
+ print("Ready for voice cloning!")
31
 
32
  def clone_voice(voice_sample, text):
33
  """Clone voice and generate speech"""
34
  try:
35
  if voice_sample is None:
36
+ return None, "Error: No voice sample provided"
37
 
38
  if not text or len(text.strip()) == 0:
39
+ return None, "Error: No text provided"
40
+
41
+ print(f"Processing: text='{text}', voice_sample={voice_sample}")
42
 
43
  # Extract audio data and sample rate
44
  if isinstance(voice_sample, tuple):
 
69
  wav_generated = vocoder_inference.vocoder(mels[0])
70
  print(f"Generated audio: {wav_generated.shape}")
71
 
72
+ return (22050, (wav_generated * 32768).astype(np.int16)), " Success! Your voice has been cloned!"
73
 
74
  except Exception as e:
75
  print(f"Error: {e}")
76
  import traceback
77
  traceback.print_exc()
78
+ return None, f"Error: {str(e)}"
79
 
80
  # Create Gradio interface
81
+ with gr.Blocks(title="Voice Cloning - Real-Time Test", theme=gr.themes.Soft()) as demo:
82
+ gr.Markdown("# 🎤 Real-Time Voice Cloning")
83
+ gr.Markdown("**Record your voice, enter text, and hear it synthesized in your voice!**")
84
+ gr.Markdown("---")
85
 
86
  with gr.Row():
87
  with gr.Column():
88
+ gr.Markdown("### 📝 Step 1: Record Your Voice")
89
+ gr.Markdown("Record 5-10 seconds of clear audio in **Hindi or Kannada**")
90
  voice_input = gr.Audio(
91
+ label="🎙️ Voice Sample (Microphone or Upload)",
92
  type="numpy",
93
  sources=["microphone", "upload"]
94
  )
95
 
96
+ gr.Markdown("### ✍️ Step 2: Enter Text")
97
  text_input = gr.Textbox(
98
+ label="📄 Text to Synthesize (Hindi or Kannada)",
99
  placeholder="नमस्ते, यह एक परीक्षण है",
100
  lines=3
101
  )
102
 
103
  with gr.Column():
104
+ gr.Markdown("### 🔊 Step 3: Generated Speech")
105
+ audio_output = gr.Audio(label="🎧 Cloned Voice Output", type="numpy")
106
+ status_output = gr.Textbox(label="📊 Status", interactive=False, lines=2)
107
 
108
  clone_button = gr.Button("🎯 Clone Voice & Generate Speech", variant="primary", size="lg")
109
  clone_button.click(
 
113
  )
114
 
115
  gr.Markdown("""
116
+ ---
117
+ ### 📋 Instructions:
118
+ 1. **Record your voice** using the microphone (5-10 seconds) OR upload a WAV/OGG file
119
+ - Speak clearly in Hindi or Kannada
120
+ - Avoid background noise
121
+ 2. **Enter text** you want to generate in your voice (same language as recording)
122
  3. **Click "Clone Voice & Generate Speech"**
123
  4. **Wait** (10-30 seconds on CPU) and hear the result!
124
 
125
+ ### 💡 Tips for Best Results:
126
+ - **Clear voice samples** = better results
127
+ - **10+ seconds** = better voice cloning accuracy
128
+ - **Same language** as input voice works best
129
+ - **Patience** - CPU processing takes time (GPU would be 2-3x faster)
130
+ - **Quality audio** - minimize background noise
131
+
132
+ ### ⚠️ Limitations:
133
+ - CPU processing is slower (~10-30 seconds per request)
134
+ - Long texts (500+ characters) may timeout
135
+ - Best results with 10+ second voice samples
136
  """)
137
 
138
  if __name__ == "__main__":
139
+ demo.launch(share=False, server_name="0.0.0.0", server_port=7860)