Update app.py
Browse files
app.py
CHANGED
|
@@ -173,15 +173,20 @@ def synthesize_speech(synth_text, target_lang, gender="Male", pace="normal", out
|
|
| 173 |
with open(ref_text_path, encoding='utf-8') as f:
|
| 174 |
ref_text = f.read()
|
| 175 |
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
def match_audio_duration(original_path, translated_path, output_path="temp_audio_synced.wav"):
|
| 187 |
"""
|
|
|
|
| 173 |
with open(ref_text_path, encoding='utf-8') as f:
|
| 174 |
ref_text = f.read()
|
| 175 |
|
| 176 |
+
print("> Loading IndicF5 TTS model (ai4bharat/IndicF5)...")
|
| 177 |
+
indicf5_repo_id = "ai4bharat/IndicF5"
|
| 178 |
+
tts_model = AutoModel.from_pretrained(indicf5_repo_id, trust_remote_code=True).to(device)
|
| 179 |
+
|
| 180 |
+
audio = tts_model(synth_text, ref_audio_path=ref_audio_path, ref_text=ref_text)
|
| 181 |
+
|
| 182 |
+
if audio.dtype == np.int16:
|
| 183 |
+
audio = audio.astype(np.float32) / 32768.0
|
| 184 |
+
|
| 185 |
+
sf.write(output_path, np.array(audio, dtype=np.float32), samplerate=24000)
|
| 186 |
+
print(f"✅ Speech synthesis complete.")
|
| 187 |
+
print(f"> Final audio saved to: {output_path}")
|
| 188 |
+
|
| 189 |
+
return output_path
|
| 190 |
|
| 191 |
def match_audio_duration(original_path, translated_path, output_path="temp_audio_synced.wav"):
|
| 192 |
"""
|