Update app.py
Browse files
app.py
CHANGED
|
@@ -96,7 +96,13 @@ def synthesize_speech(text, ref_audio, ref_text):
|
|
| 96 |
sf.write(temp_audio.name, audio_data, samplerate=sample_rate, format='WAV')
|
| 97 |
temp_audio.flush()
|
| 98 |
|
| 99 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
out = model(text, ref_audio_path=temp_audio.name, ref_text=ref_text)
|
| 101 |
|
| 102 |
# Normalize output and save
|
|
|
|
| 96 |
sf.write(temp_audio.name, audio_data, samplerate=sample_rate, format='WAV')
|
| 97 |
temp_audio.flush()
|
| 98 |
|
| 99 |
+
# Load with soundfile (not torchaudio) and convert to tensor
|
| 100 |
+
wav, sr = sf.read(temp_audio.name)
|
| 101 |
+
wav = torch.from_numpy(wav).float()
|
| 102 |
+
if sr != 24000:
|
| 103 |
+
wav = torchaudio.functional.resample(wav, sr, 24000)
|
| 104 |
+
|
| 105 |
+
# NOTE: the model call below still receives ref_audio_path=temp_audio.name; the `wav` tensor prepared above is not passed to it within this hunk
|
| 106 |
out = model(text, ref_audio_path=temp_audio.name, ref_text=ref_text)
|
| 107 |
|
| 108 |
# Normalize output and save
|