Spaces:
Running on Zero
Running on Zero
Normalize audio samples to int16 format to prevent Gradio warnings
Browse files
app.py
CHANGED
```diff
@@ -534,6 +534,12 @@ def generate_zero_shot_speech(audio_file, transcript, target_text, progress=gr.P
 
         if wav_forms and len(wav_forms) > 0:
             audio_samples = wav_forms[0]
+            # Convert from float32 to int16 to avoid Gradio warning
+            if isinstance(audio_samples, torch.Tensor):
+                audio_samples = audio_samples.detach().cpu().numpy()
+            # Normalize and convert to int16
+            audio_samples = np.clip(audio_samples, -1.0, 1.0)
+            audio_samples = (audio_samples * 32767).astype(np.int16)
             print("Zero-shot speech generation completed successfully")
             return (24000, audio_samples)
         else:
```