mrrtmob committed on
Commit
cb33cc1
·
1 Parent(s): 1398bcf

Normalize audio samples to int16 format to prevent Gradio warnings

Browse files
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -534,6 +534,12 @@ def generate_zero_shot_speech(audio_file, transcript, target_text, progress=gr.P
534
 
535
  if wav_forms and len(wav_forms) > 0:
536
  audio_samples = wav_forms[0]
 
 
 
 
 
 
537
  print("Zero-shot speech generation completed successfully")
538
  return (24000, audio_samples)
539
  else:
 
534
 
535
  if wav_forms and len(wav_forms) > 0:
536
  audio_samples = wav_forms[0]
537
+ # Convert from float32 to int16 to avoid Gradio warning
538
+ if isinstance(audio_samples, torch.Tensor):
539
+ audio_samples = audio_samples.detach().cpu().numpy()
540
+ # Clip to [-1.0, 1.0], then scale and convert to int16
541
+ audio_samples = np.clip(audio_samples, -1.0, 1.0)
542
+ audio_samples = (audio_samples * 32767).astype(np.int16)
543
  print("Zero-shot speech generation completed successfully")
544
  return (24000, audio_samples)
545
  else: