Dinoking committed on
Commit
4a0144d
·
verified ·
1 Parent(s): b688c52
Files changed (1) hide show
  1. app.py +12 -0
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import scipy.io.wavfile as wavfile
3
  from transformers import pipeline
4
 
@@ -10,6 +11,17 @@ def speech_to_speech(audio):
10
  # audio = (sample_rate, numpy_array)
11
  sample_rate, audio_data = audio
12
 
 
 
 
 
 
 
 
 
 
 
 
13
  # Speech → Text
14
  result = asr(audio_data, sampling_rate=sample_rate)
15
  text = result["text"]
 
1
  import gradio as gr
2
+ import numpy as np
3
  import scipy.io.wavfile as wavfile
4
  from transformers import pipeline
5
 
 
11
  # audio = (sample_rate, numpy_array)
12
  sample_rate, audio_data = audio
13
 
14
+ # ---- FIX AUDIO FORMAT ----
15
+ # Convert stereo to mono
16
+ if len(audio_data.shape) > 1:
17
+ audio_data = np.mean(audio_data, axis=1)
18
+
19
+ # Convert to float32
20
+ audio_data = audio_data.astype(np.float32)
21
+
22
+ # Normalize
23
+ audio_data = audio_data / np.max(np.abs(audio_data) + 1e-9)
24
+
25
  # Speech → Text
26
  result = asr(audio_data, sampling_rate=sample_rate)
27
  text = result["text"]