Spaces:
Runtime error
Runtime error
Update app.py
Browse files
Normalize the audio volume from the microphone
app.py
CHANGED
|
@@ -142,7 +142,14 @@ def transcribe_microphone_stream(audio_chunk, stream_state, language):
|
|
| 142 |
sample_rate, waveform_np = audio_chunk
|
| 143 |
if len(waveform_np.shape) > 1:
|
| 144 |
waveform_np = waveform_np.mean(axis=1)
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
# Resample if needed
|
| 147 |
if sample_rate != 16000:
|
| 148 |
waveform = torch.from_numpy(waveform_np).float().unsqueeze(0)
|
|
@@ -151,6 +158,8 @@ def transcribe_microphone_stream(audio_chunk, stream_state, language):
|
|
| 151 |
waveform_np = waveform.squeeze(0).numpy()
|
| 152 |
sample_rate = 16000
|
| 153 |
|
|
|
|
|
|
|
| 154 |
# Initialize stream if first chunk
|
| 155 |
if stream_state is None:
|
| 156 |
stream_state = recognizer.create_stream()
|
|
|
|
| 142 |
sample_rate, waveform_np = audio_chunk
|
| 143 |
if len(waveform_np.shape) > 1:
|
| 144 |
waveform_np = waveform_np.mean(axis=1)
|
| 145 |
+
|
| 146 |
+
# Normalize if needed
|
| 147 |
+
if waveform_np.dtype != np.float32:
|
| 148 |
+
waveform_np = waveform_np.astype(np.float32)
|
| 149 |
+
|
| 150 |
+
if np.max(np.abs(waveform_np)) > 1.0:
|
| 151 |
+
waveform_np = waveform_np / np.max(np.abs(waveform_np))
|
| 152 |
+
|
| 153 |
# Resample if needed
|
| 154 |
if sample_rate != 16000:
|
| 155 |
waveform = torch.from_numpy(waveform_np).float().unsqueeze(0)
|
|
|
|
| 158 |
waveform_np = waveform.squeeze(0).numpy()
|
| 159 |
sample_rate = 16000
|
| 160 |
|
| 161 |
+
waveform_np = np.clip(waveform_np, -1.0, 1.0)
|
| 162 |
+
|
| 163 |
# Initialize stream if first chunk
|
| 164 |
if stream_state is None:
|
| 165 |
stream_state = recognizer.create_stream()
|