Spaces:
Sleeping
Sleeping
Commit ·
5bff499
1
Parent(s): ddae84a
Fix transcription errors by improving audio segment handling

- Add checks for empty audio segments to avoid creating invalid files
- Pad very short audio segments to ensure Whisper compatibility
- Use explicit WAV format with PCM_16 subtype for better compatibility
- Add error handling around transcription to gracefully handle segment errors
Browse files
app.py
CHANGED
|
@@ -189,19 +189,35 @@ class DiarizationTranscriptionTranslation:
|
|
| 189 |
end_sample = int(segment["end"] * orig_sr)
|
| 190 |
segment_audio = audio[start_sample:end_sample]
|
| 191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
# Save the segment as a temporary file for Whisper
|
| 193 |
temp_file = f"temp_segment_{segment['start']}_{segment['end']}.wav"
|
| 194 |
-
|
|
|
|
| 195 |
|
| 196 |
# Transcribe the segment
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
# Translate if necessary
|
| 207 |
translated_text = self.translate_text(transcribed_text)
|
|
|
|
| 189 |
end_sample = int(segment["end"] * orig_sr)
|
| 190 |
segment_audio = audio[start_sample:end_sample]
|
| 191 |
|
| 192 |
+
# Ensure segment_audio is not empty
|
| 193 |
+
if len(segment_audio) == 0:
|
| 194 |
+
continue # Skip empty segments
|
| 195 |
+
|
| 196 |
+
# Add a small amount of silence if segment is too short for Whisper
|
| 197 |
+
if len(segment_audio) < orig_sr * 0.1: # Less than 0.1 seconds
|
| 198 |
+
min_samples = int(orig_sr * 0.1)
|
| 199 |
+
zeros_to_add = min_samples - len(segment_audio)
|
| 200 |
+
segment_audio = np.pad(segment_audio, (0, zeros_to_add), mode='constant')
|
| 201 |
+
|
| 202 |
# Save the segment as a temporary file for Whisper
|
| 203 |
temp_file = f"temp_segment_{segment['start']}_{segment['end']}.wav"
|
| 204 |
+
# Use subtype parameter to ensure proper WAV format
|
| 205 |
+
sf.write(temp_file, segment_audio, orig_sr, format='WAV', subtype='PCM_16')
|
| 206 |
|
| 207 |
# Transcribe the segment
|
| 208 |
+
try:
|
| 209 |
+
transcription_result = self.transcribe_audio(temp_file)
|
| 210 |
+
# Handle both possible return formats
|
| 211 |
+
if isinstance(transcription_result, dict) and "text" in transcription_result:
|
| 212 |
+
transcribed_text = transcription_result["text"]
|
| 213 |
+
elif isinstance(transcription_result, str):
|
| 214 |
+
transcribed_text = transcription_result
|
| 215 |
+
else:
|
| 216 |
+
transcribed_text = str(transcription_result)
|
| 217 |
+
except Exception as e:
|
| 218 |
+
print(f"Error transcribing segment {temp_file}: {str(e)}")
|
| 219 |
+
transcribed_text = f"Transcription error: {str(e)}"
|
| 220 |
+
# Continue with the error message as the transcription
|
| 221 |
|
| 222 |
# Translate if necessary
|
| 223 |
translated_text = self.translate_text(transcribed_text)
|