DineshJ96 committed on
Commit
d926715
·
1 Parent(s): 127cc86

app file updated - process audio

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -144,9 +144,25 @@ def process_audio_for_web(audio_input):
144
 
145
  # 2. Align
146
  print("Aligning transcription with audio...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  transcription_result = whisperx.align(transcription_result["segments"], align_model_local, audio, return_char_alignments=False)
148
- transcription_result = whisperx.align(transcription_result["segments"], align_model_local, audio, device, return_char_alignments=False)
149
- del align_model_local
 
150
  gc.collect()
151
  if device == "cuda":
152
  torch.cuda.empty_cache()
@@ -157,7 +173,7 @@ def process_audio_for_web(audio_input):
157
  final_result = whisperx.assign_word_speakers(diarize_segments, transcription_result)
158
 
159
  speaker_transcripts_raw = {}
160
- # Prepare for display in dianzed_transcription_output
161
  diarized_display_lines = []
162
 
163
  for segment in final_result["segments"]:
@@ -205,8 +221,8 @@ def process_audio_for_web(audio_input):
205
  "translated_text": translated_text_output
206
  })
207
  translated_display_lines.append(f"[{seg['start']:.2f}s - {seg['end']:.2f}s] Original: {original_text}")
208
- translated_display_lines.append(f" Translated: {translated_text_output}")
209
-
210
  translated_output_str = "\n".join(translated_display_lines)
211
  else:
212
  translated_output_str = "Translation model not loaded. Skipping translation."
@@ -228,7 +244,7 @@ def process_audio_for_web(audio_input):
228
  f.write(f"\n### Speaker {speaker} ###\n")
229
  for seg in segments:
230
  f.write(f"[{seg['start']:.2f}s - {seg['end']:.2f}s] Original: {seg['original_text']}\n")
231
- f.write(f" Translated: {seg['translated_text']}\n")
232
  else:
233
  f.write("Translation output not available or translation model not loaded.\n")
234
 
 
144
 
145
  # 2. Align
146
  print("Aligning transcription with audio...")
147
+ align_model_local = None # Initialize to None to prevent UnboundLocalError in outer except
148
+ try:
149
+ # Load the alignment model based on the detected language
150
+ # The 'device' parameter is passed here, not to whisperx.align
151
+ align_model_local, metadata = whisperx.load_align_model(language_code=detected_language, device=device)
152
+ except Exception as e:
153
+ # Handle cases where the alignment model for the detected language cannot be loaded
154
+ print(f"Error loading alignment model for language '{detected_language}': {e}")
155
+ import traceback
156
+ print(traceback.format_exc())
157
+ # Provide a user-friendly message, possibly suggesting supported languages
158
+ return f"Error: Could not load alignment model for language '{detected_language}'. Alignment is typically supported for English, French, German, Spanish, Italian, Japanese, Chinese, Dutch, and Portuguese. Details: {e}", "", "", None
159
+
160
+ # Perform alignment using the loaded model
161
+ # Removed 'device' from here as the model itself is already on the correct device
162
  transcription_result = whisperx.align(transcription_result["segments"], align_model_local, audio, return_char_alignments=False)
163
+
164
+ # Removed the duplicate whisperx.align call and 'del align_model_local'
165
+ # as it can cause issues if an error occurs later.
166
  gc.collect()
167
  if device == "cuda":
168
  torch.cuda.empty_cache()
 
173
  final_result = whisperx.assign_word_speakers(diarize_segments, transcription_result)
174
 
175
  speaker_transcripts_raw = {}
176
+ # Prepare for display in diarized_transcription_output
177
  diarized_display_lines = []
178
 
179
  for segment in final_result["segments"]:
 
221
  "translated_text": translated_text_output
222
  })
223
  translated_display_lines.append(f"[{seg['start']:.2f}s - {seg['end']:.2f}s] Original: {original_text}")
224
+ translated_display_lines.append(f" Translated: {translated_text_output}")
225
+
226
  translated_output_str = "\n".join(translated_display_lines)
227
  else:
228
  translated_output_str = "Translation model not loaded. Skipping translation."
 
244
  f.write(f"\n### Speaker {speaker} ###\n")
245
  for seg in segments:
246
  f.write(f"[{seg['start']:.2f}s - {seg['end']:.2f}s] Original: {seg['original_text']}\n")
247
+ f.write(f" Translated: {seg['translated_text']}\n")
248
  else:
249
  f.write("Translation output not available or translation model not loaded.\n")
250