hafsaabd82 committed on
Commit
9fb2b44
·
verified ·
1 Parent(s): 4991c43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -16
app.py CHANGED
@@ -38,6 +38,12 @@ except Exception as e:
38
  print(f"Error loading pyannote pipeline: {type(e).__name__}: {e}. Diarization will be skipped.")
39
  diarization_pipeline = None
40
  global_diarizer = diarization_pipeline
 
 
 
 
 
 
41
  model_name = "medium"
42
  class TimelineItem(BaseModel):
43
  start: float
@@ -191,26 +197,44 @@ def analyze_audio(audio_file: str,
191
  result = model.transcribe(audio_loaded, batch_size=4 )
192
  language_code = result.get("language") or result.get("detected_language") or "en"
193
  results.languageCode = language_code
 
194
  print(f"Detected language: {language_code}. Aligning transcription...")
195
- try:
196
- align_model, metadata = whisperx.load_align_model(language_code=language_code, device=device)
197
- aligned = whisperx.align(result["segments"], align_model, metadata, audio_loaded, device)
198
- except Exception:
199
- aligned = {"segments": result["segments"]}
200
- warn(results, "ALIGN_SKIP", "Alignment unavailable; using raw Whisper segments.")
201
- diarize_output = None
202
- if global_diarizer is not None:
203
- print("Performing speaker diarization (Requires HF_TOKEN)...")
204
  try:
205
- diarize_output = global_diarizer(audio_for_model)
206
- for segment, _, label in diarize_output.itertracks(yield_label=True):
207
- print(f"start={segment.start:.1f}s stop={segment.end:.1f}s {label}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  except Exception as e:
209
- warn(results, "DIAR_SKIP", f"Error during diarization (likely token/model failure): {type(e).__name__}: {e}. Skipping diarization.")
210
- diarize_output = None
211
  else:
212
- warn(results, "DIAR_SKIP", "HF_TOKEN not set. Skipping speaker diarization.")
213
- print("Assigning speakers to words...")
214
  try:
215
  diarize_segments_for_assignment = []
216
  if diarize_output is not None and hasattr(diarize_output, "itertracks"):
 
38
  print(f"Error loading pyannote pipeline: {type(e).__name__}: {e}. Diarization will be skipped.")
39
  diarization_pipeline = None
40
  global_diarizer = diarization_pipeline
41
+ ALIGN_MODEL_MAP = {
42
+ "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
43
+ "pa": "kingabzpro/wav2vec2-large-xlsr-53-punjabi",
44
+ "sd": "Abdullah104/wav2vec2-large-xls-r-300m-sindhi-kaggle",
45
+ "ps": "ihanif/wav2vec2-xls-r-300m-pashto",
46
+ }
47
  model_name = "medium"
48
  class TimelineItem(BaseModel):
49
  start: float
 
197
  result = model.transcribe(audio_loaded, batch_size=4 )
198
  language_code = result.get("language") or result.get("detected_language") or "en"
199
  results.languageCode = language_code
200
+ global global_align_model_cache
201
  print(f"Detected language: {language_code}. Aligning transcription...")
202
+ aligned = {"segments": result["segments"]}
203
+ align_model = None
204
+ metadata = None
205
+ if language_code not in global_align_model_cache:
206
+ align_model_name = ALIGN_MODEL_MAP.get(language_code)
 
 
 
 
207
  try:
208
+ if align_model_name:
209
+ print(f"Loading custom alignment model for {language_code}: {align_model_name}...")
210
+ align_model, metadata = whisperx.load_align_model(
211
+ language_code=language_code,
212
+ model_name=align_model_name,
213
+ device=device
214
+ )
215
+ global_align_model_cache[language_code] = (align_model, metadata)
216
+ print(f"Alignment model loaded/cached for language: {language_code}")
217
+
218
+ except Exception as e:
219
+ warn(results, "ALIGN_LOAD_FAIL", f"Failed to load alignment model for {language_code}: {e}. Alignment skipped.")
220
+ global_align_model_cache[language_code] = (None, None) # Cache the failure/skip
221
+ else:
222
+ align_model, metadata = global_align_model_cache[language_code]
223
+ if align_model:
224
+ print(f"Alignment model loaded from cache for language: {language_code}")
225
+ if align_model:
226
+ try:
227
+ aligned = whisperx.align(
228
+ result["segments"],
229
+ align_model,
230
+ metadata,
231
+ audio_loaded,
232
+ device
233
+ )
234
  except Exception as e:
235
+ warn(results, "ALIGN_RUN_FAIL", f"Alignment execution failed: {type(e).__name__}: {e}. Using raw segments.")
 
236
  else:
237
+ warn(results, "ALIGN_SKIP", "Alignment model unavailable; using raw Whisper segments.")
 
238
  try:
239
  diarize_segments_for_assignment = []
240
  if diarize_output is not None and hasattr(diarize_output, "itertracks"):