hafsaabd82 committed on
Commit
7d5900a
·
verified ·
1 Parent(s): c23d4c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -13
app.py CHANGED
@@ -27,21 +27,25 @@ Segment: Any = None
27
 
28
  device = "cuda" if torch.cuda.is_available() else "cpu"
29
  COMPUTE_TYPE = "float16" if device == "cuda" else "float32"
 
30
  token = os.environ.get("HF_TOKEN")
31
- try:
32
- if token:
 
 
 
 
 
33
  pyannote_device = torch.device(device)
34
- diarization_pipeline = Pipeline.from_pretrained(
35
  "pyannote/speaker-diarization-3.1",
36
  use_auth_token=token
37
  ).to(pyannote_device)
38
- print("Pyannote pipeline loaded successfully.")
39
- else:
40
- diarization_pipeline = None
41
- except Exception as e:
42
- print(f"Error loading pyannote pipeline: {type(e).__name__}: {e}. Diarization will be skipped.")
43
- diarization_pipeline = None
44
- global_diarizer = diarization_pipeline
45
  model_name = "medium"
46
  ALIGN_MODEL_MAP = {
47
  "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"}
@@ -197,6 +201,9 @@ def analyze_audio(audio_file: str,
197
  temp_preproc = None
198
 
199
  start_ml_time = time.time()
 
 
 
200
  try:
201
  print(f"Loading Whisper model '{model_name}' on {device}...")
202
  model = whisperx.load_model(model_name, device, compute_type="float32")
@@ -250,16 +257,19 @@ def analyze_audio(audio_file: str,
250
  warn(results, "ALIGN_SKIP", "Alignment model unavailable; using raw Whisper segments.")
251
  print("Cleaning up Whisper model memory...")
252
  del model
 
253
  del audio_loaded
 
254
  if device == "cuda":
255
  torch.cuda.empty_cache()
256
  gc.collect()
257
  print("Memory cleanup complete.")
258
  diarize_output = None
259
- if global_diarizer is not None:
 
260
  print("Performing speaker diarization (Requires HF_TOKEN)...")
261
  try:
262
- diarize_output = global_diarizer(audio_for_model)
263
  for segment, _, label in diarize_output.itertracks(yield_label=True):
264
  print(f"start={segment.start:.1f}s stop={segment.end:.1f}s {label}")
265
  except Exception as e:
@@ -267,7 +277,15 @@ def analyze_audio(audio_file: str,
267
  diarize_output = None
268
  else:
269
  warn(results, "DIAR_SKIP", "HF_TOKEN not set or Diarization Pipeline failed to load globally. Skipping speaker diarization.")
270
- print("Assigning speakers to words...")
 
 
 
 
 
 
 
 
271
  try:
272
  diarize_segments_for_assignment = []
273
  if diarize_output is not None and hasattr(diarize_output, "itertracks"):
 
27
 
28
  device = "cuda" if torch.cuda.is_available() else "cpu"
29
  COMPUTE_TYPE = "float16" if device == "cuda" else "float32"
30
+ BATCH_SIZE = 16
31
  token = os.environ.get("HF_TOKEN")
32
+ global_diarizer = None
33
+ def load_pyannote_pipeline():
34
+ """Loads and returns the Pyannote Diarization pipeline."""
35
+ if not token:
36
+ print("HF_TOKEN not set. Diarization is unavailable.")
37
+ return None
38
+ try:
39
  pyannote_device = torch.device(device)
40
+ pipeline = Pipeline.from_pretrained(
41
  "pyannote/speaker-diarization-3.1",
42
  use_auth_token=token
43
  ).to(pyannote_device)
44
+ print("Pyannote pipeline loaded dynamically.")
45
+ return pipeline
46
+ except Exception as e:
47
+ print(f"Error loading pyannote pipeline dynamically: {type(e).__name__}: {e}. Diarization will be skipped.")
48
+ return None
 
 
49
  model_name = "medium"
50
  ALIGN_MODEL_MAP = {
51
  "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"}
 
201
  temp_preproc = None
202
 
203
  start_ml_time = time.time()
204
+ model = None
205
+ audio_loaded = None
206
+ diarization_pipeline = None
207
  try:
208
  print(f"Loading Whisper model '{model_name}' on {device}...")
209
  model = whisperx.load_model(model_name, device, compute_type="float32")
 
257
  warn(results, "ALIGN_SKIP", "Alignment model unavailable; using raw Whisper segments.")
258
  print("Cleaning up Whisper model memory...")
259
  del model
260
+ model = None
261
  del audio_loaded
262
+ audio_loaded = None
263
  if device == "cuda":
264
  torch.cuda.empty_cache()
265
  gc.collect()
266
  print("Memory cleanup complete.")
267
  diarize_output = None
268
+ diarization_pipeline = load_pyannote_pipeline()
269
+ if diarization_pipeline is not None:
270
  print("Performing speaker diarization (Requires HF_TOKEN)...")
271
  try:
272
+ diarize_output = diarization_pipeline(audio_for_model)
273
  for segment, _, label in diarize_output.itertracks(yield_label=True):
274
  print(f"start={segment.start:.1f}s stop={segment.end:.1f}s {label}")
275
  except Exception as e:
 
277
  diarize_output = None
278
  else:
279
  warn(results, "DIAR_SKIP", "HF_TOKEN not set or Diarization Pipeline failed to load globally. Skipping speaker diarization.")
280
+ if diarization_pipeline is not None:
281
+ print("Cleaning up Pyannote model memory...")
282
+ del diarization_pipeline
283
+ diarization_pipeline = None
284
+ if device == "cuda":
285
+ torch.cuda.empty_cache()
286
+ gc.collect()
287
+ print("Pyannote cleanup complete.")
288
+ print("Assigning speakers to words...")
289
  try:
290
  diarize_segments_for_assignment = []
291
  if diarize_output is not None and hasattr(diarize_output, "itertracks"):