Spaces:

hafsaabd82
/

Audio-Analyzer

Sleeping

App Files Community

hafsaabd82 committed on Dec 8, 2025

Commit

7d5900a

verified ·

1 Parent(s): c23d4c5

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -13

app.py CHANGED Viewed

@@ -27,21 +27,25 @@ Segment: Any = None
 device = "cuda" if torch.cuda.is_available() else "cpu"
 COMPUTE_TYPE = "float16" if device == "cuda" else "float32"
 token = os.environ.get("HF_TOKEN")
-try:
-    if token:
         pyannote_device = torch.device(device)
-        diarization_pipeline = Pipeline.from_pretrained(
             "pyannote/speaker-diarization-3.1",
             use_auth_token=token
         ).to(pyannote_device)
-        print("Pyannote pipeline loaded successfully.")
-    else:
-        diarization_pipeline = None
-except Exception as e:
-    print(f"Error loading pyannote pipeline: {type(e).__name__}: {e}. Diarization will be skipped.")
-    diarization_pipeline = None
-global_diarizer = diarization_pipeline
 model_name = "medium"
 ALIGN_MODEL_MAP = {
     "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"}
@@ -197,6 +201,9 @@ def analyze_audio(audio_file: str,
             temp_preproc = None
     start_ml_time = time.time()
     try:
         print(f"Loading Whisper model '{model_name}' on {device}...")
         model = whisperx.load_model(model_name, device, compute_type="float32")
@@ -250,16 +257,19 @@ def analyze_audio(audio_file: str,
             warn(results, "ALIGN_SKIP", "Alignment model unavailable; using raw Whisper segments.")
         print("Cleaning up Whisper model memory...")
         del model
         del audio_loaded
         if device == "cuda":
             torch.cuda.empty_cache()
         gc.collect()
         print("Memory cleanup complete.")
         diarize_output = None
-        if global_diarizer is not None:
             print("Performing speaker diarization (Requires HF_TOKEN)...")
             try:
-                diarize_output = global_diarizer(audio_for_model)
                 for segment, _, label in diarize_output.itertracks(yield_label=True):
                     print(f"start={segment.start:.1f}s stop={segment.end:.1f}s {label}")
             except Exception as e:
@@ -267,7 +277,15 @@ def analyze_audio(audio_file: str,
                 diarize_output = None
         else:
             warn(results, "DIAR_SKIP", "HF_TOKEN not set or Diarization Pipeline failed to load globally. Skipping speaker diarization.")
-        print("Assigning speakers to words...")
         try:
             diarize_segments_for_assignment = []
             if diarize_output is not None and hasattr(diarize_output, "itertracks"):

 device = "cuda" if torch.cuda.is_available() else "cpu"
 COMPUTE_TYPE = "float16" if device == "cuda" else "float32"
+BATCH_SIZE = 16
 token = os.environ.get("HF_TOKEN")
+global_diarizer = None
+def load_pyannote_pipeline():
+    """Loads and returns the Pyannote Diarization pipeline."""
+    if not token:
+        print("HF_TOKEN not set. Diarization is unavailable.")
+        return None
+    try:
         pyannote_device = torch.device(device)
+        pipeline = Pipeline.from_pretrained(
             "pyannote/speaker-diarization-3.1",
             use_auth_token=token
         ).to(pyannote_device)
+        print("Pyannote pipeline loaded dynamically.")
+        return pipeline
+    except Exception as e:
+        print(f"Error loading pyannote pipeline dynamically: {type(e).__name__}: {e}. Diarization will be skipped.")
+        return None
 model_name = "medium"
 ALIGN_MODEL_MAP = {
     "ur": "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"}
             temp_preproc = None
     start_ml_time = time.time()
+    model = None
+    audio_loaded = None
+    diarization_pipeline = None
     try:
         print(f"Loading Whisper model '{model_name}' on {device}...")
         model = whisperx.load_model(model_name, device, compute_type="float32")
             warn(results, "ALIGN_SKIP", "Alignment model unavailable; using raw Whisper segments.")
         print("Cleaning up Whisper model memory...")
         del model
+        model = None
         del audio_loaded
+        audio_loaded = None
         if device == "cuda":
             torch.cuda.empty_cache()
         gc.collect()
         print("Memory cleanup complete.")
         diarize_output = None
+        diarization_pipeline = load_pyannote_pipeline()
+        if diarization_pipeline is not None:
             print("Performing speaker diarization (Requires HF_TOKEN)...")
             try:
+                diarize_output = diarization_pipeline(audio_for_model)
                 for segment, _, label in diarize_output.itertracks(yield_label=True):
                     print(f"start={segment.start:.1f}s stop={segment.end:.1f}s {label}")
             except Exception as e:
                 diarize_output = None
         else:
             warn(results, "DIAR_SKIP", "HF_TOKEN not set or Diarization Pipeline failed to load globally. Skipping speaker diarization.")
+        if diarization_pipeline is not None:
+             print("Cleaning up Pyannote model memory...")
+             del diarization_pipeline
+             diarization_pipeline = None
+             if device == "cuda":
+                torch.cuda.empty_cache()
+             gc.collect()
+             print("Pyannote cleanup complete.")
+        print("Assigning speakers to words...")
         try:
             diarize_segments_for_assignment = []
             if diarize_output is not None and hasattr(diarize_output, "itertracks"):