liuyang committed
Commit e48217c · 1 Parent(s): 6d56dd1

restore diarization initialization

Files changed (1)
  1. app.py +28 -13
app.py CHANGED
@@ -32,11 +32,39 @@ from faster_whisper import WhisperModel
 from faster_whisper.vad import VadOptions
 import requests
 import base64
+from pyannote.audio import Pipeline
 
 # Lazy global holder ----------------------------------------------------------
 _whisper = None
 _diarizer = None
 
+
+# Create global diarization pipeline
+try:
+    print("Loading diarization model...")
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+    torch.set_float32_matmul_precision('high')
+
+    _diarizer = Pipeline.from_pretrained(
+        "pyannote/speaker-diarization-3.1",
+        use_auth_token=os.getenv("HF_TOKEN"),
+        torch_dtype=torch.float16,
+    ).to(torch.device("cuda"))
+    _diarizer.model.half()  # FP16
+
+    for m in _diarizer.model.modules():  # compact LSTM weights
+        if isinstance(m, torch.nn.LSTM):
+            m.flatten_parameters()
+
+    _diarizer.model = torch.compile(_diarizer.model, mode="reduce-overhead")
+    print("Diarization model loaded successfully")
+except Exception as e:
+    import traceback
+    traceback.print_exc()
+    print(f"Could not load diarization model: {e}")
+    _diarizer = None
+
 @spaces.GPU  # GPU is guaranteed to exist *inside* this function
 def _load_models():
     global _whisper, _diarizer
@@ -48,19 +76,6 @@ def _load_models():
         compute_type="float16",
     )
     print("Whisper model loaded successfully")
-    if _diarizer is None:
-        print("Loading diarization model...")
-        try:
-            from pyannote.audio import Pipeline
-            _diarizer = Pipeline.from_pretrained(
-                "pyannote/speaker-diarization-3.1",
-                use_auth_token=os.getenv("HF_TOKEN"),
-                torch_dtype=torch.float16,
-            ).to(torch.device("cuda"))
-            print("Diarization model loaded successfully")
-        except Exception as e:
-            print(f"Could not load diarization model: {e}")
-            _diarizer = None
     return _whisper, _diarizer
 
 # -----------------------------------------------------------------------------
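For context, a minimal sketch (not part of this commit) of how the module-level _diarizer and the lazily loaded Whisper model might be consumed later in app.py. The diff moves diarizer creation to import time while Whisper stays lazy inside the GPU-decorated loader, and the sketch follows that split; the function name, audio path, and transcription parameters below are illustrative assumptions, since this diff only covers model initialization.

@spaces.GPU
def transcribe_and_diarize(audio_path):
    # Hypothetical consumer of the globals initialized above.
    whisper, diarizer = _load_models()

    # faster-whisper returns a segment generator plus transcription info.
    segments, info = whisper.transcribe(audio_path, vad_filter=True)
    transcript = [{"start": s.start, "end": s.end, "text": s.text} for s in segments]

    speaker_turns = []
    if diarizer is not None:
        # pyannote pipeline returns an Annotation of speaker turns.
        annotation = diarizer(audio_path)
        speaker_turns = [
            {"start": turn.start, "end": turn.end, "speaker": label}
            for turn, _, label in annotation.itertracks(yield_label=True)
        ]

    return transcript, speaker_turns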