liuyang committed on
Commit
c18172e
·
1 Parent(s): 02f099e
Files changed (1) hide show
  1. app.py +54 -43
app.py CHANGED
@@ -363,34 +363,12 @@ _whipser_x_align_models = {}
363
  _diarizer = None
364
  _embedder = None
365
 
366
- # Preload all WhisperX transcribe models
367
- print("Preloading all WhisperX transcribe models...")
368
- for model_name in MODELS.keys():
369
- try:
370
- print(f"Loading WhisperX model '{model_name}'...")
371
- whisperx_model_name = MODELS[model_name]["whisperx_name"]
372
- device = "cuda" # Load on CPU initially, will move to GPU when needed
373
- compute_type = "float16"
374
-
375
- model = whisperx.load_model(
376
- whisperx_model_name,
377
- device=device,
378
- compute_type=compute_type,
379
- download_root=CACHE_ROOT
380
- )
381
- _whipser_x_transcribe_models[model_name] = model
382
- print(f"WhisperX model '{model_name}' loaded successfully")
383
- except Exception as e:
384
- import traceback
385
- traceback.print_exc()
386
- print(f"Could not load WhisperX model '{model_name}': {e}")
387
-
388
- # Preload all alignment models for supported languages
389
  print("Preloading all WhisperX alignment models...")
390
  for lang in ALIGN_LANGUAGES:
391
  try:
392
  print(f"Loading alignment model for language '{lang}'...")
393
- device = "cuda" # Load on CPU initially, will move to GPU when needed
394
 
395
  align_model, align_metadata = whisperx.load_align_model(
396
  language_code=lang,
@@ -405,7 +383,7 @@ for lang in ALIGN_LANGUAGES:
405
  except Exception as e:
406
  print(f"Could not load alignment model for '{lang}': {e}")
407
 
408
- # Create global diarization pipeline
409
  try:
410
  print("Loading diarization model...")
411
  torch.backends.cuda.matmul.allow_tf32 = True
@@ -424,25 +402,61 @@ except Exception as e:
424
  print(f"Could not load diarization model: {e}")
425
  _diarizer = None
426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  @spaces.GPU # GPU is guaranteed to exist *inside* this function
428
  def _load_models(model_name: str = DEFAULT_MODEL):
429
  global _whipser_x_transcribe_models, _whipser_x_align_models, _diarizer
430
 
431
- if model_name not in _whipser_x_transcribe_models:
432
- raise ValueError(f"Model '{model_name}' not preloaded. Available models: {list(_whipser_x_transcribe_models.keys())}")
433
-
434
- whisper_model = _whipser_x_transcribe_models[model_name]
435
-
436
- # Move model to GPU if not already
437
- if hasattr(whisper_model, 'model') and hasattr(whisper_model.model, 'device'):
438
- current_device = str(whisper_model.model.device)
439
- if 'cpu' in current_device:
440
- print(f"Moving WhisperX model '{model_name}' to GPU...")
441
- whisper_model = whisper_model.to("cuda")
442
- _whipser_x_transcribe_models[model_name] = whisper_model
443
 
444
  return whisper_model, _diarizer
445
 
 
 
 
 
 
 
 
 
 
 
 
 
446
  # -----------------------------------------------------------------------------
447
  class WhisperTranscriber:
448
  def __init__(self):
@@ -498,12 +512,6 @@ class WhisperTranscriber:
498
  print(f"Performing alignment for language '{detected_language}'...")
499
  align_info = _whipser_x_align_models[detected_language]
500
 
501
- # Move alignment model to GPU if needed
502
- align_model = align_info["model"]
503
- if hasattr(align_model, 'to'):
504
- align_model = align_model.to("cuda")
505
- _whipser_x_align_models[detected_language]["model"] = align_model
506
-
507
  result = whisperx.align(
508
  result["segments"],
509
  align_info["model"],
@@ -1501,6 +1509,9 @@ with demo:
1501
  - Languages: Supports 100+ languages with auto-detection
1502
  - Vocabulary: Add names and technical terms in the prompt for better accuracy
1503
  """)
 
 
 
1504
 
1505
  if __name__ == "__main__":
1506
  demo.launch(debug=True)
 
363
  _diarizer = None
364
  _embedder = None
365
 
366
+ # Preload alignment and diarization models at startup (no GPU decorator needed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  print("Preloading all WhisperX alignment models...")
368
  for lang in ALIGN_LANGUAGES:
369
  try:
370
  print(f"Loading alignment model for language '{lang}'...")
371
+ device = "cuda"
372
 
373
  align_model, align_metadata = whisperx.load_align_model(
374
  language_code=lang,
 
383
  except Exception as e:
384
  print(f"Could not load alignment model for '{lang}': {e}")
385
 
386
+ # Create global diarization pipeline at startup
387
  try:
388
  print("Loading diarization model...")
389
  torch.backends.cuda.matmul.allow_tf32 = True
 
402
  print(f"Could not load diarization model: {e}")
403
  _diarizer = None
404
 
405
+ print("Alignment and diarization models preloaded successfully!")
406
+
407
@spaces.GPU  # GPU is guaranteed to exist *inside* this function
def _load_whisper_model(model_name: str):
    """Load (and cache) a single WhisperX transcribe model on the GPU.

    Models are loaded lazily: the first request for a given name calls
    ``whisperx.load_model``; later requests return the cached instance
    from ``_whipser_x_transcribe_models``.

    Args:
        model_name: Key into the ``MODELS`` registry.

    Returns:
        The loaded WhisperX model instance.

    Raises:
        ValueError: If ``model_name`` is not a key of ``MODELS``.
        RuntimeError: If ``whisperx.load_model`` fails; the original
            exception is chained as ``__cause__``.
    """
    global _whipser_x_transcribe_models

    # Cache hit: model was already loaded by an earlier GPU call.
    if model_name in _whipser_x_transcribe_models:
        print(f"WhisperX model '{model_name}' already loaded")
        return _whipser_x_transcribe_models[model_name]

    if model_name not in MODELS:
        raise ValueError(f"Model '{model_name}' not found in MODELS registry. Available: {list(MODELS.keys())}")

    print(f"Loading WhisperX model '{model_name}' on GPU...")
    whisperx_model_name = MODELS[model_name]["whisperx_name"]
    device = "cuda"
    compute_type = "float16"

    # Keep the try body minimal: only the load call can raise here.
    try:
        model = whisperx.load_model(
            whisperx_model_name,
            device=device,
            compute_type=compute_type,
            download_root=CACHE_ROOT,
        )
    except Exception as e:
        import traceback
        traceback.print_exc()
        # Chain the original error so callers can inspect the root cause.
        raise RuntimeError(f"Could not load WhisperX model '{model_name}': {e}") from e

    _whipser_x_transcribe_models[model_name] = model
    print(f"WhisperX model '{model_name}' loaded successfully")
    return model
438
+
439
@spaces.GPU  # GPU is guaranteed to exist *inside* this function
def _load_models(model_name: str = DEFAULT_MODEL):
    """Return the (transcribe model, diarizer) pair for *model_name*.

    The transcribe model is fetched lazily via ``_load_whisper_model``;
    the diarizer is the process-wide singleton created at startup.
    """
    global _whipser_x_transcribe_models, _whipser_x_align_models, _diarizer

    return _load_whisper_model(model_name), _diarizer
447
 
448
# Optional: Preload all whisper models explicitly
@spaces.GPU
def preload_all_whisper_models():
    """Warm every WhisperX transcribe model so first requests are fast.

    A failure for one model is reported but does not abort the
    remaining preloads.
    """
    print("Preloading all WhisperX transcribe models...")
    for name in MODELS:
        try:
            _load_whisper_model(name)
        except Exception as e:
            print(f"Failed to preload model '{name}': {e}")
    print("All WhisperX transcribe models preloaded!")
459
+
460
  # -----------------------------------------------------------------------------
461
  class WhisperTranscriber:
462
  def __init__(self):
 
512
  print(f"Performing alignment for language '{detected_language}'...")
513
  align_info = _whipser_x_align_models[detected_language]
514
 
 
 
 
 
 
 
515
  result = whisperx.align(
516
  result["segments"],
517
  align_info["model"],
 
1509
  - Languages: Supports 100+ languages with auto-detection
1510
  - Vocabulary: Add names and technical terms in the prompt for better accuracy
1511
  """)
1512
+
1513
+ # Preload all whisper models on startup
1514
+ demo.load(fn=preload_all_whisper_models, inputs=None, outputs=None)
1515
 
1516
  if __name__ == "__main__":
1517
  demo.launch(debug=True)