liuyang committed on
Commit
ae73284
·
1 Parent(s): d36869b

modify preload logic

Browse files
Files changed (1) hide show
  1. app.py +22 -12
app.py CHANGED
@@ -506,11 +506,26 @@ class WhisperTranscriber:
506
  """Transcribe the entire audio file using WhisperX with alignment"""
507
  global _whipser_x_transcribe_models, _whipser_x_align_models
508
 
509
- # Get preloaded whisperX model
510
  if model_name not in _whipser_x_transcribe_models:
511
- raise ValueError(f"WhisperX model '{model_name}' not preloaded. Available models: {list(_whipser_x_transcribe_models.keys())}")
512
-
513
- whisper_model = _whipser_x_transcribe_models[model_name]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
 
515
  print(f"Transcribing full audio with WhisperX model '{model_name}' and batch size {batch_size}...")
516
  start_time = time.time()
@@ -1538,14 +1553,9 @@ with demo:
1538
  - Vocabulary: Add names and technical terms in the prompt for better accuracy
1539
  """)
1540
 
1541
- # Preload all WhisperX transcribe models once at service initialization
1542
- print("Preloading all WhisperX transcribe models at startup...")
1543
- try:
1544
- _preload_whisperx_transcribe_models()
1545
- print("All WhisperX transcribe models preloaded at startup!")
1546
- except Exception as e:
1547
- print(f"Warning: Could not preload WhisperX transcribe models at startup: {e}")
1548
- print("Models will be loaded on first use instead.")
1549
 
1550
  if __name__ == "__main__":
1551
  demo.launch(debug=True)
 
506
  """Transcribe the entire audio file using WhisperX with alignment"""
507
  global _whipser_x_transcribe_models, _whipser_x_align_models
508
 
509
+ # Load whisperX model lazily on first use (within GPU context)
510
  if model_name not in _whipser_x_transcribe_models:
511
+ print(f"Loading WhisperX transcribe model '{model_name}' on GPU...")
512
+ if model_name not in MODELS:
513
+ raise ValueError(f"Model '{model_name}' not found in MODELS registry. Available: {list(MODELS.keys())}")
514
+
515
+ whisperx_model_name = MODELS[model_name]["whisperx_name"]
516
+ device = "cuda"
517
+ compute_type = "float16"
518
+
519
+ whisper_model = whisperx.load_model(
520
+ whisperx_model_name,
521
+ device=device,
522
+ compute_type=compute_type,
523
+ download_root=CACHE_ROOT
524
+ )
525
+ _whipser_x_transcribe_models[model_name] = whisper_model
526
+ print(f"WhisperX transcribe model '{model_name}' loaded successfully")
527
+ else:
528
+ whisper_model = _whipser_x_transcribe_models[model_name]
529
 
530
  print(f"Transcribing full audio with WhisperX model '{model_name}' and batch size {batch_size}...")
531
  start_time = time.time()
 
1553
  - Vocabulary: Add names and technical terms in the prompt for better accuracy
1554
  """)
1555
 
1556
+ # Note: WhisperX transcribe models are loaded lazily on first use within GPU context
1557
+ # This is because @spaces.GPU creates separate contexts, so preloading at startup won't work
1558
+ print("WhisperX transcribe models will be loaded on first use (lazy loading)...")
 
 
 
 
 
1559
 
1560
  if __name__ == "__main__":
1561
  demo.launch(debug=True)