liuyang committed on
Commit
ae73284
·
1 Parent(s): d36869b

modify preload logic

Browse files
Files changed (1) hide show
  1. app.py +22 -12
app.py CHANGED
@@ -506,11 +506,26 @@ class WhisperTranscriber:
506
  """Transcribe the entire audio file using WhisperX with alignment"""
507
  global _whipser_x_transcribe_models, _whipser_x_align_models
508
 
509
- # Get preloaded whisperX model
510
  if model_name not in _whipser_x_transcribe_models:
511
- raise ValueError(f"WhisperX model '{model_name}' not preloaded. Available models: {list(_whipser_x_transcribe_models.keys())}")
512
-
513
- whisper_model = _whipser_x_transcribe_models[model_name]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
 
515
  print(f"Transcribing full audio with WhisperX model '{model_name}' and batch size {batch_size}...")
516
  start_time = time.time()
@@ -1538,14 +1553,9 @@ with demo:
1538
  - Vocabulary: Add names and technical terms in the prompt for better accuracy
1539
  """)
1540
 
1541
- # Preload all WhisperX transcribe models once at service initialization
1542
- print("Preloading all WhisperX transcribe models at startup...")
1543
- try:
1544
- _preload_whisperx_transcribe_models()
1545
- print("All WhisperX transcribe models preloaded at startup!")
1546
- except Exception as e:
1547
- print(f"Warning: Could not preload WhisperX transcribe models at startup: {e}")
1548
- print("Models will be loaded on first use instead.")
1549
 
1550
  if __name__ == "__main__":
1551
  demo.launch(debug=True)
 
506
  """Transcribe the entire audio file using WhisperX with alignment"""
507
  global _whipser_x_transcribe_models, _whipser_x_align_models
508
 
509
+ # Load whisperX model lazily on first use (within GPU context)
510
  if model_name not in _whipser_x_transcribe_models:
511
+ print(f"Loading WhisperX transcribe model '{model_name}' on GPU...")
512
+ if model_name not in MODELS:
513
+ raise ValueError(f"Model '{model_name}' not found in MODELS registry. Available: {list(MODELS.keys())}")
514
+
515
+ whisperx_model_name = MODELS[model_name]["whisperx_name"]
516
+ device = "cuda"
517
+ compute_type = "float16"
518
+
519
+ whisper_model = whisperx.load_model(
520
+ whisperx_model_name,
521
+ device=device,
522
+ compute_type=compute_type,
523
+ download_root=CACHE_ROOT
524
+ )
525
+ _whipser_x_transcribe_models[model_name] = whisper_model
526
+ print(f"WhisperX transcribe model '{model_name}' loaded successfully")
527
+ else:
528
+ whisper_model = _whipser_x_transcribe_models[model_name]
529
 
530
  print(f"Transcribing full audio with WhisperX model '{model_name}' and batch size {batch_size}...")
531
  start_time = time.time()
 
1553
  - Vocabulary: Add names and technical terms in the prompt for better accuracy
1554
  """)
1555
 
1556
+ # Note: WhisperX transcribe models are loaded lazily on first use within GPU context
1557
+ # This is because @spaces.GPU creates separate contexts, so preloading at startup won't work
1558
+ print("WhisperX transcribe models will be loaded on first use (lazy loading)...")
 
 
 
 
 
1559
 
1560
  if __name__ == "__main__":
1561
  demo.launch(debug=True)