Spaces:
Runtime error
Runtime error
liuyang
committed on
Commit
·
ae73284
1
Parent(s):
d36869b
modify preload logic
Browse files
app.py
CHANGED
|
@@ -506,11 +506,26 @@ class WhisperTranscriber:
|
|
| 506 |
"""Transcribe the entire audio file using WhisperX with alignment"""
|
| 507 |
global _whipser_x_transcribe_models, _whipser_x_align_models
|
| 508 |
|
| 509 |
-
#
|
| 510 |
if model_name not in _whipser_x_transcribe_models:
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
| 515 |
print(f"Transcribing full audio with WhisperX model '{model_name}' and batch size {batch_size}...")
|
| 516 |
start_time = time.time()
|
|
@@ -1538,14 +1553,9 @@ with demo:
|
|
| 1538 |
- Vocabulary: Add names and technical terms in the prompt for better accuracy
|
| 1539 |
""")
|
| 1540 |
|
| 1541 |
-
#
|
| 1542 |
-
|
| 1543 |
-
|
| 1544 |
-
_preload_whisperx_transcribe_models()
|
| 1545 |
-
print("All WhisperX transcribe models preloaded at startup!")
|
| 1546 |
-
except Exception as e:
|
| 1547 |
-
print(f"Warning: Could not preload WhisperX transcribe models at startup: {e}")
|
| 1548 |
-
print("Models will be loaded on first use instead.")
|
| 1549 |
|
| 1550 |
if __name__ == "__main__":
|
| 1551 |
demo.launch(debug=True)
|
|
|
|
| 506 |
"""Transcribe the entire audio file using WhisperX with alignment"""
|
| 507 |
global _whipser_x_transcribe_models, _whipser_x_align_models
|
| 508 |
|
| 509 |
+
# Load the WhisperX model lazily on first use (within the GPU context)
|
| 510 |
if model_name not in _whipser_x_transcribe_models:
|
| 511 |
+
print(f"Loading WhisperX transcribe model '{model_name}' on GPU...")
|
| 512 |
+
if model_name not in MODELS:
|
| 513 |
+
raise ValueError(f"Model '{model_name}' not found in MODELS registry. Available: {list(MODELS.keys())}")
|
| 514 |
+
|
| 515 |
+
whisperx_model_name = MODELS[model_name]["whisperx_name"]
|
| 516 |
+
device = "cuda"
|
| 517 |
+
compute_type = "float16"
|
| 518 |
+
|
| 519 |
+
whisper_model = whisperx.load_model(
|
| 520 |
+
whisperx_model_name,
|
| 521 |
+
device=device,
|
| 522 |
+
compute_type=compute_type,
|
| 523 |
+
download_root=CACHE_ROOT
|
| 524 |
+
)
|
| 525 |
+
_whipser_x_transcribe_models[model_name] = whisper_model
|
| 526 |
+
print(f"WhisperX transcribe model '{model_name}' loaded successfully")
|
| 527 |
+
else:
|
| 528 |
+
whisper_model = _whipser_x_transcribe_models[model_name]
|
| 529 |
|
| 530 |
print(f"Transcribing full audio with WhisperX model '{model_name}' and batch size {batch_size}...")
|
| 531 |
start_time = time.time()
|
|
|
|
| 1553 |
- Vocabulary: Add names and technical terms in the prompt for better accuracy
|
| 1554 |
""")
|
| 1555 |
|
| 1556 |
+
# Note: WhisperX transcribe models are loaded lazily, on first use, inside the GPU context.
|
| 1557 |
+
# Preloading at startup won't work because @spaces.GPU creates separate contexts.
|
| 1558 |
+
print("WhisperX transcribe models will be loaded on first use (lazy loading)...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1559 |
|
| 1560 |
if __name__ == "__main__":
|
| 1561 |
demo.launch(debug=True)
|