Prathamesh Sarjerao Vaidya
committed on
Commit
·
65f46e8
1
Parent(s):
938d58f
made changes
Browse files
- Dockerfile +9 -6
- model_preloader.py +61 -8
- requirements.txt +1 -1
- startup.py +12 -0
Dockerfile
CHANGED
|
@@ -64,18 +64,21 @@ ENV PYTHONPATH=/app \
|
|
| 64 |
HUGGINGFACE_HUB_CACHE=/app/model_cache \
|
| 65 |
HF_HUB_CACHE=/app/model_cache \
|
| 66 |
FONTCONFIG_PATH=/tmp/fontconfig \
|
| 67 |
-
#
|
| 68 |
ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
|
| 69 |
ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
|
| 70 |
-
|
| 71 |
CTRANSLATE2_FORCE_CPU_ISA=generic \
|
| 72 |
-
#
|
| 73 |
TF_CPP_MIN_LOG_LEVEL=2 \
|
| 74 |
TOKENIZERS_PARALLELISM=false \
|
| 75 |
-
# Disable problematic optimizations
|
| 76 |
OMP_NUM_THREADS=1 \
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
# Expose port for Hugging Face Spaces
|
| 81 |
EXPOSE 7860
|
|
|
|
| 64 |
HUGGINGFACE_HUB_CACHE=/app/model_cache \
|
| 65 |
HF_HUB_CACHE=/app/model_cache \
|
| 66 |
FONTCONFIG_PATH=/tmp/fontconfig \
|
| 67 |
+
# Critical ONNX Runtime fixes for containers
|
| 68 |
ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
|
| 69 |
ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
|
| 70 |
+
ORT_DISABLE_TLS_ARENA=1 \
|
| 71 |
CTRANSLATE2_FORCE_CPU_ISA=generic \
|
| 72 |
+
# Threading and memory optimizations
|
| 73 |
TF_CPP_MIN_LOG_LEVEL=2 \
|
| 74 |
TOKENIZERS_PARALLELISM=false \
|
|
|
|
| 75 |
OMP_NUM_THREADS=1 \
|
| 76 |
+
MKL_NUM_THREADS=1 \
|
| 77 |
+
NUMBA_NUM_THREADS=1 \
|
| 78 |
+
TF_ENABLE_ONEDNN_OPTS=0 \
|
| 79 |
+
# Additional container runtime settings (malloc arena limit, unbuffered Python output)
|
| 80 |
+
MALLOC_ARENA_MAX=2 \
|
| 81 |
+
PYTHONUNBUFFERED=1
|
| 82 |
|
| 83 |
# Expose port for Hugging Face Spaces
|
| 84 |
EXPOSE 7860
|
model_preloader.py
CHANGED
|
@@ -29,6 +29,32 @@ from rich.panel import Panel
|
|
| 29 |
from rich.text import Text
|
| 30 |
import psutil
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
# Add src directory to path
|
| 33 |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
| 34 |
|
|
@@ -372,7 +398,7 @@ class ModelPreloader:
|
|
| 372 |
logger.warning(f"Error saving cache for {model_key}: {e}")
|
| 373 |
|
| 374 |
def load_pyannote_pipeline(self, task_id: str) -> Optional[Pipeline]:
|
| 375 |
-
"""Load pyannote speaker diarization pipeline."""
|
| 376 |
try:
|
| 377 |
console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
|
| 378 |
|
|
@@ -381,21 +407,48 @@ class ModelPreloader:
|
|
| 381 |
if not hf_token:
|
| 382 |
console.print("[red]Warning: HUGGINGFACE_TOKEN not found. Some models may not be accessible.[/red]")
|
| 383 |
|
| 384 |
-
pipeline
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
-
# Test the pipeline
|
| 390 |
console.print(f"[green]SUCCESS: pyannote.audio pipeline loaded successfully on {self.device}[/green]")
|
| 391 |
-
|
| 392 |
return pipeline
|
| 393 |
|
| 394 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
|
| 396 |
logger.error(f"Pyannote loading failed: {e}")
|
| 397 |
return None
|
| 398 |
-
|
| 399 |
def load_whisper_model(self, task_id: str) -> Optional[whisper.Whisper]:
|
| 400 |
"""Load Whisper speech recognition model with enhanced cache checking."""
|
| 401 |
try:
|
|
|
|
| 29 |
from rich.text import Text
|
| 30 |
import psutil
|
| 31 |
|
| 32 |
+
# CRITICAL: Configure ONNX Runtime BEFORE any ML library imports
|
| 33 |
+
import os
|
| 34 |
+
os.environ.update({
|
| 35 |
+
'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
| 36 |
+
'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
| 37 |
+
'ORT_DISABLE_TLS_ARENA': '1',
|
| 38 |
+
'OMP_NUM_THREADS': '1',
|
| 39 |
+
'MKL_NUM_THREADS': '1',
|
| 40 |
+
'NUMBA_NUM_THREADS': '1',
|
| 41 |
+
'TF_ENABLE_ONEDNN_OPTS': '0',
|
| 42 |
+
'TOKENIZERS_PARALLELISM': 'false',
|
| 43 |
+
'MALLOC_ARENA_MAX': '2'
|
| 44 |
+
})
|
| 45 |
+
|
| 46 |
+
# Import ONNX Runtime with error suppression
|
| 47 |
+
try:
|
| 48 |
+
import warnings
|
| 49 |
+
warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
|
| 50 |
+
import onnxruntime as ort
|
| 51 |
+
# Limit ONNX Runtime logging to errors (this call does not select an execution provider)
|
| 52 |
+
ort.set_default_logger_severity(3) # ERROR level only
|
| 53 |
+
except ImportError:
|
| 54 |
+
pass
|
| 55 |
+
except Exception as e:
|
| 56 |
+
print(f"ONNX Runtime warning (expected in containers): {e}")
|
| 57 |
+
|
| 58 |
# Add src directory to path
|
| 59 |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
| 60 |
|
|
|
|
| 398 |
logger.warning(f"Error saving cache for {model_key}: {e}")
|
| 399 |
|
| 400 |
def load_pyannote_pipeline(self, task_id: str) -> Optional[Pipeline]:
|
| 401 |
+
"""Load pyannote speaker diarization pipeline with container-safe settings."""
|
| 402 |
try:
|
| 403 |
console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
|
| 404 |
|
|
|
|
| 407 |
if not hf_token:
|
| 408 |
console.print("[red]Warning: HUGGINGFACE_TOKEN not found. Some models may not be accessible.[/red]")
|
| 409 |
|
| 410 |
+
# Container-safe pipeline loading with error suppression
|
| 411 |
+
with warnings.catch_warnings():
|
| 412 |
+
warnings.filterwarnings("ignore", category=UserWarning)
|
| 413 |
+
warnings.filterwarnings("ignore", message=".*executable stack.*")
|
| 414 |
+
|
| 415 |
+
pipeline = Pipeline.from_pretrained(
|
| 416 |
+
"pyannote/speaker-diarization-3.1",
|
| 417 |
+
use_auth_token=hf_token,
|
| 418 |
+
cache_dir=str(self.cache_dir / "pyannote")
|
| 419 |
+
)
|
| 420 |
+
|
| 421 |
+
# Force CPU execution
|
| 422 |
+
if hasattr(pipeline, '_models'):
|
| 423 |
+
for model_name, model in pipeline._models.items():
|
| 424 |
+
if hasattr(model, 'to'):
|
| 425 |
+
model.to('cpu')
|
| 426 |
|
|
|
|
| 427 |
console.print(f"[green]SUCCESS: pyannote.audio pipeline loaded successfully on {self.device}[/green]")
|
|
|
|
| 428 |
return pipeline
|
| 429 |
|
| 430 |
except Exception as e:
|
| 431 |
+
# Check if it's the expected ONNX Runtime warning
|
| 432 |
+
if "executable stack" in str(e).lower():
|
| 433 |
+
console.print("[yellow]ONNX Runtime executable stack warning (expected in containers) - continuing...[/yellow]")
|
| 434 |
+
# Try alternative loading method
|
| 435 |
+
try:
|
| 436 |
+
import warnings
|
| 437 |
+
with warnings.catch_warnings():
|
| 438 |
+
warnings.simplefilter("ignore")
|
| 439 |
+
pipeline = Pipeline.from_pretrained(
|
| 440 |
+
"pyannote/speaker-diarization-3.1",
|
| 441 |
+
use_auth_token=hf_token,
|
| 442 |
+
cache_dir=str(self.cache_dir / "pyannote")
|
| 443 |
+
)
|
| 444 |
+
return pipeline
|
| 445 |
+
except:
|
| 446 |
+
pass
|
| 447 |
+
|
| 448 |
console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
|
| 449 |
logger.error(f"Pyannote loading failed: {e}")
|
| 450 |
return None
|
| 451 |
+
|
| 452 |
def load_whisper_model(self, task_id: str) -> Optional[whisper.Whisper]:
|
| 453 |
"""Load Whisper speech recognition model with enhanced cache checking."""
|
| 454 |
try:
|
requirements.txt
CHANGED
|
@@ -4,7 +4,7 @@ torchaudio==2.0.2
|
|
| 4 |
torchvision==0.15.2
|
| 5 |
|
| 6 |
# Keep regular ONNX Runtime with container-safe environment variables
|
| 7 |
-
onnxruntime==1.
|
| 8 |
|
| 9 |
# Audio processing
|
| 10 |
pyannote.audio==3.1.1
|
|
|
|
| 4 |
torchvision==0.15.2
|
| 5 |
|
| 6 |
# Keep regular ONNX Runtime with container-safe environment variables
|
| 7 |
+
onnxruntime==1.15.1
|
| 8 |
|
| 9 |
# Audio processing
|
| 10 |
pyannote.audio==3.1.1
|
startup.py
CHANGED
|
@@ -4,11 +4,23 @@ Startup script for Hugging Face Spaces deployment.
|
|
| 4 |
Handles model preloading and graceful fallbacks for containerized environments.
|
| 5 |
"""
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import os
|
| 8 |
import subprocess
|
| 9 |
import sys
|
| 10 |
import logging
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
# Configure logging
|
| 13 |
logging.basicConfig(
|
| 14 |
level=logging.INFO,
|
|
|
|
| 4 |
Handles model preloading and graceful fallbacks for containerized environments.
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
# Suppress ONNX Runtime warnings BEFORE any imports
|
| 8 |
+
import warnings
|
| 9 |
+
warnings.filterwarnings("ignore", message=".*executable stack.*")
|
| 10 |
+
warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
|
| 11 |
+
|
| 12 |
import os
|
| 13 |
import subprocess
|
| 14 |
import sys
|
| 15 |
import logging
|
| 16 |
|
| 17 |
+
# Set critical environment variables immediately
|
| 18 |
+
os.environ.update({
|
| 19 |
+
'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
| 20 |
+
'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
| 21 |
+
'ORT_DISABLE_TLS_ARENA': '1'
|
| 22 |
+
})
|
| 23 |
+
|
| 24 |
# Configure logging
|
| 25 |
logging.basicConfig(
|
| 26 |
level=logging.INFO,
|