Prathamesh Sarjerao Vaidya committed on
Commit
65f46e8
·
1 Parent(s): 938d58f

made changes

Browse files
Files changed (4) hide show
  1. Dockerfile +9 -6
  2. model_preloader.py +61 -8
  3. requirements.txt +1 -1
  4. startup.py +12 -0
Dockerfile CHANGED
@@ -64,18 +64,21 @@ ENV PYTHONPATH=/app \
64
  HUGGINGFACE_HUB_CACHE=/app/model_cache \
65
  HF_HUB_CACHE=/app/model_cache \
66
  FONTCONFIG_PATH=/tmp/fontconfig \
67
- # Fix for ONNX Runtime in containers (KEY FIX)
68
  ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
69
  ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
70
- # Fix for audio processing libraries
71
  CTRANSLATE2_FORCE_CPU_ISA=generic \
72
- # Disable problematic features
73
  TF_CPP_MIN_LOG_LEVEL=2 \
74
  TOKENIZERS_PARALLELISM=false \
75
- # Disable problematic optimizations
76
  OMP_NUM_THREADS=1 \
77
- # Suppress tensorboard warnings
78
- TF_ENABLE_ONEDNN_OPTS=0
 
 
 
 
79
 
80
  # Expose port for Hugging Face Spaces
81
  EXPOSE 7860
 
64
  HUGGINGFACE_HUB_CACHE=/app/model_cache \
65
  HF_HUB_CACHE=/app/model_cache \
66
  FONTCONFIG_PATH=/tmp/fontconfig \
67
+ # Critical ONNX Runtime fixes for containers
68
  ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
69
  ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
70
+ ORT_DISABLE_TLS_ARENA=1 \
71
  CTRANSLATE2_FORCE_CPU_ISA=generic \
72
+ # Threading and memory optimizations
73
  TF_CPP_MIN_LOG_LEVEL=2 \
74
  TOKENIZERS_PARALLELISM=false \
 
75
  OMP_NUM_THREADS=1 \
76
+ MKL_NUM_THREADS=1 \
77
+ NUMBA_NUM_THREADS=1 \
78
+ TF_ENABLE_ONEDNN_OPTS=0 \
79
+ # Additional security for containers
80
+ MALLOC_ARENA_MAX=2 \
81
+ PYTHONUNBUFFERED=1
82
 
83
  # Expose port for Hugging Face Spaces
84
  EXPOSE 7860
model_preloader.py CHANGED
@@ -29,6 +29,32 @@ from rich.panel import Panel
29
  from rich.text import Text
30
  import psutil
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  # Add src directory to path
33
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
34
 
@@ -372,7 +398,7 @@ class ModelPreloader:
372
  logger.warning(f"Error saving cache for {model_key}: {e}")
373
 
374
  def load_pyannote_pipeline(self, task_id: str) -> Optional[Pipeline]:
375
- """Load pyannote speaker diarization pipeline."""
376
  try:
377
  console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
378
 
@@ -381,21 +407,48 @@ class ModelPreloader:
381
  if not hf_token:
382
  console.print("[red]Warning: HUGGINGFACE_TOKEN not found. Some models may not be accessible.[/red]")
383
 
384
- pipeline = Pipeline.from_pretrained(
385
- "pyannote/speaker-diarization-3.1",
386
- use_auth_token=hf_token
387
- )
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
- # Test the pipeline
390
  console.print(f"[green]SUCCESS: pyannote.audio pipeline loaded successfully on {self.device}[/green]")
391
-
392
  return pipeline
393
 
394
  except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
396
  logger.error(f"Pyannote loading failed: {e}")
397
  return None
398
-
399
  def load_whisper_model(self, task_id: str) -> Optional[whisper.Whisper]:
400
  """Load Whisper speech recognition model with enhanced cache checking."""
401
  try:
 
29
  from rich.text import Text
30
  import psutil
31
 
32
+ # CRITICAL: Configure ONNX Runtime BEFORE any ML library imports
33
+ import os
34
+ os.environ.update({
35
+ 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
36
+ 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
37
+ 'ORT_DISABLE_TLS_ARENA': '1',
38
+ 'OMP_NUM_THREADS': '1',
39
+ 'MKL_NUM_THREADS': '1',
40
+ 'NUMBA_NUM_THREADS': '1',
41
+ 'TF_ENABLE_ONEDNN_OPTS': '0',
42
+ 'TOKENIZERS_PARALLELISM': 'false',
43
+ 'MALLOC_ARENA_MAX': '2'
44
+ })
45
+
46
+ # Import ONNX Runtime with error suppression
47
+ try:
48
+ import warnings
49
+ warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
50
+ import onnxruntime as ort
51
+ # Force CPU provider only
52
+ ort.set_default_logger_severity(3) # ERROR level only
53
+ except ImportError:
54
+ pass
55
+ except Exception as e:
56
+ print(f"ONNX Runtime warning (expected in containers): {e}")
57
+
58
  # Add src directory to path
59
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
60
 
 
398
  logger.warning(f"Error saving cache for {model_key}: {e}")
399
 
400
  def load_pyannote_pipeline(self, task_id: str) -> Optional[Pipeline]:
401
+ """Load pyannote speaker diarization pipeline with container-safe settings."""
402
  try:
403
  console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
404
 
 
407
  if not hf_token:
408
  console.print("[red]Warning: HUGGINGFACE_TOKEN not found. Some models may not be accessible.[/red]")
409
 
410
+ # Container-safe pipeline loading with error suppression
411
+ with warnings.catch_warnings():
412
+ warnings.filterwarnings("ignore", category=UserWarning)
413
+ warnings.filterwarnings("ignore", message=".*executable stack.*")
414
+
415
+ pipeline = Pipeline.from_pretrained(
416
+ "pyannote/speaker-diarization-3.1",
417
+ use_auth_token=hf_token,
418
+ cache_dir=str(self.cache_dir / "pyannote")
419
+ )
420
+
421
+ # Force CPU execution
422
+ if hasattr(pipeline, '_models'):
423
+ for model_name, model in pipeline._models.items():
424
+ if hasattr(model, 'to'):
425
+ model.to('cpu')
426
 
 
427
  console.print(f"[green]SUCCESS: pyannote.audio pipeline loaded successfully on {self.device}[/green]")
 
428
  return pipeline
429
 
430
  except Exception as e:
431
+ # Check if it's the expected ONNX Runtime warning
432
+ if "executable stack" in str(e).lower():
433
+ console.print("[yellow]ONNX Runtime executable stack warning (expected in containers) - continuing...[/yellow]")
434
+ # Try alternative loading method
435
+ try:
436
+ import warnings
437
+ with warnings.catch_warnings():
438
+ warnings.simplefilter("ignore")
439
+ pipeline = Pipeline.from_pretrained(
440
+ "pyannote/speaker-diarization-3.1",
441
+ use_auth_token=hf_token,
442
+ cache_dir=str(self.cache_dir / "pyannote")
443
+ )
444
+ return pipeline
445
+ except:
446
+ pass
447
+
448
  console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
449
  logger.error(f"Pyannote loading failed: {e}")
450
  return None
451
+
452
  def load_whisper_model(self, task_id: str) -> Optional[whisper.Whisper]:
453
  """Load Whisper speech recognition model with enhanced cache checking."""
454
  try:
requirements.txt CHANGED
@@ -4,7 +4,7 @@ torchaudio==2.0.2
4
  torchvision==0.15.2
5
 
6
  # Keep regular ONNX Runtime with container-safe environment variables
7
- onnxruntime==1.16.3
8
 
9
  # Audio processing
10
  pyannote.audio==3.1.1
 
4
  torchvision==0.15.2
5
 
6
  # Keep regular ONNX Runtime with container-safe environment variables
7
+ onnxruntime==1.15.1
8
 
9
  # Audio processing
10
  pyannote.audio==3.1.1
startup.py CHANGED
@@ -4,11 +4,23 @@ Startup script for Hugging Face Spaces deployment.
4
  Handles model preloading and graceful fallbacks for containerized environments.
5
  """
6
 
 
 
 
 
 
7
  import os
8
  import subprocess
9
  import sys
10
  import logging
11
 
 
 
 
 
 
 
 
12
  # Configure logging
13
  logging.basicConfig(
14
  level=logging.INFO,
 
4
  Handles model preloading and graceful fallbacks for containerized environments.
5
  """
6
 
7
+ # Suppress ONNX Runtime warnings BEFORE any imports
8
+ import warnings
9
+ warnings.filterwarnings("ignore", message=".*executable stack.*")
10
+ warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
11
+
12
  import os
13
  import subprocess
14
  import sys
15
  import logging
16
 
17
+ # Set critical environment variables immediately
18
+ os.environ.update({
19
+ 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
20
+ 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
21
+ 'ORT_DISABLE_TLS_ARENA': '1'
22
+ })
23
+
24
  # Configure logging
25
  logging.basicConfig(
26
  level=logging.INFO,