snikhilesh committed on
Commit
6340326
·
verified ·
1 Parent(s): 060930d

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -10
  2. backend/model_loader.py +13 -30
  3. backend/requirements.txt +7 -9
Dockerfile CHANGED
@@ -1,29 +1,32 @@
1
- FROM python:3.10
2
 
3
  WORKDIR /app
4
 
5
- # Install system dependencies for PDF processing
6
- RUN apt-get update && apt-get install -y \
7
  tesseract-ocr \
8
  tesseract-ocr-eng \
9
  poppler-utils \
 
10
  git \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
  # Copy requirements and install Python packages
14
  COPY backend/requirements.txt requirements.txt
15
- RUN pip install --no-cache-dir -r requirements.txt
 
16
 
17
  # Copy application code
18
  COPY backend/ .
19
 
20
- # Set environment variables
21
- ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
22
- ENV HF_HOME=/app/.cache/huggingface
23
- ENV PYTHONUNBUFFERED=1
 
24
 
25
- # Create cache directory
26
- RUN mkdir -p /app/.cache/huggingface
27
 
28
  EXPOSE 7860
29
 
 
1
+ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies for PDF processing and ML
6
+ RUN apt-get update && apt-get install -y --no-install-recommends \
7
  tesseract-ocr \
8
  tesseract-ocr-eng \
9
  poppler-utils \
10
+ libgomp1 \
11
  git \
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
  # Copy requirements and install Python packages
15
  COPY backend/requirements.txt requirements.txt
16
+ RUN pip install --no-cache-dir --upgrade pip && \
17
+ pip install --no-cache-dir -r requirements.txt
18
 
19
  # Copy application code
20
  COPY backend/ .
21
 
22
+ # Set environment variables for HuggingFace and performance
23
+ ENV TRANSFORMERS_CACHE=/app/.cache/huggingface \
24
+ HF_HOME=/app/.cache/huggingface \
25
+ PYTHONUNBUFFERED=1 \
26
+ TOKENIZERS_PARALLELISM=false
27
 
28
+ # Create cache directory with proper permissions
29
+ RUN mkdir -p /app/.cache/huggingface && chmod -R 777 /app/.cache
30
 
31
  EXPOSE 7860
32
 
backend/model_loader.py CHANGED
@@ -6,25 +6,18 @@ Manages model loading, caching, and inference
6
  import os
7
  import logging
8
  from typing import Dict, Any, Optional, List
9
-
10
- # Lazy imports for ML libraries
11
- try:
12
- import torch
13
- from transformers import (
14
- AutoTokenizer,
15
- AutoModel,
16
- AutoModelForSequenceClassification,
17
- AutoModelForTokenClassification,
18
- pipeline
19
- )
20
- TRANSFORMERS_AVAILABLE = True
21
- except ImportError:
22
- TRANSFORMERS_AVAILABLE = False
23
- logger = logging.getLogger(__name__)
24
- logger.warning("Transformers not available - AI models will not load")
25
-
26
  from functools import lru_cache
27
 
 
 
 
 
 
 
 
 
 
 
28
  logger = logging.getLogger(__name__)
29
 
30
  # Get HF token from environment
@@ -38,13 +31,7 @@ class ModelLoader:
38
  """
39
 
40
  def __init__(self):
41
- if not TRANSFORMERS_AVAILABLE:
42
- logger.warning("Transformers library not available - using fallback mode")
43
- self.device = "cpu"
44
- self.loaded_models = {}
45
- self.model_configs = {}
46
- return
47
-
48
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
49
  self.loaded_models = {}
50
  self.model_configs = self._get_model_configs()
@@ -117,10 +104,6 @@ class ModelLoader:
117
  """
118
  Load a model by key, with caching
119
  """
120
- if not TRANSFORMERS_AVAILABLE:
121
- logger.warning(f"Cannot load model {model_key} - transformers not available")
122
- return None
123
-
124
  try:
125
  # Check if already loaded
126
  if model_key in self.loaded_models:
@@ -266,8 +249,8 @@ class ModelLoader:
266
  self.loaded_models.clear()
267
  logger.info("Cleared all model caches")
268
 
269
- # Force garbage collection
270
- if TRANSFORMERS_AVAILABLE and torch.cuda.is_available():
271
  torch.cuda.empty_cache()
272
 
273
 
 
6
  import os
7
  import logging
8
  from typing import Dict, Any, Optional, List
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  from functools import lru_cache
10
 
11
+ # Required ML libraries - these MUST be installed
12
+ import torch
13
+ from transformers import (
14
+ AutoTokenizer,
15
+ AutoModel,
16
+ AutoModelForSequenceClassification,
17
+ AutoModelForTokenClassification,
18
+ pipeline
19
+ )
20
+
21
  logger = logging.getLogger(__name__)
22
 
23
  # Get HF token from environment
 
31
  """
32
 
33
  def __init__(self):
34
+ """Initialize the model loader with GPU support if available"""
 
 
 
 
 
 
35
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
36
  self.loaded_models = {}
37
  self.model_configs = self._get_model_configs()
 
104
  """
105
  Load a model by key, with caching
106
  """
 
 
 
 
107
  try:
108
  # Check if already loaded
109
  if model_key in self.loaded_models:
 
249
  self.loaded_models.clear()
250
  logger.info("Cleared all model caches")
251
 
252
+ # Force garbage collection and clear GPU cache if available
253
+ if torch.cuda.is_available():
254
  torch.cuda.empty_cache()
255
 
256
 
backend/requirements.txt CHANGED
@@ -10,20 +10,18 @@ Pillow==10.2.0
10
  pytesseract==0.3.10
11
  PyMuPDF==1.23.8
12
 
13
- # Machine Learning - HuggingFace Models
14
- torch==2.2.0
15
- torchvision==0.17.0
16
- transformers==4.38.1
17
- accelerate==0.27.2
18
  sentencepiece==0.2.0
19
- protobuf==4.25.3
20
- safetensors==0.4.2
21
- huggingface-hub==0.21.4
22
 
23
  # Data Processing
24
  numpy==1.26.4
25
  pandas==2.2.0
26
- scikit-learn==1.4.1
27
 
28
  # Utilities
29
  requests==2.31.0
 
10
  pytesseract==0.3.10
11
  PyMuPDF==1.23.8
12
 
13
+ # Machine Learning - HuggingFace Models (optimized for Docker)
14
+ torch==2.4.0
15
+ transformers==4.45.0
16
+ accelerate==0.34.0
 
17
  sentencepiece==0.2.0
18
+ safetensors==0.4.5
19
+ huggingface-hub==0.25.0
20
+ scipy==1.14.1
21
 
22
  # Data Processing
23
  numpy==1.26.4
24
  pandas==2.2.0
 
25
 
26
  # Utilities
27
  requests==2.31.0