# Image for the Streamlit application in app.py.
#
# Layers are ordered from least to most frequently changing:
#   OS packages -> Python dependencies -> pre-downloaded models -> app source
# so that editing application code does not trigger a re-install of the
# dependencies or a re-download of the models.

FROM python:3.11-slim

WORKDIR /app

# Install system dependencies for PyMuPDF (compiler toolchain) and curl for
# the HEALTHCHECK below. The apt lists are removed in the same layer so they
# never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc && \
    rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so this layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create a non-root user for security. /app is handed to that user here
# (non-recursively) so the process can still write into its working
# directory; the source files get their ownership from COPY --chown below.
RUN useradd -m -u 1000 appuser && \
    chown appuser:appuser /app

USER appuser

# Pre-download default models at build time to eliminate cold-start latency.
# Downloaded as appuser so the cache is located in
# /home/appuser/.cache/huggingface. These steps run BEFORE the application
# source is copied so that source-only changes reuse the cached model layers.
RUN python -c "import os; os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING']='1'; from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-m3')"
RUN python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; AutoTokenizer.from_pretrained('cross-encoder/nli-deberta-v3-base'); AutoModelForSequenceClassification.from_pretrained('cross-encoder/nli-deberta-v3-base')"

# Application source goes last (it changes most often). --chown assigns
# ownership while copying, avoiding a separate `chown -R` layer that would
# duplicate every copied file.
COPY --chown=appuser:appuser . .

# Port the Streamlit server listens on (see --server.port in CMD).
EXPOSE 7860

# Probe Streamlit's built-in health endpoint; curl -f turns HTTP errors into
# a non-zero exit status.
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:7860/_stcore/health || exit 1

# Exec form so streamlit runs as PID 1 and receives stop signals directly.
# NOTE(review): CORS and XSRF protection are disabled here, presumably
# because the app is served behind a proxy/iframe host - confirm this is
# intended before exposing the container directly to untrusted networks.
CMD ["streamlit", "run", "app.py", \
     "--server.port=7860", \
     "--server.address=0.0.0.0", \
     "--server.headless=true", \
     "--browser.gatherUsageStats=false", \
     "--server.enableCORS=false", \
     "--server.enableXsrfProtection=false"]