# Dockerfile for Hugging Face Spaces
FROM python:3.11-slim

# Every relative path in the instructions below resolves against /app
WORKDIR /app

# Runtime environment, declared in a single instruction:
# - PYTHONDONTWRITEBYTECODE=1: Python does not write .pyc files
# - PYTHONUNBUFFERED=1: stdout/stderr are not buffered
# - HF_HOME: Hugging Face cache location, kept under the app directory
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/app/.cache/huggingface

# Install system dependencies for PyMuPDF and other libs.
# - --no-install-recommends keeps optional "recommended" packages out of the image.
# - The apt package lists are removed in the same layer so they never persist.
# - curl is used by the HEALTHCHECK instruction in this file.
# - Packages are listed alphabetically to keep diffs small.
# NOTE(review): libxrender-dev is a development package; the runtime library
# (libxrender1) may be sufficient -- confirm before swapping it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
    && rm -rf /var/lib/apt/lists/*

# Bring in only the dependency manifest at this point, so the install layer
# below stays cached until requirements.txt itself changes
COPY requirements.txt ./

# Upgrade pip, then install the Python dependencies; --no-cache-dir keeps
# pip's download cache out of the image layer
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Fetch NLTK data at build time by running the project's download script.
# The script is copied on its own so this layer is only rebuilt when the
# script (or an earlier layer) changes.
# NOTE(review): where the data is stored is decided by download_nltk.py, which
# is not visible here -- confirm the location is readable by the runtime user.
COPY download_nltk.py ./
RUN ["python", "download_nltk.py"]

# Copy application code
COPY . .

# Create the directories the application writes to and open up their
# permissions (HF Spaces only allows writes to certain directories).
# mkdir and chmod run in ONE layer: a separate chmod -R layer would re-record
# every file already present under these paths, growing the image.
RUN mkdir -p /app/uploads /app/user_data /app/vector_store /app/instance /app/.cache \
    && chmod -R 777 /app/uploads /app/user_data /app/vector_store /app/instance /app/.cache

# Document the listening port: 7860 is the port Hugging Face Spaces requires
EXPOSE 7860

# Container health probe: after a 120s start-up grace period, request /health
# every 30s (10s timeout per attempt); 3 consecutive failures mark the
# container unhealthy
HEALTHCHECK --start-period=120s --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Launch the application (app:app) under gunicorn, bound to 0.0.0.0:7860
# - one worker, to conserve memory for ML models
# - two threads inside that worker
# - 120s worker timeout, so model loading on the first request can finish
# - --preload is not passed, which allows lazy loading
CMD ["gunicorn", \
     "--bind", "0.0.0.0:7860", \
     "--timeout", "120", \
     "--workers", "1", \
     "--threads", "2", \
     "app:app"]