# syntax=docker/dockerfile:1
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies (OCR + PDF tooling).
# --no-install-recommends keeps the image lean; apt lists are removed in the
# same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    poppler-utils \
    tesseract-ocr \
    tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Copy only the manifest first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the sentence-transformers embedding model during build
# so it's cached in the image and doesn't need network at runtime
ENV HF_HOME=/app/.cache/huggingface
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"

# Copy application code
COPY . .

# Create data directories and user for HuggingFace Spaces (runs as UID 1000)
# PyTorch requires a named user in /etc/passwd, otherwise getpass.getuser() throws a KeyError.
# chown -R also transfers ownership of the root-created HF model cache above.
RUN useradd -m -u 1000 appuser && \
    mkdir -p data chroma_db logs && \
    chown -R appuser:appuser /app

# Specify user for HF Spaces
USER appuser

# Expose ports for backend (8000) and frontend (7860 = HF Spaces default)
EXPOSE 7860 8000

# Set environment to use Hugging Face
ENV LLM_PROVIDER=huggingface

# Fix: TRANSFORMERS_OFFLINE MUST be 1 otherwise the embedder tries to write to the cache dir (which is owned by root)
# and throws a PermissionError, crashing Uvicorn at startup.
ENV TRANSFORMERS_OFFLINE=1

# Fix: HF_HUB_OFFLINE MUST be 0 otherwise the Inference APIs (for generating lessons) are blocked from making network requests.
ENV HF_HUB_OFFLINE=0

# Enable Supabase for persistent storage
ENV USE_SUPABASE=true

# Production startup script with health checks, written via a BuildKit heredoc.
# (More robust than `echo '...\n...'`, whose \n handling depends on which shell
# provides the echo builtin.) The quoted 'EOF' delimiter prevents build-time
# variable expansion, so $(date), $BACKEND_PID, etc. are evaluated at runtime.
COPY --chown=appuser:appuser --chmod=755 <<'EOF' /app/start.sh
#!/bin/bash
set -e

# OVERRIDE ANY SPACE SETTINGS TO ENSURE INFERENCE API WORKS
export HF_HUB_OFFLINE=0
export TRANSFORMERS_OFFLINE=1

echo "===== Application Startup at $(date) ====="
echo "=== FocusFlow Startup ==="

# Wait for DNS/networking to be ready (HF Spaces can be slow)
echo "Waiting for network readiness..."
sleep 3

echo "Starting backend on port 8000..."

# Start FastAPI backend
uvicorn backend.main:app --host 0.0.0.0 --port 8000 > logs/backend.log 2>&1 &
BACKEND_PID=$!
echo "Backend started with PID $BACKEND_PID"

# Wait for backend to be healthy (max 90 seconds)
echo "Waiting for backend health check..."
for i in {1..90}; do
if curl -sf http://localhost:8000/health > /dev/null 2>&1; then
echo "✅ Backend is healthy!"
break
fi
if [ $i -eq 90 ]; then
echo "❌ Backend failed to start. Logs:"
tail -50 logs/backend.log
exit 1
fi
echo "Attempt $i/90 - waiting..."
sleep 1
done

# Start Streamlit frontend; exec so Streamlit replaces the shell as PID 1
# and receives SIGTERM from `docker stop` / Space restarts.
echo "Starting frontend on port 7860..."
exec streamlit run app.py --server.port 7860 --server.address 0.0.0.0 --server.headless true 2>&1
EOF

# Let orchestrators detect a wedged container; generous start-period because
# model loading can be slow on Spaces.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD curl -fsS http://localhost:8000/health || exit 1

# Run startup script (exec form: start.sh is PID 1 until it execs streamlit)
CMD ["/app/start.sh"]