# Use an official Python runtime (slim variant) as the parent image
FROM python:3.10-slim

# All following relative paths (COPY destinations, RUN commands, CMD) resolve against /app
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps apt from pulling in optional "recommended"
# packages, which only add size to the image.
# ca-certificates is listed explicitly so git over HTTPS keeps working even
# though recommended packages are no longer installed implicitly.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    git \
    git-lfs \
    && rm -rf /var/lib/apt/lists/*

# Bring in only the dependency manifest at this point, so that editing
# application code later does not invalidate the install layer below
COPY requirements.txt ./

# Install the Python packages listed in the manifest.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python -m pip install --no-cache-dir --requirement requirements.txt

# Bake the Hugging Face models into the image at build time so the container
# does not have to download them on every boot.

# Summarization model: tokenizer and seq2seq weights for nsi319/legal-pegasus
RUN python -c "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM; \
    AutoTokenizer.from_pretrained('nsi319/legal-pegasus'); \
    AutoModelForSeq2SeqLM.from_pretrained('nsi319/legal-pegasus')"

# Retrieval models: BAAI/bge-base-en-v1.5 (loaded as a SentenceTransformer)
# and BAAI/bge-reranker-base (loaded as a CrossEncoder)
RUN python -c "from sentence_transformers import SentenceTransformer, CrossEncoder; \
    SentenceTransformer('BAAI/bge-base-en-v1.5'); \
    CrossEncoder('BAAI/bge-reranker-base')"

# Copy the rest of the application code (including .git if not ignored)
COPY . .

# Download the heavy data artifacts from the SaiPranav09/NyayLens-Data dataset
# repo. All four files are requested in a single huggingface-cli invocation
# (the CLI accepts several filenames), which yields one layer instead of four.
# --local-dir . recreates each file's repo-relative path under the working
# directory, so the files land in data/processed/... and
# outputs/summarization/final/ where the paths below say they belong.
RUN huggingface-cli download SaiPranav09/NyayLens-Data \
    data/processed/embeddings/paragraph_ids.json \
    data/processed/faiss/faiss_index.bin \
    data/processed/indexed/paragraphs.db \
    outputs/summarization/final/model.safetensors \
    --repo-type dataset \
    --local-dir .


# Document that the service listens on port 7860 (Hugging Face standard).
# EXPOSE is metadata only; it does not publish the port by itself.
EXPOSE 7860

# Start the application with Uvicorn, serving the `app` object from the
# src.api.main module on all interfaces at the port declared above.
# Exec (JSON-array) form is used, so no shell wraps the uvicorn process.
CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "7860"]