# Image for the candidate-ranking pipeline.
#
# The container runs /app/docker-entrypoint.sh, which selects one of the
# precompute, rank, or full (precompute + rank) modes from its first argument.
# The candidates file is NOT baked into the image; mount it at runtime
# (see CANDIDATES_PATH below).
FROM python:3.11-slim

# All application files live under /app; relative paths in later
# instructions (e.g. requirements.txt) resolve against this directory.
WORKDIR /app

# System packages. update + install + list cleanup happen in a single layer
# so the apt index is never stale and never persisted in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the pip layer below stays cached
# until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt

# Install Python dependencies.
# --no-cache-dir keeps the image small by not storing pip's download cache.
# NOTE(review): `--upgrade pip` is unpinned, so the pip version depends on
# build time; pin it if fully reproducible builds are required.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Application source files and helper scripts.
COPY src/ /app/src/
COPY scripts/ /app/scripts/

# Data directory (skill_aliases.json).
COPY data/ /app/data/

# Precomputed artifacts (BM25 index + LightGBM model).
# Generated by precompute.py; the precomputed/ directory must exist in the
# build context before the image is built, otherwise this COPY fails.
COPY precomputed/ /app/precomputed/

# Output directories for logs and result files.
RUN mkdir -p /app/logs /app/out

# Runtime defaults.
#   CANDIDATES_PATH  default candidates file location; the full
#                    candidates.jsonl (487MB) is not part of the image, so
#                    mount it with -v or point this variable elsewhere.
#   OUT_PATH         default path of the output CSV.
#   BASE_DIR         application base directory.
ENV CANDIDATES_PATH=/app/candidates.jsonl \
    OUT_PATH=/app/out/CTRL_COFFEE_REPEAT.csv \
    BASE_DIR=/app

# Entrypoint script selects precompute, rank, or full pipeline.
COPY docker-entrypoint.sh /app/docker-entrypoint.sh
RUN chmod +x /app/docker-entrypoint.sh

# NOTE(review): no USER instruction is set, so the process runs as the base
# image's default user (presumably root). Switching to a non-root user would
# need matching ownership on /app/logs, /app/out and any bind-mounted output
# directory; confirm with deployers before changing.

# Default: run full pipeline (precompute + rank).
# Exec (JSON-array) form so the script is started directly, without a
# /bin/sh -c wrapper.
CMD ["/app/docker-entrypoint.sh", "full"]