# InsuranceBot / Dockerfile
# Last commit 261abb2 (rohitsar567): fix(api): KI-145 — RENAME-vs-VARIANT smart dedup (final 164 cards)
# Multi-stage build:
# Stage 1 — build the Next.js frontend to a static export
# Stage 2 — Python runtime serving FastAPI + the built frontend on the same port
# ----------------------------------------------------------------------------
# Stage 1 — Node builder
# Produces the static frontend export; the runtime stage copies
# /app/frontend/out from this stage via COPY --from=frontend-builder.
# ----------------------------------------------------------------------------
FROM node:22-alpine AS frontend-builder
WORKDIR /app/frontend
# Install deps first for layer caching: only the manifest files are copied
# here, so the `npm ci` layer is reused until one of them changes.
# NOTE(review): the trailing `*` lets COPY succeed without a lockfile, but
# `npm ci` itself needs one — confirm the lockfile is always committed.
COPY frontend/package.json frontend/package-lock.json* ./
RUN npm ci --no-audit --no-fund
# Copy the rest of the frontend and build
COPY frontend/ ./
# In production, the frontend calls the same origin (no separate backend URL).
# Declared before `npm run build` so the value is present in the build environment.
ENV NEXT_PUBLIC_BACKEND_URL=""
# Static-export the app — produces ./out
RUN npm run build
# ----------------------------------------------------------------------------
# Stage 2 — Python runtime (FastAPI + corpus + Chroma + DuckDB + frontend)
# ----------------------------------------------------------------------------
FROM python:3.11-slim
WORKDIR /app
# OS-level packages (kept in alphabetical order so diffs stay small):
#   pdfplumber + torch CPU + sentence-transformers → build-essential, libpoppler
#   pydub (webm→wav transcode for Sarvam STT)      → ffmpeg
# The apt package index is deleted in the same layer so it never ships in
# the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        libpoppler-cpp-dev \
        pkg-config \
        poppler-utils \
    && rm -rf /var/lib/apt/lists/*
# Install Python deps. Only requirements.txt is copied at this point, so the
# (slow) install layer is rebuilt only when the dependency list changes, not
# on every source edit.
COPY requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
# Keep the Hugging Face cache under /app rather than in the build user's home.
# The RUN below executes as root, so without this the model would be cached
# under root's home directory. The container later runs as the non-root
# `user` account (see the USER instruction further down), which cannot read
# that location — the model would be downloaded again on first use and the
# copy baked into the image would be dead weight. /app is chown'ed to that
# account later in this file, so a cache placed here stays readable and
# writable at runtime. ENV (not ARG) is used on purpose: the same path must
# be in effect when the application starts.
# NOTE(review): assumes the installed sentence-transformers release stores
# models in the Hugging Face hub cache (honours HF_HOME) — confirm against
# the version pinned in requirements.txt.
ENV HF_HOME=/app/.cache/huggingface
# Pre-download the embedding model so the first request is fast (no cold load)
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('BAAI/bge-small-en-v1.5')"
# Copy the backend source + RAG modules (rag/ holds .py only at this stage)
# Sources are relative to the build context; destinations resolve against
# the current WORKDIR (/app).
COPY backend ./backend
COPY rag ./rag
COPY eval ./eval
COPY 70-docs ./70-docs
# Curated structured data the backend reads at request time:
# - 40-data/reviews/<slug>.json → /api/insurers/{slug}/reviews
# - 40-data/policy_facts/*.json → marketplace + scorecard fact cards
# - 40-data/premiums/*.json → premium calculator illustrative baseline
# Total ~2.3 MB — small enough to bake into the Space image.
COPY 40-data ./40-data
# Pull the large data (corpus PDFs + pre-built Chroma vectors + extracted JSONs)
# from the companion HF dataset rather than baking it into the Space repo.
# Why: the free-tier Space repo has a 1 GB cap; rag/corpus + rag/vectors is
# ~310 MB and would have made the Space repo unviable on top of the regular
# code. HF datasets get 50 GB free quota — the right place for this data.
# Public dataset, no token needed at build time. See D-019.
#
# KI-119 (2026-05-15) — the DATASET_CACHE_BUST arg forces this layer to
# re-execute whenever we update the dataset. Without it, Docker reuses the
# cached snapshot_download layer (command string unchanged) even though the
# remote dataset's content changed. Symptom: HF Space served stale Chroma
# (7356 chunks from a prior ingest) instead of the freshly-uploaded
# cleaned one (3799 chunks). Bump DATASET_CACHE_BUST manually each time the
# dataset is re-uploaded; the value just needs to change.
#
# NOTE(review): the final `du ... 2>&1 | sed` step is a pipeline, so its exit
# status is sed's. With the default shell (no pipefail configured in this
# file) a missing corpus/vectors/extracted directory is printed in the build
# log but does not fail the build — confirm that is the intended behaviour.
ARG DATASET_CACHE_BUST=2026-05-15-ki145-v1
RUN echo "Dataset cache bust: ${DATASET_CACHE_BUST}" && python -c "\
from huggingface_hub import snapshot_download; \
snapshot_download(\
repo_id='rohitsar567/insurance-bot-data', \
repo_type='dataset', \
local_dir='/app/rag', \
allow_patterns=['rag/corpus/**','rag/vectors/**','rag/extracted/**'], \
) " && \
# The dataset preserves the rag/ prefix in path_in_repo, so the snapshot
# writes to /app/rag/rag/corpus/... — flatten one level so existing
# backend imports (rag/corpus/, rag/vectors/) keep working unchanged.
if [ -d /app/rag/rag ]; then \
cp -r /app/rag/rag/* /app/rag/ && rm -rf /app/rag/rag; \
fi && \
echo "Dataset pull complete:" && \
du -sh /app/rag/corpus /app/rag/vectors /app/rag/extracted 2>&1 | sed 's/^/ /'
# Static frontend export produced by the frontend-builder stage.
COPY --from=frontend-builder /app/frontend/out /app/frontend/out
# Launcher script for the container.
COPY entrypoint.sh /app/entrypoint.sh
# Single root-only finishing step: mark the launcher executable, create the
# non-root account (HF Spaces recommends this for Docker spaces) and hand
# /app over to it.
# NOTE(review): the recursive chown rewrites every file under /app into this
# layer; avoiding that would require COPY --chown on the earlier steps.
RUN chmod +x /app/entrypoint.sh \
    && useradd -m -u 1000 user \
    && chown -R user:user /app
# HF Spaces sends traffic to $PORT (default 7860). uvicorn will bind to it.
ENV PORT=7860
EXPOSE 7860
# Everything from here on, including the running container, is unprivileged.
USER user
# Start: entrypoint validates Chroma + (re-)ingests if needed, then runs uvicorn
CMD ["sh", "/app/entrypoint.sh"]