# =============================================================================
# =============================================================================
# Stage 1: Builder - install dependencies and download models
# =============================================================================
FROM python:3.11-slim-bookworm AS builder

WORKDIR /app

# Build-time system packages. The apt lists are removed in the same layer so
# the package cache never lands in the image.
# NOTE(review): curl does not appear to be used by any later builder step
# (models are fetched via huggingface_hub) — confirm before removing it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*
# Use CPU-only torch (avoids 2GB+ CUDA libs).
# PIP_EXTRA_INDEX_URL lets the requirements.txt install below resolve any
# torch-family dependencies against the CPU wheel index as well. This ENV
# lives only in the builder stage; it does not reach the runtime image.
ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
# Install torch CPU-only first, so the later requirements install sees torch
# already satisfied and does not pull the default CUDA build.
# NOTE(review): torch is unpinned here — pin a version for reproducible builds.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
# Install pinned dependencies from requirements.txt for reproducible builds
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code and install package (--no-deps since deps already installed)
# Cache note: pyproject.toml is copied *before* sage/ because it changes less
# often — a source-only change then invalidates just the sage/ layer and the
# install step. (A pyproject.toml change still rebuilds every later layer in
# this stage; Docker invalidates all layers after the first changed one.)
COPY pyproject.toml .
COPY sage/ sage/
RUN pip install --no-cache-dir . --no-deps
# Pre-download models to a cache directory so the runtime container starts
# without any network access; the whole cache is copied into the final stage.
ENV HF_HOME=/app/.cache/huggingface
# Download E5-small embedding model (~134MB). The trailing backslashes are
# Dockerfile line continuations, so the whole python -c string is one line.
RUN python -c "\
from sentence_transformers import SentenceTransformer; \
SentenceTransformer('intfloat/e5-small-v2')"
# Download HHEM hallucination detection model (~892MB).
# HHEM's config points at a foundation T5 model (config.foundation) whose
# tokenizer/config must also be cached; the safetensors weights are fetched
# directly via hf_hub_download because trust_remote_code model loading is
# deferred to runtime.
RUN python -c "\
from transformers import AutoConfig, AutoTokenizer; \
from huggingface_hub import hf_hub_download; \
config = AutoConfig.from_pretrained('vectara/hallucination_evaluation_model', trust_remote_code=True); \
AutoTokenizer.from_pretrained(config.foundation); \
AutoConfig.from_pretrained(config.foundation); \
hf_hub_download('vectara/hallucination_evaluation_model', 'model.safetensors')"
# =============================================================================
# Stage 2: Runtime - slim image with only what's needed
# =============================================================================
FROM python:3.11-slim-bookworm AS runtime

WORKDIR /app

# curl is kept solely for the HEALTHCHECK probe; no build tools are installed.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged user; HF Spaces requires UID 1000 with a home directory.
RUN useradd -m -u 1000 user
# Copy installed packages from builder. These stay root-owned (read-only for
# the app user), which is fine — the app only needs to execute them.
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
# Copy application code and pre-downloaded models with ownership set at copy
# time. Using COPY --chown instead of a follow-up `RUN chown -R /app` matters
# here: chown rewrites every file's metadata in a *new* layer, so the ~1GB
# model cache would be stored twice in the image.
COPY --from=builder --chown=user:user /app/sage /app/sage
COPY --from=builder --chown=user:user /app/.cache /app/.cache
# Environment
ENV HF_HOME=/app/.cache/huggingface
ENV PYTHONUNBUFFERED=1
# Make /app itself writable by the app user. Non-recursive on purpose: the
# directory contents are already owned via COPY --chown above, so this layer
# only touches the single /app inode.
RUN chown user:user /app
# Drop privileges for everything from here on (HEALTHCHECK and CMD run as user).
USER user
# Default port 7860 for HF Spaces; overridden by PORT env var at runtime
ENV PORT=7860
# EXPOSE is documentation only — it does not publish the port.
EXPOSE 7860
# Health check with startup grace period (models take ~30s to load).
# Shell-form CMD is deliberate so ${PORT:-7860} is expanded at probe time.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
CMD curl -sf http://localhost:${PORT:-7860}/health || exit 1
# Exec-form CMD: python is PID 1 and receives SIGTERM from `docker stop`.
# NOTE(review): no --port flag is passed — assumes sage.api.run reads the
# PORT env var itself; confirm against the application entry point.
CMD ["python", "-m", "sage.api.run", "--host", "0.0.0.0"]
|