# ai-codelens / Dockerfile
# Last commit (c7ae96b):
#   feat: pre-download and cache embedding models in Docker image for faster startup
# Base image: official slim Python 3.11 (Debian-based, small footprint)
FROM python:3.11-slim

# System dependencies:
#   - git: needed by the app to clone repositories
#   - ca-certificates: listed explicitly so HTTPS clones keep working now that
#     recommended packages are no longer pulled in
# --no-install-recommends keeps git's optional extras out of the image.
# NOTE(review): this also drops openssh-client; add it back if the app ever
# clones over SSH.
# The apt package lists are removed in the same layer so they never reach the
# final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*
# All following relative paths resolve against /app
WORKDIR /app

# Install Python dependencies before the application source is copied, so this
# (slow) layer is only rebuilt when requirements.txt itself changes.
COPY requirements.txt ./
RUN python -m pip install --no-cache-dir --requirement requirements.txt
# --- Pre-download embedding models so they are baked into the image ---
# This lets the app start without fetching models from the network (otherwise
# it can sit in 'Starting...' while they download).
#
# The cache location is pinned explicitly instead of relying on the build
# user's home directory: the build runs as root, but the container may be
# started as a different user (Hugging Face Spaces uses UID 1000), which cannot
# read /root/.cache and would silently re-download everything at startup.
#   - HF_HOME: root of the Hugging Face cache
#   - SENTENCE_TRANSFORMERS_HOME: default cache folder of sentence-transformers
ENV HF_HOME=/opt/hf_cache \
    SENTENCE_TRANSFORMERS_HOME=/opt/hf_cache/sentence_transformers

# Download both models, then open up the cache to any runtime user. The chmod
# is done in the same RUN so the model files are not duplicated in a second
# layer.
RUN mkdir -p "${SENTENCE_TRANSFORMERS_HOME}" \
    && python -c "from sentence_transformers import SentenceTransformer; [SentenceTransformer(name) for name in ('all-MiniLM-L6-v2', 'intfloat/e5-small-v2')]" \
    && chmod -R a+rwX "${HF_HOME}"
# Dedicated unprivileged account for running the app. UID 1000 matches the
# user Hugging Face Spaces starts Docker containers as, so file ownership set
# at build time lines up with the runtime user.
RUN useradd --create-home --uid 1000 appuser

# Copy the rest of the application, owned by the runtime user. Setting
# ownership at copy time avoids a recursive chmod/chown that would rewrite
# every file into an extra layer.
COPY --chown=appuser:appuser . .

# Runtime environment: telemetry opt-outs and headless Streamlit
ENV CHROMA_TELEMETRY_NO_ANALYTICS=True \
    ANONYMIZED_TELEMETRY=False \
    STREAMLIT_SERVER_HEADLESS=true

# Make sure db_store exists, and give the runtime user ownership of it and of
# /app itself so the app can create files there. This replaces the former
# world-writable `chmod -R 777 /app`.
RUN mkdir -p db_store \
    && chown appuser:appuser /app db_store

# Drop root privileges for everything that follows, including the running app
USER appuser
# Port the app listens on (Hugging Face expects 7860). EXPOSE is documentation
# only; it does not publish the port.
EXPOSE 7860/tcp

# Start Streamlit (exec form), bound to all interfaces on port 7860
CMD [ \
    "streamlit", "run", "app.py", \
    "--server.port", "7860", \
    "--server.address", "0.0.0.0" \
]