# syntax=docker/dockerfile:1
# Runtime image: Ollama (local LLM server) + FastAPI app served by uvicorn on :7860.
FROM python:3.11-slim

# bash with pipefail so a failed download in `curl | sh` fails the build (hadolint DL4006).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# System dependencies; zstd is needed by the Ollama installer to decompress release blobs.
# update + install + list cleanup in one layer so the apt cache never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      zstd \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama.
# NOTE(review): `curl | sh` executes an unpinned remote script as root — consider
# downloading a pinned release and verifying its checksum for reproducible builds.
RUN curl -fsSL https://ollama.com/install.sh | sh

WORKDIR /app

# Install Python dependencies before copying source so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code.
COPY app.py .

# Startup script: launch Ollama, poll its API until ready (instead of a fixed sleep,
# which races on slow hosts), pull the model, then exec uvicorn so it becomes PID 1
# and receives SIGTERM from `docker stop`.
COPY --chmod=0755 <<'EOF' /app/start.sh
#!/bin/bash
set -euo pipefail

echo "Starting Ollama service..."
ollama serve &

echo "Waiting for Ollama to be ready..."
for _ in $(seq 1 30); do
    if curl -fsS http://localhost:11434/api/version >/dev/null 2>&1; then
        break
    fi
    sleep 1
done

echo "Pulling model deepseek-r1:1.5b (no cache)..."
ollama pull deepseek-r1:1.5b

echo "Model ready. Starting FastAPI server..."
exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1 --timeout-keep-alive 300 --no-access-log
EOF

# Run as a non-root user; Ollama stores pulled models under $HOME/.ollama,
# so the user needs a writable home directory.
RUN useradd --system --create-home --uid 10001 app \
    && chown -R app:app /app
USER app
ENV HOME=/home/app

# Disable Ollama prompt history; enable flash attention.
ENV OLLAMA_NOHISTORY=1 \
    OLLAMA_FLASH_ATTENTION=1

# Documentation only (does not publish the port): FastAPI listens on 7860.
EXPOSE 7860

# Generous start period: first boot pulls the model before /health can respond.
HEALTHCHECK --interval=30s --timeout=10s --start-period=90s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Exec form so the script (and, via its `exec`, uvicorn) handles signals directly.
CMD ["/app/start.sh"]