# GPU inference image: CUDA 12.1 development base, Python 3.11 and
# llama-cpp-python compiled with CUDA enabled. Runs main.py and probes a
# /health endpoint on port 8000.
#
# NOTE(review): the container runs as root and ships the full -devel toolchain.
# Consider a non-root USER and a multi-stage build onto a -runtime base once
# write access to /data and the model download location have been confirmed.
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04

WORKDIR /app

# Install Python and the native build toolchain.
# DEBIAN_FRONTEND is set inline (not via ENV) so that any package configuration
# prompt (e.g. tzdata, if pulled in as a dependency) cannot stall the build,
# while keeping the variable out of the runtime environment.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        cmake \
        git \
        python3-pip \
        python3.11 \
        python3.11-dev \
    && rm -rf /var/lib/apt/lists/*

# Make `python` and `python3` resolve to Python 3.11.
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1

# Upgrade pip for the interpreter selected above.
RUN python -m pip install --no-cache-dir --upgrade pip

# Build llama-cpp-python with CUDA support. This is the slowest step, so it
# runs before requirements.txt is copied: editing requirements.txt then no
# longer invalidates this layer.
# NOTE(review): recent llama-cpp-python releases document -DGGML_CUDA=on as the
# CUDA switch; confirm LLAMA_CUDA is still honored by the version this resolves
# to, and consider pinning that version.
RUN CMAKE_ARGS="-DLLAMA_CUDA=on" python -m pip install --no-cache-dir llama-cpp-python

# Install the remaining Python dependencies.
COPY requirements.txt .
RUN python -m pip install --no-cache-dir -r requirements.txt

# Copy application code last so source edits do not invalidate dependency layers.
COPY . .

# Create data directory for persistence.
RUN mkdir -p /data

# Runtime configuration defaults; override with `docker run -e KEY=value`.
ENV PYTHONUNBUFFERED=1 \
    MODEL_REPO=daskalos-apps/phi4-cybersec-Q4_K_M \
    MODEL_FILENAME=phi4-mini-instruct-Q4_K_M.gguf \
    USE_RAG=false \
    CACHE_ENABLED=true

# Documented service port.
EXPOSE 8000

# Health check. urllib.request.urlopen raises on connection failure, timeout
# and HTTP 4xx/5xx responses, so the probe exits non-zero whenever /health is
# not answering successfully. It uses only the standard library, so it does not
# depend on what requirements.txt installs.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"

# Run the application (exec form, so the Python process receives signals directly).
CMD ["python", "main.py"]