# GPU image that launches the `agent.model_server` Python module.
# Base: CUDA 12.1.0 runtime on Ubuntu 22.04.
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04

# All following relative paths (COPY targets, the CMD working dir) resolve here.
WORKDIR /app

# Install system dependencies.
# - update + install + list cleanup stay in one layer so the apt index is never
#   stale and never shipped in the image.
# - DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
# - --no-install-recommends keeps the layer small; build-essential and
#   ca-certificates are therefore listed explicitly so that compiling wheels
#   against python3.11-dev and HTTPS downloads (pip, git, curl) do not depend on
#   packages that would otherwise only arrive as recommendations.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3-pip \
        python3.11 \
        python3.11-dev \
    && rm -rf /var/lib/apt/lists/*

# Make `python` resolve to Python 3.11 (-f: do not fail if the link already exists).
RUN ln -sf /usr/bin/python3.11 /usr/bin/python

# Copy requirements first so the dependency layer is reused until
# requirements.txt itself changes.
COPY requirements.txt .

# Install through `python -m pip` rather than the bare `pip` script: the script
# shipped by python3-pip runs under the distribution's default python3 (3.10 on
# Ubuntu 22.04), which would place the packages where the Python 3.11
# interpreter used by CMD cannot import them.
RUN python -m pip install --no-cache-dir -r requirements.txt

# Copy application code.
# NOTE(review): this copies the entire build context; confirm a .dockerignore
# excludes .git, caches and local secrets.
COPY . .

# Runtime environment:
# - PYTHONUNBUFFERED=1      Python writes stdout/stderr unbuffered, so logs show up immediately.
# - TOKENIZERS_PARALLELISM  presumably read by the Hugging Face tokenizers library — confirm
#                           against requirements.txt.
# - CUDA_VISIBLE_DEVICES=0  only GPU 0 is visible to CUDA by default; override with
#                           `docker run -e CUDA_VISIBLE_DEVICES=...`.
ENV PYTHONUNBUFFERED=1 \
    TOKENIZERS_PARALLELISM=false \
    CUDA_VISIBLE_DEVICES=0

# Document the container ports (EXPOSE does not publish them; use -p / -P at run time).
EXPOSE 8000 8001

# NOTE(review): no USER is set, so the process runs as root. Consider switching to
# a dedicated non-root user once the paths the server writes to are confirmed.

# Default command (can be overridden). Exec form, so the Python process is PID 1
# and receives signals directly.
CMD ["python", "-m", "agent.model_server"]