# chessecon / Dockerfile
# suvasis's picture
# feat: auto-download Qwen model at Docker build time
# babc5b1
# ─────────────────────────────────────────────────────────────────────────────
# ChessEcon — Unified Multi-Stage Dockerfile
#
# Stages:
# 1. frontend-builder — builds the React TypeScript dashboard (Node.js)
# 2. backend-cpu — Python FastAPI backend, serves built frontend as static
# 3. backend-gpu — same as backend-cpu but with CUDA PyTorch
#
# Usage:
# CPU: docker build --target backend-cpu -t chessecon:cpu .
# GPU: docker build --target backend-gpu -t chessecon:gpu .
# ─────────────────────────────────────────────────────────────────────────────
# ── Stage 1: Build the React frontend ────────────────────────────────────────
FROM node:22-alpine AS frontend-builder

WORKDIR /app/frontend

# Dependency manifests go in first so the install layer below stays cached
# until they change. The patches directory must be present at install time
# because pnpm applies patched dependencies from it.
COPY frontend/package.json frontend/pnpm-lock.yaml* ./
COPY frontend/patches/ ./patches/

# Install pnpm globally, then resolve dependencies strictly from the lockfile.
RUN npm install -g pnpm \
    && pnpm install --frozen-lockfile

# Bring in the rest of the frontend sources.
COPY frontend/ ./

# Run the `build:docker` script: frontend bundle only, no Express server build.
# Output lands in dist/public/ relative to the project root (see vite.config.ts).
RUN pnpm build:docker
# ── Stage 2: CPU backend ──────────────────────────────────────────────────────
FROM python:3.11-slim AS backend-cpu

LABEL maintainer="ChessEcon Team" \
      description="ChessEcon — Multi-Agent Chess RL System (CPU)"

# OS-level packages. curl is what this stage's HEALTHCHECK invokes; the apt
# package index is removed in the same layer so it does not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
        stockfish \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# requirements.txt is copied on its own so the dependency layer is only
# rebuilt when the requirements change, not on every source edit.
COPY backend/requirements.txt ./backend/requirements.txt
RUN pip install --no-cache-dir -r backend/requirements.txt

# Application sources.
COPY backend/ ./backend/
COPY shared/ ./shared/

# Static assets produced by the frontend-builder stage
# (vite.config.ts sets build.outDir to dist/public/).
COPY --from=frontend-builder /app/frontend/dist/public ./backend/static/

# Entrypoint script; made executable regardless of its mode in the build context.
COPY docker-entrypoint.sh ./
RUN chmod +x docker-entrypoint.sh

# Directories for the model cache, game/training data and logs.
# `mkdir -p` also creates the /app/models and /app/data parents.
RUN mkdir -p \
        /app/models \
        /app/models/Qwen_Qwen2.5-0.5B-Instruct \
        /app/models/meta-llama_Llama-3.2-1B-Instruct \
        /app/data/games \
        /app/data/training \
        /app/logs
# ── Download models at build time ────────────────────────────────────────────
# Qwen2.5-0.5B — no token required.
#
# The Python snippet starts immediately after the opening quote on purpose:
# Docker joins the continuation lines into a single line, and Python versions
# before 3.14 reject `-c` code that begins with whitespace (IndentationError).
# Indentation on the later lines is harmless because it follows a `;` or sits
# inside the call's parentheses.
RUN pip install --no-cache-dir huggingface_hub \
    && python3 -c "from huggingface_hub import snapshot_download; \
        snapshot_download( \
            repo_id='Qwen/Qwen2.5-0.5B-Instruct', \
            local_dir='/app/models/Qwen_Qwen2.5-0.5B-Instruct', \
            local_dir_use_symlinks=False, \
            ignore_patterns=['*.msgpack','*.h5','flax_model*','tf_model*'] \
        )"

# Llama-3.2-1B — gated model, requires a Hugging Face token.
#
# Preferred (BuildKit): pass the token as a build secret so it is never stored
# in an image layer or in `docker history`:
#   HF_TOKEN=hf_... docker build --secret id=hf_token,env=HF_TOKEN ...
#
# Backward-compatible fallback: --build-arg HF_TOKEN=hf_...
# WARNING: build-arg values are recorded in the image history; avoid this form
# for images that are pushed or shared.
#
# When neither is supplied the download is skipped and the Llama directory
# stays empty.
# The token reaches Python through the environment rather than being spliced
# into the source text, so its characters cannot alter the Python code.
ARG HF_TOKEN=""
RUN --mount=type=secret,id=hf_token \
    hf_token="${HF_TOKEN}"; \
    if [ -s /run/secrets/hf_token ]; then \
        hf_token="$(cat /run/secrets/hf_token)"; \
    fi; \
    if [ -n "${hf_token}" ]; then \
        HF_TOKEN="${hf_token}" python3 -c "import os; \
            from huggingface_hub import snapshot_download; \
            snapshot_download( \
                repo_id='meta-llama/Llama-3.2-1B-Instruct', \
                local_dir='/app/models/meta-llama_Llama-3.2-1B-Instruct', \
                local_dir_use_symlinks=False, \
                token=os.environ['HF_TOKEN'], \
                ignore_patterns=['*.msgpack','*.h5','flax_model*','tf_model*'] \
            )"; \
    fi

# Model locations exported to the running container.
# NOTE(review): BLACK_MODEL points at a directory that is empty when no token
# was supplied at build time — confirm the entrypoint/backend handles that.
ENV WHITE_MODEL=/app/models/Qwen_Qwen2.5-0.5B-Instruct
ENV BLACK_MODEL=/app/models/meta-llama_Llama-3.2-1B-Instruct
# Port the application is reached on (EXPOSE is documentation only; publish
# it with -p / ports: at run time).
EXPOSE 8000

# Probe /health with curl every 30s once the 60s start-up grace period has
# passed; three consecutive failures (10s timeout each) mark the container
# unhealthy.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# The entrypoint script receives CMD as its arguments; "backend" is the default.
ENTRYPOINT ["./docker-entrypoint.sh"]
CMD ["backend"]
# ── Stage 3: GPU backend ──────────────────────────────────────────────────────
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 AS backend-gpu

LABEL maintainer="ChessEcon Team"
LABEL description="ChessEcon — Multi-Agent Chess RL System (GPU/CUDA)"

# System dependencies.
# DEBIAN_FRONTEND=noninteractive is set inline (not via ENV, so it does not
# leak into the runtime environment) so that any package with a debconf prompt
# cannot stall the unattended build.
# After installation, `python3` and `python` are re-pointed at Python 3.11 so
# later `pip` / `python` invocations in this stage use that interpreter.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        git \
        python3-pip \
        python3.11 \
        python3.11-dev \
        stockfish \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && ln -sf /usr/bin/python3 /usr/bin/python
WORKDIR /app

# Install PyTorch with CUDA 12.1 wheels first, in its own layer, so this large
# download is cached independently of the project requirements.
# torchvision and torchaudio are pinned to the releases that pair with
# torch 2.3.0, so the build is reproducible and pip does not have to backtrack
# through newer, incompatible wheels to find a match.
RUN pip install --no-cache-dir \
        torch==2.3.0 \
        torchvision==0.18.0 \
        torchaudio==2.3.0 \
        --index-url https://download.pytorch.org/whl/cu121

# Install the remaining Python dependencies. Only the requirements files are
# copied here so these layers are rebuilt only when the requirements change.
COPY backend/requirements.txt ./backend/requirements.txt
COPY training/requirements.txt ./training/requirements.txt
RUN pip install --no-cache-dir -r backend/requirements.txt
RUN pip install --no-cache-dir -r training/requirements.txt
# Application sources (the GPU image also ships the training code).
COPY backend/ ./backend/
COPY training/ ./training/
COPY shared/ ./shared/

# Static assets produced by the frontend-builder stage.
COPY --from=frontend-builder /app/frontend/dist/public ./backend/static/

# Entrypoint script; made executable regardless of its mode in the build context.
COPY docker-entrypoint.sh ./
RUN chmod +x docker-entrypoint.sh

# Directories for models, game/training data and logs.
# NOTE(review): unlike the backend-cpu stage, no models are downloaded here and
# WHITE_MODEL / BLACK_MODEL are not set — confirm that is intentional.
RUN mkdir -p \
        /app/models \
        /app/data/games \
        /app/data/training \
        /app/logs

EXPOSE 8000

# Probe /health with curl every 30s once the 120s start-up grace period has
# passed; three consecutive failures (10s timeout each) mark the container
# unhealthy.
HEALTHCHECK --start-period=120s --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# The entrypoint script receives CMD as its arguments; "backend" is the default.
ENTRYPOINT ["./docker-entrypoint.sh"]
CMD ["backend"]