chessecon

Runtime error

App Files Files Community

chessecon / Dockerfile

suvasis

feat: auto-download Qwen model at Docker build time

babc5b1 3 months ago

raw

history blame contribute delete

5.96 kB

	# ─────────────────────────────────────────────────────────────────────────────
	# ChessEcon — Unified Multi-Stage Dockerfile
	#
	# Stages:
	# 1. frontend-builder — builds the React TypeScript dashboard (Node.js)
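	# 2. backend-cpu — Python FastAPI backend; serves the built frontend as static files
	# 3. backend-gpu — like backend-cpu, but on a CUDA base image with CUDA PyTorch and the
	#    training requirements (no models are downloaded at build time in this stage)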
	#
	# Usage:
	# CPU: docker build --target backend-cpu -t chessecon:cpu .
	# GPU: docker build --target backend-gpu -t chessecon:gpu .
	# ─────────────────────────────────────────────────────────────────────────────

	# ── Stage 1: Build the React frontend ────────────────────────────────────────
	FROM node:22-alpine AS frontend-builder

	WORKDIR /app/frontend

	# Dependency manifests go in first so the install layer below is reused
	# whenever only application source changes. pnpm also needs the patches
	# directory at install time to apply patched dependencies.
	COPY frontend/package.json frontend/pnpm-lock.yaml* ./
	COPY frontend/patches/ ./patches/
	RUN npm install -g pnpm \
	    && pnpm install --frozen-lockfile

	# Bring in the remainder of the frontend tree.
	COPY frontend/ ./

	# Produce the production bundle via the project's `build:docker` script
	# (frontend only, no Express server build). Later stages copy the result
	# from dist/public/, the output directory configured in vite.config.ts.
	RUN pnpm build:docker

	# ── Stage 2: CPU backend ──────────────────────────────────────────────────────
	FROM python:3.11-slim AS backend-cpu

	LABEL maintainer="ChessEcon Team" \
	      description="ChessEcon — Multi-Agent Chess RL System (CPU)"

	# OS packages, installed and cleaned up in a single layer so the apt index
	# never persists in the image. curl is used by the HEALTHCHECK below;
	# stockfish and git are presumably needed by the backend at runtime
	# (NOTE(review): confirm git is still required).
	RUN apt-get update \
	    && apt-get install -y --no-install-recommends \
	        curl \
	        git \
	        stockfish \
	    && rm -rf /var/lib/apt/lists/*

	WORKDIR /app

	# Requirements file first: the pip layer stays cached until it changes.
	COPY backend/requirements.txt ./backend/requirements.txt
	RUN pip install --no-cache-dir -r backend/requirements.txt

	# Application source.
	COPY backend/ ./backend/
	COPY shared/ ./shared/

	# Static assets produced by the frontend-builder stage
	# (dist/public is the build.outDir set in vite.config.ts).
	COPY --from=frontend-builder /app/frontend/dist/public ./backend/static/

	# Container entrypoint script; must be executable.
	COPY docker-entrypoint.sh ./
	RUN chmod +x docker-entrypoint.sh

	# Working directories for game data, training data, logs and the two
	# per-model cache folders (mkdir -p also creates the /app/models parent).
	RUN mkdir -p \
	    /app/data/games \
	    /app/data/training \
	    /app/logs \
	    /app/models/Qwen_Qwen2.5-0.5B-Instruct \
	    /app/models/meta-llama_Llama-3.2-1B-Instruct

	# ── Download models at build time ────────────────────────────────────────────
	# Qwen2.5-0.5B — no token required.
	#
	# Two details matter here:
	#  * The Python program begins immediately after the opening quote. Dockerfile
	#    line continuations keep the whitespace around them, and Python 3.11
	#    rejects a `-c` program that starts with whitespace (IndentationError).
	#  * ignore_patterns are glob patterns; the `*` wildcards are required for
	#    them to match the Flax / TensorFlow / H5 / msgpack weight files that
	#    should be skipped.
	RUN pip install --no-cache-dir huggingface_hub && \
	    python3 -c "from huggingface_hub import snapshot_download; \
	    snapshot_download( \
	        repo_id='Qwen/Qwen2.5-0.5B-Instruct', \
	        local_dir='/app/models/Qwen_Qwen2.5-0.5B-Instruct', \
	        local_dir_use_symlinks=False, \
	        ignore_patterns=['*.msgpack', '*.h5', 'flax_model*', 'tf_model*'] \
	    )"

	# Llama-3.2-1B — gated model, downloaded only when a Hugging Face token is
	# supplied (pass as build arg: --build-arg HF_TOKEN=hf_...). Without a token
	# this step is a no-op and the model directory stays empty.
	#
	# NOTE(review): build-arg values are recorded in the image history. Consider
	# switching to a BuildKit secret mount (RUN --mount=type=secret,...) so the
	# token never ends up in image metadata.
	#
	# The token is read from the environment inside Python (build args are
	# exported to RUN commands as environment variables) instead of being pasted
	# into the program text, which avoids quoting problems. The program starts
	# right after the opening quote because Python 3.11 rejects a `-c` program
	# with leading whitespace, and the ignore_patterns carry their `*` wildcards
	# so the unwanted weight formats are actually skipped.
	ARG HF_TOKEN=""
	RUN if [ -n "$HF_TOKEN" ]; then \
	    python3 -c "import os; from huggingface_hub import snapshot_download; \
	    snapshot_download( \
	        repo_id='meta-llama/Llama-3.2-1B-Instruct', \
	        local_dir='/app/models/meta-llama_Llama-3.2-1B-Instruct', \
	        local_dir_use_symlinks=False, \
	        token=os.environ['HF_TOKEN'], \
	        ignore_patterns=['*.msgpack', '*.h5', 'flax_model*', 'tf_model*'] \
	    )"; \
	    fi

	# Default model locations for the two players. The Llama directory is only
	# populated when HF_TOKEN was supplied at build time.
	ENV WHITE_MODEL=/app/models/Qwen_Qwen2.5-0.5B-Instruct
	ENV BLACK_MODEL=/app/models/meta-llama_Llama-3.2-1B-Instruct

	# Expose the application port (documentation only; publish with -p at run time)
	EXPOSE 8000

	# Health check: probe the backend's /health endpoint. The shell `||` maps any
	# curl failure to exit status 1, which Docker interprets as "unhealthy".
	HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
	    CMD curl -f http://localhost:8000/health || exit 1

	ENTRYPOINT ["./docker-entrypoint.sh"]
	CMD ["backend"]

	# ── Stage 3: GPU backend ──────────────────────────────────────────────────────
	FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 AS backend-gpu

	LABEL maintainer="ChessEcon Team" \
	      description="ChessEcon — Multi-Agent Chess RL System (GPU/CUDA)"

	# OS packages plus Python 3.11, installed and cleaned up in one layer.
	# The trailing symlinks make `python3` and `python` resolve to the 3.11
	# interpreter installed here.
	RUN apt-get update \
	    && apt-get install -y --no-install-recommends \
	        curl \
	        git \
	        python3-pip \
	        python3.11 \
	        python3.11-dev \
	        stockfish \
	    && rm -rf /var/lib/apt/lists/* \
	    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
	    && ln -sf /usr/bin/python3 /usr/bin/python

	WORKDIR /app

	# CUDA 12.1 builds of PyTorch come from the dedicated wheel index. Keeping
	# this in its own layer means the large download is cached independently of
	# the project's requirements files.
	RUN pip install --no-cache-dir \
	        --index-url https://download.pytorch.org/whl/cu121 \
	        torch==2.3.0 torchvision torchaudio

	# Project requirements: backend first, then training, as separate layers.
	COPY backend/requirements.txt ./backend/requirements.txt
	COPY training/requirements.txt ./training/requirements.txt
	RUN pip install --no-cache-dir -r backend/requirements.txt
	RUN pip install --no-cache-dir -r training/requirements.txt

	# Application source.
	COPY backend/ ./backend/
	COPY training/ ./training/
	COPY shared/ ./shared/

	# Static assets produced by the frontend-builder stage.
	COPY --from=frontend-builder /app/frontend/dist/public ./backend/static/

	# Container entrypoint script; must be executable.
	COPY docker-entrypoint.sh ./
	RUN chmod +x docker-entrypoint.sh

	# Working directories for models, game data, training data and logs.
	RUN mkdir -p \
	    /app/models \
	    /app/data/games \
	    /app/data/training \
	    /app/logs

	# Application port (documentation only; publish with -p at run time)
	EXPOSE 8000

	# Health check: probe the backend's /health endpoint. The shell `||` maps any
	# curl failure to exit status 1, which Docker interprets as "unhealthy".
	# This stage uses a longer start period (120s) than the CPU stage (60s).
	HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
	    CMD curl -f http://localhost:8000/health || exit 1

	ENTRYPOINT ["./docker-entrypoint.sh"]
	CMD ["backend"]