# ─────────────────────────────────────────────────────────────────────────────
# ChessEcon — Unified Multi-Stage Dockerfile
#
# Stages:
# 1. frontend-builder — builds the React TypeScript dashboard (Node.js)
# 2. backend-cpu — Python FastAPI backend, serves built frontend as static
# 3. backend-gpu — same as backend-cpu but with CUDA PyTorch
#
# Usage:
# CPU: docker build --target backend-cpu -t chessecon:cpu .
# GPU: docker build --target backend-gpu -t chessecon:gpu .
# ─────────────────────────────────────────────────────────────────────────────
# ── Stage 1: frontend-builder — compile the React dashboard ──────────────────
FROM node:22-alpine AS frontend-builder
WORKDIR /app/frontend

# Dependency manifests go in first so the install layer below is rebuilt only
# when they change. The patches/ directory must be present at install time
# because pnpm needs it for patched dependencies.
COPY frontend/package.json frontend/pnpm-lock.yaml* ./
COPY frontend/patches/ ./patches/
RUN npm install -g pnpm \
    && pnpm install --frozen-lockfile

# Bring in the rest of the frontend sources.
COPY frontend/ ./

# Produce the production bundle (frontend only — the Express server is not
# built). Per vite.config.ts the output lands in dist/public/ relative to the
# project root.
RUN pnpm build:docker
# ── Stage 2: backend-cpu — Python FastAPI backend (CPU) ───────────────────────
FROM python:3.11-slim AS backend-cpu

LABEL maintainer="ChessEcon Team" \
      description="ChessEcon — Multi-Agent Chess RL System (CPU)"

# OS-level packages. The apt package lists are removed in the same layer so
# they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
        stockfish \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Python dependencies: only the requirements file is copied before installing,
# so this layer stays cached while the backend sources change.
COPY backend/requirements.txt ./backend/requirements.txt
RUN pip install --no-cache-dir -r backend/requirements.txt

# Application sources.
COPY backend/ ./backend/
COPY shared/ ./shared/

# Built frontend from the frontend-builder stage, placed in the backend's
# static directory. dist/public is the build.outDir set in vite.config.ts.
COPY --from=frontend-builder /app/frontend/dist/public ./backend/static/

# Container entrypoint script; made executable explicitly.
COPY docker-entrypoint.sh ./
RUN chmod +x docker-entrypoint.sh

# Directories for the model cache, game/training data and logs, including one
# directory per model that the later build steps download into.
RUN mkdir -p \
        /app/models \
        /app/data/games \
        /app/data/training \
        /app/logs \
        /app/models/Qwen_Qwen2.5-0.5B-Instruct \
        /app/models/meta-llama_Llama-3.2-1B-Instruct
# ── Download models at build time ────────────────────────────────────────────
# Qwen2.5-0.5B — no token required.
#
# The Python program has to start directly after the opening quote. Dockerfile
# line continuations join the whole command onto one line, so any whitespace
# before the first statement reaches `python3 -c` as leading indentation and
# fails with "IndentationError: unexpected indent" on Python 3.11. Whitespace
# after a `;` or inside the call's parentheses is harmless.
RUN pip install --no-cache-dir huggingface_hub \
    && python3 -c "from huggingface_hub import snapshot_download; \
        snapshot_download( \
            repo_id='Qwen/Qwen2.5-0.5B-Instruct', \
            local_dir='/app/models/Qwen_Qwen2.5-0.5B-Instruct', \
            local_dir_use_symlinks=False, \
            ignore_patterns=['*.msgpack','*.h5','flax_model*','tf_model*'], \
        )"
# Llama-3.2-1B — requires a Hugging Face token.
#
# Preferred: pass the token as a BuildKit secret, which is mounted only for
# this RUN step and is not written to any layer or to the image history:
#   docker build --secret id=HF_TOKEN,env=HF_TOKEN ...
# Still accepted for backward compatibility:
#   docker build --build-arg HF_TOKEN=hf_... ...
# Build args are recorded in `docker history`, so a token passed that way
# should be treated as exposed to anyone who can pull the image.
# When neither is supplied the download is skipped and the model directory
# stays empty.
#
# The token reaches Python through the environment rather than being spliced
# into the program text, so its characters can never be parsed as Python code.
# The program starts directly after the opening quote because leading
# whitespace in a `python3 -c` program is an IndentationError on Python 3.11.
ARG HF_TOKEN=""
RUN --mount=type=secret,id=HF_TOKEN \
    if [ -s /run/secrets/HF_TOKEN ]; then \
        HF_TOKEN="$(cat /run/secrets/HF_TOKEN)"; \
    fi; \
    if [ -n "$HF_TOKEN" ]; then \
        HF_TOKEN="$HF_TOKEN" python3 -c "import os; \
            from huggingface_hub import snapshot_download; \
            snapshot_download( \
                repo_id='meta-llama/Llama-3.2-1B-Instruct', \
                local_dir='/app/models/meta-llama_Llama-3.2-1B-Instruct', \
                local_dir_use_symlinks=False, \
                token=os.environ['HF_TOKEN'], \
                ignore_patterns=['*.msgpack','*.h5','flax_model*','tf_model*'], \
            )"; \
    fi
# Default model locations inside the image.
# NOTE(review): BLACK_MODEL points at a directory that is created empty and is
# only populated when a Hugging Face token was supplied at build time —
# confirm the entrypoint copes with an empty directory.
ENV WHITE_MODEL=/app/models/Qwen_Qwen2.5-0.5B-Instruct \
    BLACK_MODEL=/app/models/meta-llama_Llama-3.2-1B-Instruct

# Application port (documentation only; publish it with -p at run time).
EXPOSE 8000

# Probe /health every 30s; allow 60s of start-up time before failures count,
# and mark the container unhealthy after 3 consecutive failures.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# The entrypoint script receives CMD as its arguments; "backend" is the default.
ENTRYPOINT ["./docker-entrypoint.sh"]
CMD ["backend"]
# ── Stage 3: backend-gpu — Python FastAPI backend (GPU/CUDA) ──────────────────
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 AS backend-gpu

LABEL maintainer="ChessEcon Team" \
      description="ChessEcon — Multi-Agent Chess RL System (GPU/CUDA)"

# OS-level packages plus Python 3.11.
# DEBIAN_FRONTEND=noninteractive is set inline for the install only, so a
# package that asks a debconf question (tzdata is the usual culprit on Ubuntu
# bases) cannot stall an unattended build, and the setting does not leak into
# the runtime environment.
# The apt package lists are removed in the same layer. Afterwards `python3`
# and `python` are re-pointed at python3.11 so later pip/python invocations in
# this stage use that interpreter.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        git \
        python3-pip \
        python3.11 \
        python3.11-dev \
        stockfish \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && ln -sf /usr/bin/python3 /usr/bin/python
WORKDIR /app

# CUDA 12.1 build of PyTorch, installed on its own so this large layer is
# cached independently of the project's requirements files.
RUN pip install --no-cache-dir \
        torch==2.3.0 \
        torchvision \
        torchaudio \
        --index-url https://download.pytorch.org/whl/cu121

# Remaining Python dependencies. Both requirements files are copied before
# either install runs; each file is then installed in its own layer.
COPY backend/requirements.txt ./backend/requirements.txt
COPY training/requirements.txt ./training/requirements.txt
RUN pip install --no-cache-dir -r backend/requirements.txt
RUN pip install --no-cache-dir -r training/requirements.txt

# Application sources (the GPU image also ships the training code).
COPY backend/ ./backend/
COPY training/ ./training/
COPY shared/ ./shared/

# Built frontend from the frontend-builder stage.
COPY --from=frontend-builder /app/frontend/dist/public ./backend/static/

# Container entrypoint script; made executable explicitly.
COPY docker-entrypoint.sh ./
RUN chmod +x docker-entrypoint.sh

# Directories for models, game/training data and logs.
RUN mkdir -p \
        /app/models \
        /app/data/games \
        /app/data/training \
        /app/logs
# Application port (documentation only; publish it with -p at run time).
EXPOSE 8000

# Probe /health every 30s; allow 120s of start-up time before failures count,
# and mark the container unhealthy after 3 consecutive failures.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# The entrypoint script receives CMD as its arguments; "backend" is the default.
ENTRYPOINT ["./docker-entrypoint.sh"]
CMD ["backend"]
|