File size: 5,955 Bytes
e4d7d50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
babc5b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4d7d50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# ─────────────────────────────────────────────────────────────────────────────
# ChessEcon — Unified Multi-Stage Dockerfile
#
# Stages:
#   1. frontend-builder  — builds the React TypeScript dashboard (Node.js)
#   2. backend-cpu       — Python FastAPI backend, serves built frontend as static
#   3. backend-gpu       — CUDA base image with CUDA PyTorch; also ships training/ code, no models baked in
#
# Usage:
#   CPU:  docker build --target backend-cpu -t chessecon:cpu .
#   GPU:  docker build --target backend-gpu -t chessecon:gpu .
# ─────────────────────────────────────────────────────────────────────────────

# ── Stage 1: Build the React frontend ────────────────────────────────────────
FROM node:22-alpine AS frontend-builder

WORKDIR /app/frontend

# Dependency manifests are copied ahead of the sources so the install layer
# below is reused until one of them changes. The trailing * lets the COPY
# succeed even when no pnpm-lock.yaml matches; patches/ must be present
# because pnpm needs it for patched dependencies.
COPY frontend/package.json frontend/pnpm-lock.yaml* ./
COPY frontend/patches/ ./patches/

# Install pnpm globally, then resolve dependencies strictly from the lockfile.
RUN npm install -g pnpm \
    && pnpm install --frozen-lockfile

# Bring in the remaining dashboard sources.
COPY frontend/ ./

# Run the frontend-only production build (no Express server build).
# Per vite.config.ts the bundle is written to dist/public/ under the project root.
RUN pnpm build:docker

# ── Stage 2: CPU backend ──────────────────────────────────────────────────────
FROM python:3.11-slim AS backend-cpu

LABEL maintainer="ChessEcon Team"
LABEL description="ChessEcon — Multi-Agent Chess RL System (CPU)"

# System dependencies (curl is what the HEALTHCHECK below runs)
RUN apt-get update && apt-get install -y --no-install-recommends \
    stockfish \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies (manifest only, so this layer is reused until
# requirements.txt changes)
COPY backend/requirements.txt ./backend/requirements.txt
RUN pip install --no-cache-dir -r backend/requirements.txt

# Create directories for model cache and training data
RUN mkdir -p /app/models /app/data/games /app/data/training /app/logs \
    /app/models/Qwen_Qwen2.5-0.5B-Instruct \
    /app/models/meta-llama_Llama-3.2-1B-Instruct

# ── Download models at build time ────────────────────────────────────────────
# The downloads run BEFORE the application source is copied so that editing
# backend/ or shared/ does not invalidate these layers and re-download weights.
#
# The Python program starts immediately after the opening quote: Docker joins
# the continued lines into one, and leading whitespace in a `python -c`
# program is an indentation error on Python 3.11.
#
# Qwen2.5-0.5B — no token required
RUN pip install --no-cache-dir huggingface_hub && \
    python3 -c "from huggingface_hub import snapshot_download; \
snapshot_download( \
    repo_id='Qwen/Qwen2.5-0.5B-Instruct', \
    local_dir='/app/models/Qwen_Qwen2.5-0.5B-Instruct', \
    local_dir_use_symlinks=False, \
    ignore_patterns=['*.msgpack','*.h5','flax_model*','tf_model*'] \
)"

# Llama-3.2-1B — requires a Hugging Face token. The download is skipped when
# no token is supplied.
#
# Preferred (BuildKit secret, never stored in a layer or in image history):
#   docker build --secret id=hf_token,src=/path/to/token-file ...
# Still accepted for backward compatibility, but the value is recorded in the
# image history, so avoid it for images that get pushed or shared:
#   docker build --build-arg HF_TOKEN=hf_... ...
#
# The token reaches Python through the environment rather than being spliced
# into the program text, so its characters cannot alter the generated code.
ARG HF_TOKEN=""
RUN --mount=type=secret,id=hf_token \
    if [ -s /run/secrets/hf_token ]; then \
        HF_TOKEN="$(cat /run/secrets/hf_token)"; \
    fi; \
    export HF_TOKEN; \
    if [ -n "$HF_TOKEN" ]; then \
        python3 -c "import os; from huggingface_hub import snapshot_download; \
snapshot_download( \
    repo_id='meta-llama/Llama-3.2-1B-Instruct', \
    local_dir='/app/models/meta-llama_Llama-3.2-1B-Instruct', \
    local_dir_use_symlinks=False, \
    token=os.environ['HF_TOKEN'], \
    ignore_patterns=['*.msgpack','*.h5','flax_model*','tf_model*'] \
)"; \
    fi

# NOTE: BLACK_MODEL points at a directory that exists but stays empty when the
# image was built without a token.
ENV WHITE_MODEL=/app/models/Qwen_Qwen2.5-0.5B-Instruct
ENV BLACK_MODEL=/app/models/meta-llama_Llama-3.2-1B-Instruct

# Copy the backend source
COPY backend/ ./backend/
COPY shared/ ./shared/

# Copy the built frontend into the backend's static directory
# vite.config.ts outputs to dist/public/ (see build.outDir in vite.config.ts)
COPY --from=frontend-builder /app/frontend/dist/public ./backend/static/

# Copy entrypoint
COPY docker-entrypoint.sh ./
RUN chmod +x docker-entrypoint.sh

# Expose the application port
EXPOSE 8000

# Health check: probe /health on the exposed port
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

ENTRYPOINT ["./docker-entrypoint.sh"]
CMD ["backend"]

# ── Stage 3: GPU backend ──────────────────────────────────────────────────────
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 AS backend-gpu

LABEL maintainer="ChessEcon Team"
LABEL description="ChessEcon — Multi-Agent Chess RL System (GPU/CUDA)"

# System dependencies (curl is what the HEALTHCHECK below runs).
# DEBIAN_FRONTEND is set inline for this one command only, so a package
# configuration prompt (e.g. tzdata) cannot stall the build and the setting
# does not leak into the runtime environment.
# The symlinks repoint `python3` and `python` at the 3.11 interpreter.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    python3.11 \
    python3.11-dev \
    python3-pip \
    stockfish \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/* \
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && ln -sf /usr/bin/python3 /usr/bin/python

WORKDIR /app

# Install PyTorch with CUDA support first (separate layer for caching);
# the cu121 wheel index matches the CUDA 12.1 base image.
RUN pip install --no-cache-dir torch==2.3.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# Install remaining Python dependencies (manifests only, so these layers are
# reused until a requirements file changes)
COPY backend/requirements.txt ./backend/requirements.txt
COPY training/requirements.txt ./training/requirements.txt
RUN pip install --no-cache-dir -r backend/requirements.txt
RUN pip install --no-cache-dir -r training/requirements.txt

# Copy source
COPY backend/ ./backend/
COPY training/ ./training/
COPY shared/ ./shared/

# Copy the built frontend into the backend's static directory
COPY --from=frontend-builder /app/frontend/dist/public ./backend/static/

# Copy entrypoint
COPY docker-entrypoint.sh ./
RUN chmod +x docker-entrypoint.sh

# Create directories. No model weights are downloaded in this stage;
# /app/models is created empty.
RUN mkdir -p /app/models /app/data/games /app/data/training /app/logs

EXPOSE 8000

# Health check: probe /health on the exposed port
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

ENTRYPOINT ["./docker-entrypoint.sh"]
CMD ["backend"]