# ChessEcon — OpenEnv 0.1 compliant multi-agent chess economy
#
# White: Qwen/Qwen2.5-0.5B-Instruct (GRPO training target)
# Black: meta-llama/Llama-3.2-1B-Instruct (fixed opponent)
#
# Quick start:
#   docker compose up --build
#
# NOTE: the top-level `version` key is obsolete under the Compose
# Specification (Compose v2 ignores it with a warning), so it is omitted.

services:
  backend:
    build:
      context: ./backend
      dockerfile: Dockerfile
    image: chessecon-backend:latest
    container_name: chessecon-backend
    restart: unless-stopped
    ports:
      - "8008:8000"  # host 8008 → container 8000 (quoted: avoid sexagesimal parse)
    env_file:
      - ./backend/.env
    environment:
      - DEVICE=cuda  # GPU inference
      - HOST=0.0.0.0
      - PORT=8000
      - WHITE_MODEL=/models/Qwen_Qwen2.5-0.5B-Instruct
      - BLACK_MODEL=/models/meta-llama_Llama-3.2-1B-Instruct
      - HF_HUB_OFFLINE=1
      - CUDA_VISIBLE_DEVICES=0  # use first GPU
    volumes:
      - ./training/models:/models:ro  # model weights
      - /home/minasm/.cache/huggingface:/root/.cache/huggingface:ro  # HF cache
      - checkpoints:/app/checkpoints  # LoRA checkpoints
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for inference.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 180s  # generous warm-up: model weights load slowly

  dashboard:
    image: nginx:alpine
    container_name: chessecon-dashboard
    restart: unless-stopped
    ports:
      - "3006:80"
    extra_hosts:
      # Let nginx reach the backend on the host from inside the container.
      - "host.docker.internal:host-gateway"
    volumes:
      - ./frontend/dist/public:/usr/share/nginx/html:ro
      - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro

volumes:
  # Named volume persisting LoRA checkpoints across container restarts.
  checkpoints: