# BlitzKode — Docker Compose
#
# Quick start (CPU-only):
#   docker compose up --build
#
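# With GPU (first uncomment the blitzkode-gpu service below; naming the service
# starts only it, so it does not clash with the CPU service on host port 7860):
#   docker compose --profile gpu up --build blitzkode-gpu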
#
# Override GPU layers at runtime without editing this file:
#   BLITZKODE_GPU_LAYERS=35 docker compose up

services:
  # ─── CPU service (default) ──────────────────────────────────────────────────
  # Builds the image from the Dockerfile in this directory and publishes
  # container port 7860 on host port 7860. It has no profile, so it is started
  # by every plain `docker compose up`.
  blitzkode:
    build: .
    image: blitzkode:latest
    ports:
      - "7860:7860"
    volumes:
      # The GGUF model is NOT baked into the image.
      # Place blitzkode.gguf next to this file and it will be mounted read-only.
      #
      # Long syntax is used on purpose: with create_host_path set to false a
      # missing model file is reported as an error at startup. The short
      # "source:target:ro" form would instead create an empty directory named
      # blitzkode.gguf on the host and mount that into the container.
      - type: bind
        source: ./blitzkode.gguf
        target: /app/blitzkode.gguf
        read_only: true
        bind:
          create_host_path: false
    environment:
      # Must match the mount target above.
      BLITZKODE_MODEL_PATH: /app/blitzkode.gguf
      # Listen on all interfaces so the published port is reachable from the host.
      BLITZKODE_HOST: "0.0.0.0"
      # Must match the container side of the port mapping and the healthcheck URL.
      BLITZKODE_PORT: "7860"
      # The values below can be overridden from your shell or a .env file;
      # when the variable is unset (or empty) the default after ":-" is used.
      #
      # GPU layers: 0 = CPU-only (default), -1 = all layers on GPU.
      BLITZKODE_GPU_LAYERS: "${BLITZKODE_GPU_LAYERS:-0}"
      BLITZKODE_N_CTX: "${BLITZKODE_N_CTX:-2048}"
      BLITZKODE_THREADS: "${BLITZKODE_THREADS:-4}"
      BLITZKODE_BATCH: "${BLITZKODE_BATCH:-128}"
      BLITZKODE_PRELOAD_MODEL: "true"
    restart: unless-stopped
    healthcheck:
      # Marks the container unhealthy when GET /health fails or returns an
      # HTTP error status (curl -f), after 3 consecutive failures.
      # NOTE(review): requires curl to be installed in the image — confirm in
      # the Dockerfile.
      test: ["CMD", "curl", "-sf", "http://localhost:7860/health"]
      interval: 30s
      timeout: 10s
      # Failures during the first 90s do not count towards `retries`;
      # presumably this covers model preloading — adjust for slow disks.
      start_period: 90s
      retries: 3

  # ─── GPU service (commented out — requires nvidia-container-toolkit) ─────────
  #
  # Prerequisites on the host:
  #   1. NVIDIA driver installed
  #   2. nvidia-container-toolkit installed (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
  #   3. Docker daemon configured with nvidia runtime (nvidia-ctk runtime configure --runtime=docker)
  #
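  # To start, uncomment the block below, then name the service explicitly so the
  # default CPU service (which publishes the same host port 7860) is not started:
  #   docker compose --profile gpu up --build blitzkode-gpu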
  #
  # blitzkode-gpu:
  #   build: .
  #   image: blitzkode:latest
  #   profiles: [gpu]
  #   ports:
  #     - "7860:7860"
  #   volumes:
  #     - ./blitzkode.gguf:/app/blitzkode.gguf:ro
  #   environment:
  #     BLITZKODE_MODEL_PATH: /app/blitzkode.gguf
  #     BLITZKODE_HOST: "0.0.0.0"
  #     BLITZKODE_PORT: "7860"
  #     # Number of model layers to offload to the GPU (-1 = all layers); tune for
  #     # your GPU (run `./scripts/healthcheck.sh` after start)
  #     BLITZKODE_GPU_LAYERS: "35"
  #     BLITZKODE_N_CTX: "4096"
  #     BLITZKODE_THREADS: "4"
  #     BLITZKODE_BATCH: "512"
  #     BLITZKODE_PRELOAD_MODEL: "true"
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   runtime: nvidia
  #   restart: unless-stopped
  #   healthcheck:
  #     test: ["CMD", "curl", "-sf", "http://localhost:7860/health"]
  #     interval: 30s
  #     timeout: 10s
  #     start_period: 90s
  #     retries: 3