# BlitzKode — Docker Compose
#
# Quick start (CPU-only):
#   docker compose up --build
#
# With GPU (uncomment the blitzkode-gpu service below first):
#   docker compose --profile gpu up --build blitzkode-gpu
#
# Name the GPU service explicitly: the CPU service has no profile, so a bare
# `--profile gpu up` would start it as well and both would try to publish
# host port 7860.
#
# Override GPU layers at runtime without editing this file:
#   BLITZKODE_GPU_LAYERS=35 docker compose up

services:

  # ─── CPU service (default) ──────────────────────────────────────────────────
  blitzkode:
    build: .
    image: blitzkode:latest
    ports:
      - "7860:7860"
    volumes:
      # The GGUF model is NOT baked into the image.
      # Place blitzkode.gguf next to this file and it will be mounted read-only.
      # If the file is missing on the host, Docker creates a directory with that
      # name instead, so make sure the model exists before the first `up`.
      - ./blitzkode.gguf:/app/blitzkode.gguf:ro
    environment:
      # Must match the container-side path of the volume mount above.
      BLITZKODE_MODEL_PATH: /app/blitzkode.gguf
      # Bind address and port inside the container; the port must match the
      # container side of the `ports` mapping and the healthcheck URL.
      BLITZKODE_HOST: "0.0.0.0"
      BLITZKODE_PORT: "7860"
      # Set BLITZKODE_GPU_LAYERS in your shell or a .env file to override.
      # 0 = CPU-only (default), -1 = all layers on GPU.
      BLITZKODE_GPU_LAYERS: "${BLITZKODE_GPU_LAYERS:-0}"
      BLITZKODE_N_CTX: "2048"
      BLITZKODE_THREADS: "4"
      BLITZKODE_BATCH: "128"
      BLITZKODE_PRELOAD_MODEL: "true"
    restart: unless-stopped
    healthcheck:
      # NOTE(review): requires curl inside the image — confirm the Dockerfile
      # installs it, otherwise the container is always reported unhealthy.
      test: ["CMD", "curl", "-sf", "http://localhost:7860/health"]
      interval: 30s
      timeout: 10s
      # Failures during the first 90s are not counted against `retries`.
      start_period: 90s
      retries: 3

  # ─── GPU service (commented out — requires nvidia-container-toolkit) ─────────
  #
  # Prerequisites on the host:
  #   1. NVIDIA driver installed
  #   2. nvidia-container-toolkit installed (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
  #   3. Docker daemon configured with nvidia runtime (nvidia-ctk runtime configure --runtime=docker)
  #
  # To start (after uncommenting the service below):
  #   docker compose --profile gpu up --build blitzkode-gpu
  #
  # Targeting the service by name starts only blitzkode-gpu. Without the name,
  # the default `blitzkode` service also starts and both containers compete
  # for host port 7860.
  #
  # blitzkode-gpu:
  #   build: .
  #   image: blitzkode:latest
  #   profiles: [gpu]
  #   ports:
  #     - "7860:7860"
  #   volumes:
  #     - ./blitzkode.gguf:/app/blitzkode.gguf:ro
  #   environment:
  #     BLITZKODE_MODEL_PATH: /app/blitzkode.gguf
  #     BLITZKODE_HOST: "0.0.0.0"
  #     BLITZKODE_PORT: "7860"
  #     # Tune to your GPU's layer count (run `./scripts/healthcheck.sh` after start)
  #     # Defaults to 35; override with BLITZKODE_GPU_LAYERS in your shell or .env.
  #     BLITZKODE_GPU_LAYERS: "${BLITZKODE_GPU_LAYERS:-35}"
  #     BLITZKODE_N_CTX: "4096"
  #     BLITZKODE_THREADS: "4"
  #     BLITZKODE_BATCH: "512"
  #     BLITZKODE_PRELOAD_MODEL: "true"
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   runtime: nvidia
  #   restart: unless-stopped
  #   healthcheck:
  #     test: ["CMD", "curl", "-sf", "http://localhost:7860/health"]
  #     interval: 30s
  #     timeout: 10s
  #     start_period: 90s
  #     retries: 3