neuralbroker committed on
Commit
4fe8118
·
verified ·
1 Parent(s): ba3b381

Update docker-compose.yml (v2.1 production)

Browse files
Files changed (1) hide show
  1. docker-compose.yml +83 -0
docker-compose.yml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BlitzKode — Docker Compose
2
+ #
3
+ # Quick start (CPU-only):
4
+ # docker compose up --build
5
+ #
6
+ # With GPU (first uncomment the blitzkode-gpu service below):
7
+ # docker compose --profile gpu up --build
8
+ #
9
+ # Override GPU layers at runtime without editing this file:
10
+ # BLITZKODE_GPU_LAYERS=35 docker compose up
11
+
12
+ services:
13
+ # ─── CPU service (default) ──────────────────────────────────────────────────
14
+ blitzkode:
15
+ build: .
16
+ image: blitzkode:latest
17
+ ports:
18
+ - "7860:7860"
19
+ volumes:
20
+ # The GGUF model is NOT baked into the image.
21
+ # Place blitzkode.gguf next to this file and it will be mounted read-only.
22
+ - ./blitzkode.gguf:/app/blitzkode.gguf:ro
23
+ environment:
24
+ BLITZKODE_MODEL_PATH: /app/blitzkode.gguf
25
+ BLITZKODE_HOST: "0.0.0.0"
26
+ BLITZKODE_PORT: "7860"
27
+ # Set BLITZKODE_GPU_LAYERS in your shell or a .env file to override.
28
+ # 0 = CPU-only (default), -1 = all layers on GPU.
29
+ BLITZKODE_GPU_LAYERS: "${BLITZKODE_GPU_LAYERS:-0}"
30
+ BLITZKODE_N_CTX: "2048"
31
+ BLITZKODE_THREADS: "4"
32
+ BLITZKODE_BATCH: "128"
33
+ BLITZKODE_PRELOAD_MODEL: "true"
34
+ restart: unless-stopped
35
+ healthcheck:
36
+ test: ["CMD", "curl", "-sf", "http://localhost:7860/health"]
37
+ interval: 30s
38
+ timeout: 10s
39
+ start_period: 90s
40
+ retries: 3
41
+
42
+ # ─── GPU service (commented out β€” requires nvidia-container-toolkit) ─────────
43
+ #
44
+ # Prerequisites on the host:
45
+ # 1. NVIDIA driver installed
46
+ # 2. nvidia-container-toolkit installed (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
47
+ # 3. Docker daemon configured with nvidia runtime (nvidia-ctk runtime configure --runtime=docker)
48
+ #
49
+ # To start: docker compose --profile gpu up --build
50
+ #
51
+ # blitzkode-gpu:
52
+ # build: .
53
+ # image: blitzkode:latest
54
+ # profiles: [gpu]
55
+ # ports:
56
+ # - "7860:7860"
57
+ # volumes:
58
+ # - ./blitzkode.gguf:/app/blitzkode.gguf:ro
59
+ # environment:
60
+ # BLITZKODE_MODEL_PATH: /app/blitzkode.gguf
61
+ # BLITZKODE_HOST: "0.0.0.0"
62
+ # BLITZKODE_PORT: "7860"
63
+ # # Tune to your GPU's layer count (run `./scripts/healthcheck.sh` after start)
64
+ # BLITZKODE_GPU_LAYERS: "35"
65
+ # BLITZKODE_N_CTX: "4096"
66
+ # BLITZKODE_THREADS: "4"
67
+ # BLITZKODE_BATCH: "512"
68
+ # BLITZKODE_PRELOAD_MODEL: "true"
69
+ # deploy:
70
+ # resources:
71
+ # reservations:
72
+ # devices:
73
+ # - driver: nvidia
74
+ # count: 1
75
+ # capabilities: [gpu]
76
+ # runtime: nvidia
77
+ # restart: unless-stopped
78
+ # healthcheck:
79
+ # test: ["CMD", "curl", "-sf", "http://localhost:7860/health"]
80
+ # interval: 30s
81
+ # timeout: 10s
82
+ # start_period: 90s
83
+ # retries: 3