ConorWang committed on
Commit
34021d5
·
verified ·
1 Parent(s): 0cecab2

Delete docker-compose.yml

Browse files
Files changed (1) hide show
  1. docker-compose.yml +0 -154
docker-compose.yml DELETED
@@ -1,154 +0,0 @@
1
- version: "3.9"
2
-
3
- # Named volumes for persistence (HF cache, logs)
4
- volumes:
5
- hf_cache:
6
- logs:
7
-
8
- networks:
9
- velnet:
10
-
11
- services:
12
- api:
13
- image: veltraxor-api:dev
14
- # The next 3 lines are active: if the image above is not available, Compose builds it from the local Dockerfile. Remove them to rely on a prebuilt image only.
15
- build:
16
- context: .
17
- dockerfile: Dockerfile
18
- container_name: veltraxor-api
19
- env_file:
20
- - .env
21
- environment:
22
- # Hard-aligned with your codebase (config.py / api_server.py / engine_runner.py)
23
- ENGINE_KIND: ${ENGINE_KIND:-dry} # dry | real
24
- MODEL_ALIAS: ${MODEL_ALIAS:-veltraxor-1}
25
- HF_REPO_ID: ${HF_REPO_ID:-Veltraxor/Veltraxor_1}
26
- EXPECTED_SHARDS: ${EXPECTED_SHARDS:-163}
27
- HF_HOME: ${HF_HOME:-/data/hf_cache}
28
- MODEL_LOCAL_DIR: ${MODEL_LOCAL_DIR:-/data/hf_cache/Veltraxor_1}
29
- HF_HUB_ENABLE_HF_TRANSFER: ${HF_HUB_ENABLE_HF_TRANSFER:-1}
30
- # Server/limits
31
- REQUEST_TIMEOUT_S: ${REQUEST_TIMEOUT_S:-45}
32
- BACKEND_STEP_TIMEOUT_S: ${BACKEND_STEP_TIMEOUT_S:-30}
33
- MAX_CONCURRENCY: ${MAX_CONCURRENCY:-8}
34
- RATE_LIMIT_QPS: ${RATE_LIMIT_QPS:-3}
35
- BURST_CAPACITY: ${BURST_CAPACITY:-3}
36
- RATE_LIMIT_SCOPE: ${RATE_LIMIT_SCOPE:-route_token}
37
- LOG_LEVEL: ${LOG_LEVEL:-INFO}
38
- API_KEY: ${API_KEY:-} # optional shared API key
39
- # Usage metering
40
- USAGE_BACKEND: ${USAGE_BACKEND:-jsonl}
41
- USAGE_JSONL_PATH: ${USAGE_JSONL_PATH:-/app/logs/usage.jsonl}
42
- USAGE_SQLITE_PATH: ${USAGE_SQLITE_PATH:-/app/logs/usage.db}
43
- USAGE_TOKENIZER: ${USAGE_TOKENIZER:-heuristic}
44
- HEURISTIC_CHARS_PER_TOKEN: ${HEURISTIC_CHARS_PER_TOKEN:-4}
45
- USAGE_FLUSH_INTERVAL_S: ${USAGE_FLUSH_INTERVAL_S:-2.0}
46
- USAGE_MAX_BUFFER: ${USAGE_MAX_BUFFER:-100}
47
- USAGE_MAX_QUEUE: ${USAGE_MAX_QUEUE:-1000}
48
- USAGE_ROTATE_DAILY: ${USAGE_ROTATE_DAILY:-0}
49
- # Real-engine endpoints (only used if ENGINE_KIND=real)
50
- VLLM_ENDPOINT: ${VLLM_ENDPOINT:-http://vllm:8000}
51
- VLLM_API_KEY: ${VLLM_API_KEY:-}
52
- VLLM_MODEL: ${VLLM_MODEL:-${MODEL_ALIAS:-veltraxor-1}}
53
- TGI_ENDPOINT: ${TGI_ENDPOINT:-http://tgi:8080}
54
- TGI_TIMEOUT_S: ${TGI_TIMEOUT_S:-45}
55
- TRANSFORMERS_LOCAL: ${TRANSFORMERS_LOCAL:-0}
56
- TRANSFORMERS_MAX_NEW_TOKENS: ${TRANSFORMERS_MAX_NEW_TOKENS:-512}
57
- TRANSFORMERS_TEMPERATURE: ${TRANSFORMERS_TEMPERATURE:-0.7}
58
- # IMPORTANT: pass HF_TOKEN at runtime (do NOT bake into images)
59
- HF_TOKEN: ${HF_TOKEN:-}
60
- ports:
61
- - "8000:8000"
62
- volumes:
63
- - hf_cache:/data/hf_cache
64
- - logs:/app/logs
65
- healthcheck:
66
- test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/healthz || exit 1"]
67
- interval: 30s
68
- timeout: 3s
69
- retries: 3
70
- start_period: 10s
71
- restart: unless-stopped
72
- networks:
73
- - velnet
74
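- # API can run alone (DRY) or with engines. If using real engines via profiles,
75
- # keep this depends_on so compose waits for engine's health. NOTE: as written it is unconditional and references both profile-gated engines (vllm and tgi); remove or trim it when running the API alone (DRY) or with a single engine.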
76
- depends_on:
77
- vllm:
78
- condition: service_healthy
79
- tgi:
80
- condition: service_healthy
81
-
82
- # ===============================
83
- # vLLM (OpenAI-compatible server)
84
- # Enable with: docker compose --profile vllm up
85
- # ===============================
86
- vllm:
87
- profiles: ["vllm"]
88
- image: vllm/vllm-openai:latest
89
- container_name: vllm
90
- # NOTE: Adjust resources & args to your hardware; 671B is extremely heavy.
91
- # Here we point vLLM to the local weights folder mounted at /data/Veltraxor_1
92
- command: >
93
- python -m vllm.entrypoints.openai.api_server
94
- --model /data/Veltraxor_1
95
- --trust-remote-code
96
- --tensor-parallel-size ${TP_DEGREE:-1}
97
- --dtype auto
98
- --port 8000
99
- environment:
100
- HF_HOME: /data/hf_cache
101
- TRANSFORMERS_OFFLINE: "1"
102
- volumes:
103
- - hf_cache:/data/hf_cache
104
- - hf_cache:/data/Veltraxor_1
105
- expose:
106
- - "8000"
107
- healthcheck:
108
- test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/ || exit 1"]
109
- interval: 30s
110
- timeout: 5s
111
- retries: 3
112
- start_period: 20s
113
- restart: unless-stopped
114
- networks:
115
- - velnet
116
- deploy:
117
- resources:
118
- reservations:
119
- devices:
120
- - capabilities: ["gpu"] # active GPU reservation; requires a GPU-capable container runtime (e.g. NVIDIA) - comment out if your runtime does not support it
121
-
122
- # ===============================
123
- # TGI (Hugging Face Text Generation Inference)
124
- # Enable with: docker compose --profile tgi up
125
- # ===============================
126
- tgi:
127
- profiles: ["tgi"]
128
- image: ghcr.io/huggingface/text-generation-inference:latest
129
- container_name: tgi
130
- # Point MODEL_ID to local repo path; serve on 8080
131
- environment:
132
- MODEL_ID: /data/Veltraxor_1
133
- PORT: 8080
134
- HF_HOME: /data/hf_cache
135
- NUM_SHARD: ${TP_DEGREE:-1} # Adjust to your GPUs
136
- volumes:
137
- - hf_cache:/data/hf_cache
138
- - hf_cache:/data/Veltraxor_1
139
- expose:
140
- - "8080"
141
- healthcheck:
142
- test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8080/health || exit 1"]
143
- interval: 30s
144
- timeout: 5s
145
- retries: 3
146
- start_period: 20s
147
- restart: unless-stopped
148
- networks:
149
- - velnet
150
- deploy:
151
- resources:
152
- reservations:
153
- devices:
154
- - capabilities: ["gpu"] # active GPU reservation; comment out if your runtime does not support it