# Docker Compose stack: vLLM (ROCm) inference backend, apohara API service,
# and a Gradio demo UI.
services:
  # OpenAI-compatible vLLM server running on AMD ROCm.
  vllm:
    # NOTE(review): original used ollama/rocm:latest, whose entrypoint is
    # ollama and cannot run `vllm serve`; rocm/vllm is the ROCm vLLM image.
    image: rocm/vllm:latest
    container_name: contextforge-vllm
    ports:
      - "8000:8000"
    environment:
      # Shared key; the apohara client must send the same value.
      - VLLM_API_KEY=${VLLM_API_KEY:-contextforge-local}
    # NOTE(review): original model id Qwen/Qwen3.6-35B-A3B does not exist on
    # Hugging Face; Qwen/Qwen3-30B-A3B is the published A3B MoE checkpoint
    # matching the qwen3 reasoning parser. Must stay in sync with
    # VLLM_MODEL in the apohara service below.
    command: >-
      vllm serve Qwen/Qwen3-30B-A3B
      --enable-prefix-caching
      --enable-chunked-prefill
      --tensor-parallel-size 1
      --reasoning-parser qwen3
      --trust-remote-code
      --host 0.0.0.0
      --port 8000
    healthcheck:
      # NOTE(review): assumes curl is present in the image — confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Model download/load can take minutes; don't mark unhealthy early.
      start_period: 300s
    # ROCm GPU passthrough. Docker's deploy.resources device reservation has
    # no "amd" driver (only nvidia/cdi are recognized); AMD GPUs are exposed
    # by mapping the kernel devices directly and joining the video group.
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video

  # Main application API; talks to the vllm service over the compose network.
  apohara:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: apohara
    ports:
      - "8001:8001"
    environment:
      - VLLM_BASE_URL=http://vllm:8000
      # Must authenticate against the vllm service's VLLM_API_KEY.
      - VLLM_API_KEY=${VLLM_API_KEY:-contextforge-local}
      # Keep in sync with the model served by the vllm service.
      - VLLM_MODEL=Qwen/Qwen3-30B-A3B
      - CONTEXTFORGE_PORT=8001
    depends_on:
      vllm:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Gradio demo UI; same image as apohara, different entry command.
  gradio:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: apohara-ui
    ports:
      - "7860:7860"
    environment:
      - CONTEXTFORGE_PORT=8001
    depends_on:
      # apohara defines a healthcheck, so gate startup on it.
      apohara:
        condition: service_healthy
    command: python demo/app.py

volumes:
  # NOTE(review): declared but not mounted by any service — presumably
  # intended as a model cache volume for vllm; confirm before removing.
  # (Original had a stray `|` block-scalar indicator here.)
  models: