File size: 2,690 Bytes
# ───────────────────────────────────────────────────────────
# MAC — GPU Worker Node (PC2-PC7)
# Each worker PC runs vLLM inference + a heartbeat agent.
# The agent enrolls with the control node and sends health data.
#
# Prerequisites:
# 1. Get enrollment token from admin: POST /api/v1/nodes/enrollment-token
# 2. Set ENROLLMENT_TOKEN in .env
# 3. Set CONTROL_NODE_URL to the control node's address
#
# Usage: docker compose -f docker-compose.worker-node.yml up -d
# ───────────────────────────────────────────────────────────
services:
  # ── vLLM GPU Inference ─────────────────────────────────
  # Runs the vLLM server on one NVIDIA GPU. Model, port, GPU memory
  # fraction and maximum context length are read from the environment
  # (.env) with the defaults shown inline.
  vllm:
    # Overridable so a deployment can pin a tested tag instead of tracking
    # `latest`; when VLLM_IMAGE is unset the image is unchanged.
    image: "${VLLM_IMAGE:-vllm/vllm-openai:latest}"
    container_name: mac-vllm-worker
    ports:
      # Host and container side use the same port value; the server is told
      # to listen on it through --port in the command below.
      - "${VLLM_PORT:-8001}:${VLLM_PORT:-8001}"
    environment:
      HF_HOME: "/root/.cache/huggingface"
    volumes:
      # Named volume mounted at the HF_HOME path so its contents persist
      # across container re-creation.
      - hf-cache:/root/.cache/huggingface
    # Folded scalar: the lines below are joined with spaces into a single
    # argument string; `-` strips the trailing newline.
    command: >-
      --model ${VLLM_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}
      --port ${VLLM_PORT:-8001}
      --gpu-memory-utilization ${GPU_MEM_UTIL:-0.85}
      --max-model-len ${MAX_MODEL_LEN:-8192}
      --trust-remote-code
      --enforce-eager
    deploy:
      resources:
        reservations:
          devices:
            # Reserve exactly one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - worker-net

  # ── Worker Agent — Enrollment + Heartbeat ──────────────
  # Runs the bind-mounted worker-agent.py inside a stock Python image.
  # The values below are passed to the script as environment variables;
  # how the script uses them is defined in worker-agent.py, not here.
  worker-agent:
    image: python:3.11-slim
    container_name: mac-worker-agent
    environment:
      CONTROL_NODE_URL: "${CONTROL_NODE_URL:-http://192.168.1.100:8000}"
      # Defaults to an empty string when ENROLLMENT_TOKEN is not set in .env.
      ENROLLMENT_TOKEN: "${ENROLLMENT_TOKEN:-}"
      NODE_NAME: "${NODE_NAME:-worker-1}"
      VLLM_PORT: "${VLLM_PORT:-8001}"
      VLLM_MODEL: "${VLLM_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}"
      # Hardware description is supplied statically via .env, with these
      # fallbacks. NOTE(review): presumably reported to the control node as-is
      # rather than probed by the agent — confirm in worker-agent.py.
      GPU_NAME: "${GPU_NAME:-NVIDIA RTX 3060}"
      GPU_VRAM_MB: "${GPU_VRAM_MB:-12288}"
      RAM_TOTAL_MB: "${RAM_TOTAL_MB:-16384}"
      CPU_CORES: "${CPU_CORES:-8}"
      HEARTBEAT_INTERVAL: "${HEARTBEAT_INTERVAL:-30}"
    volumes:
      # Agent script is mounted read-only from the compose directory.
      - ./worker-agent.py:/app/agent.py:ro
    # Dependencies are installed on every container start (the base image
    # ships without them). `exec` replaces the shell with the Python process
    # so stop signals reach the agent directly instead of the wrapper shell.
    command: >-
      bash -c "pip install httpx psutil --quiet && exec python /app/agent.py"
    # Start-order only: waits for the vllm container to be started, not for
    # the model server to be ready.
    depends_on:
      - vllm
    restart: unless-stopped
    networks:
      - worker-net
# Named volume mounted by the vllm service at /root/.cache/huggingface.
# An empty mapping means default driver and options.
volumes:
  hf-cache: {}
# Bridge network that both services (vllm and worker-agent) are attached to.
networks:
  worker-net:
    driver: bridge
|