File size: 2,690 Bytes
9c0b225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# ═══════════════════════════════════════════════════════════
#  MAC — GPU Worker Node (PC2-PC7)
#  Each worker PC runs vLLM inference + a heartbeat agent.
#  The agent enrolls with the control node and sends health data.
#
#  Prerequisites:
#    1. Get enrollment token from admin: POST /api/v1/nodes/enrollment-token
#    2. Set ENROLLMENT_TOKEN in .env
#    3. Set CONTROL_NODE_URL to the control node's address
#
#  Usage:  docker compose -f docker-compose.worker-node.yml up -d
# ═══════════════════════════════════════════════════════════

services:

  # ── vLLM GPU Inference ─────────────────────────────────
  # OpenAI-compatible inference API on ${VLLM_PORT}, backed by one GPU.
  vllm:
    image: vllm/vllm-openai:latest
    container_name: mac-vllm-worker
    # vLLM's Docker deployment docs recommend the host IPC namespace so
    # PyTorch worker processes can use shared memory.
    ipc: host
    ports:
      - "${VLLM_PORT:-8001}:${VLLM_PORT:-8001}"
    environment:
      - HF_HOME=/root/.cache/huggingface
    volumes:
      # Persist downloaded model weights across container recreations.
      - hf-cache:/root/.cache/huggingface
    command: >
      --model ${VLLM_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}
      --port ${VLLM_PORT:-8001}
      --gpu-memory-utilization ${GPU_MEM_UTIL:-0.85}
      --max-model-len ${MAX_MODEL_LEN:-8192}
      --trust-remote-code
      --enforce-eager
    # vLLM serves /health once the model is loaded. Probe with python3
    # (always present in the image; curl may not be). ${VLLM_PORT} is
    # substituted by compose at parse time. Model download + load can
    # take minutes on first start, hence the generous start_period.
    healthcheck:
      test: ["CMD-SHELL", "python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:${VLLM_PORT:-8001}/health')\""]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 300s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - worker-net

  # ── Worker Agent — Enrollment + Heartbeat ──────────────
  # Lightweight sidecar: installs its two runtime deps at start-up, then
  # runs the bind-mounted agent script (enrollment + periodic heartbeat
  # against the control node).
  worker-agent:
    image: python:3.11-slim
    container_name: mac-worker-agent
    # Map form; values quoted defensively (compose env values are strings).
    environment:
      CONTROL_NODE_URL: "${CONTROL_NODE_URL:-http://192.168.1.100:8000}"
      ENROLLMENT_TOKEN: "${ENROLLMENT_TOKEN:-}"
      NODE_NAME: "${NODE_NAME:-worker-1}"
      VLLM_PORT: "${VLLM_PORT:-8001}"
      VLLM_MODEL: "${VLLM_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}"
      GPU_NAME: "${GPU_NAME:-NVIDIA RTX 3060}"
      GPU_VRAM_MB: "${GPU_VRAM_MB:-12288}"
      RAM_TOTAL_MB: "${RAM_TOTAL_MB:-16384}"
      CPU_CORES: "${CPU_CORES:-8}"
      HEARTBEAT_INTERVAL: "${HEARTBEAT_INTERVAL:-30}"
    volumes:
      # Agent code lives on the host next to this file; mounted read-only.
      - ./worker-agent.py:/app/agent.py:ro
    command: >
      bash -c "pip install httpx psutil --quiet && python /app/agent.py"
    depends_on:
      - vllm
    restart: unless-stopped
    networks:
      - worker-net

# Named volume holding the HuggingFace cache so model weights survive
# container recreation and are not re-downloaded on every start.
volumes:
  hf-cache:

# Private bridge network shared by both services of this worker stack.
# NOTE(review): presumably the agent also polls vLLM by service name over
# this network — confirm against worker-agent.py.
networks:
  worker-net:
    driver: bridge