File size: 9,851 Bytes
9c0b225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#!/usr/bin/env python3
"""

MAC Worker Agent — Enrolls with control node and sends periodic heartbeats.

Runs as a sidecar container alongside vLLM on each GPU worker PC.

"""

import asyncio
import json
import os
import socket
import sys
import time

import httpx

CONTROL_URL = os.environ.get("CONTROL_NODE_URL", "http://192.168.1.100:8000")
ENROLLMENT_TOKEN = os.environ.get("ENROLLMENT_TOKEN", "")
NODE_NAME = os.environ.get("NODE_NAME", f"worker-{socket.gethostname()}")
VLLM_PORT = int(os.environ.get("VLLM_PORT", 8001))
VLLM_MODEL = os.environ.get("VLLM_MODEL", "")
GPU_NAME = os.environ.get("GPU_NAME", "NVIDIA GPU")
GPU_VRAM_MB = int(os.environ.get("GPU_VRAM_MB", 12288))
RAM_TOTAL_MB = int(os.environ.get("RAM_TOTAL_MB", 16384))
CPU_CORES = int(os.environ.get("CPU_CORES", 8))
HEARTBEAT_INTERVAL = int(os.environ.get("HEARTBEAT_INTERVAL", 30))

API = f"{CONTROL_URL}/api/v1"
STATE_FILE = "/tmp/mac_worker_state.json"


def get_local_ip():
    """Get the local IP address visible on the network."""
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(("8.8.8.8", 80))
        ip = s.getsockname()[0]
        s.close()
        return ip
    except Exception:
        return "127.0.0.1"


def load_state():
    """Load saved node ID from previous enrollment."""
    try:
        with open(STATE_FILE, "r") as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}


def save_state(data):
    """Save enrollment state."""
    with open(STATE_FILE, "w") as f:
        json.dump(data, f)


def get_resource_metrics():
    """Collect current resource utilization metrics."""
    metrics = {
        "cpu_util_pct": 0.0,
        "ram_used_mb": 0,
        "gpu_util_pct": 0.0,
        "gpu_vram_used_mb": 0,
    }
    try:
        import psutil
        metrics["cpu_util_pct"] = psutil.cpu_percent(interval=1)
        mem = psutil.virtual_memory()
        metrics["ram_used_mb"] = int(mem.used / 1024 / 1024)
    except ImportError:
        pass

    # Try nvidia-smi for GPU metrics
    try:
        import subprocess
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=utilization.gpu,memory.used",
             "--format=csv,noheader,nounits"],
            capture_output=True, text=True, timeout=5
        )
        if result.returncode == 0:
            parts = result.stdout.strip().split(",")
            if len(parts) >= 2:
                metrics["gpu_util_pct"] = float(parts[0].strip())
                metrics["gpu_vram_used_mb"] = int(float(parts[1].strip()))
    except (FileNotFoundError, subprocess.TimeoutExpired, ValueError):
        pass

    return metrics


async def enroll(client: httpx.AsyncClient) -> str | None:
    """Enroll this node with the control server. Returns node ID."""
    state = load_state()
    if state.get("node_id"):
        print(f"[AGENT] Already enrolled as node {state['node_id']}")
        return state["node_id"]

    if not ENROLLMENT_TOKEN:
        print("[AGENT] ERROR: No ENROLLMENT_TOKEN set. Cannot enroll.")
        return None

    ip = get_local_ip()
    payload = {
        "enrollment_token": ENROLLMENT_TOKEN,
        "name": NODE_NAME,
        "hostname": socket.gethostname(),
        "ip_address": ip,
        "port": VLLM_PORT,
        "gpu_name": GPU_NAME,
        "gpu_vram_mb": GPU_VRAM_MB,
        "ram_total_mb": RAM_TOTAL_MB,
        "cpu_cores": CPU_CORES,
    }

    try:
        resp = await client.post(f"{API}/nodes/enroll", json=payload)
        if resp.status_code == 200:
            data = resp.json()
            node_id = data.get("id")
            save_state({"node_id": node_id, "name": NODE_NAME})
            print(f"[AGENT] Enrolled successfully! Node ID: {node_id}")
            return node_id
        else:
            print(f"[AGENT] Enrollment failed: {resp.status_code} {resp.text}")
            return None
    except httpx.RequestError as e:
        print(f"[AGENT] Connection error during enrollment: {e}")
        return None


async def heartbeat_loop(client: httpx.AsyncClient, node_id: str):
    """Send periodic heartbeats with resource metrics."""
    consecutive_failures = 0
    max_failures = 10

    while True:
        try:
            metrics = get_resource_metrics()
            resp = await client.post(
                f"{API}/nodes/heartbeat/{node_id}",
                json=metrics
            )

            if resp.status_code == 200:
                data = resp.json()
                consecutive_failures = 0
                warnings = data.get("warnings", [])
                if warnings:
                    print(f"[AGENT] Resource warnings: {warnings}")
            elif resp.status_code == 404:
                print("[AGENT] Node not found — re-enrollment needed")
                save_state({})
                return
            else:
                consecutive_failures += 1
                print(f"[AGENT] Heartbeat failed: {resp.status_code}")

        except httpx.RequestError as e:
            consecutive_failures += 1
            print(f"[AGENT] Heartbeat connection error: {e}")

        if consecutive_failures >= max_failures:
            print(f"[AGENT] {max_failures} consecutive failures. Waiting 60s before retry.")
            await asyncio.sleep(60)
            consecutive_failures = 0
        else:
            await asyncio.sleep(HEARTBEAT_INTERVAL)


async def wait_for_vllm():
    """Wait for local vLLM server to be ready."""
    print(f"[AGENT] Waiting for vLLM on port {VLLM_PORT}...")
    async with httpx.AsyncClient(timeout=5) as client:
        for attempt in range(120):  # Wait up to 10 minutes
            try:
                resp = await client.get(f"http://localhost:{VLLM_PORT}/health")
                if resp.status_code == 200:
                    print("[AGENT] vLLM is ready!")
                    return True
            except httpx.RequestError:
                pass
            await asyncio.sleep(5)
    print("[AGENT] WARNING: vLLM did not become ready in time")
    return False


async def detect_vllm_model():
    """Query vLLM to find what model it's actually serving."""
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(f"http://localhost:{VLLM_PORT}/v1/models")
            if resp.status_code == 200:
                data = resp.json()
                models = data.get("data", [])
                if models:
                    return models[0].get("id", "")
    except Exception:
        pass
    return VLLM_MODEL


def _model_id_from_served_name(served_name: str) -> str:
    """Map HuggingFace model name to MAC model_id."""
    mapping = {
        "Qwen/Qwen2.5-7B-Instruct-AWQ": "qwen2.5:7b",
        "Qwen/Qwen2.5-7B-Instruct": "qwen2.5:7b",
        "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ": "qwen2.5-coder:7b",
        "Qwen/Qwen2.5-Coder-7B-Instruct": "qwen2.5-coder:7b",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": "deepseek-r1:14b",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": "deepseek-r1:7b",
        "google/gemma-3-27b-it": "gemma3:27b",
    }
    return mapping.get(served_name, served_name)


async def register_model(client: httpx.AsyncClient, node_id: str):
    """Register the served model with the control node."""
    served_name = await detect_vllm_model()
    if not served_name:
        print("[AGENT] Could not detect model — skipping registration")
        return

    model_id = _model_id_from_served_name(served_name)
    print(f"[AGENT] Registering model: {model_id} ({served_name})")

    try:
        resp = await client.post(
            f"{API}/nodes/register-model/{node_id}",
            json={
                "model_id": model_id,
                "served_name": served_name,
                "model_name": served_name.split("/")[-1] if "/" in served_name else served_name,
                "vllm_port": VLLM_PORT,
            }
        )
        if resp.status_code == 200:
            print(f"[AGENT] Model registered: {resp.json()}")
        else:
            print(f"[AGENT] Model registration failed: {resp.status_code} {resp.text}")
    except httpx.RequestError as e:
        print(f"[AGENT] Model registration error: {e}")


async def main():
    print(f"[AGENT] MAC Worker Agent starting — {NODE_NAME}")
    print(f"[AGENT] Control node: {CONTROL_URL}")
    if VLLM_MODEL:
        print(f"[AGENT] Configured model: {VLLM_MODEL}")

    await wait_for_vllm()

    async with httpx.AsyncClient(timeout=30) as client:
        # Enrollment loop
        node_id = None
        while not node_id:
            node_id = await enroll(client)
            if not node_id:
                print("[AGENT] Retrying enrollment in 30s...")
                await asyncio.sleep(30)

        # Register model with control node
        await register_model(client, node_id)

        # Heartbeat loop
        print(f"[AGENT] Starting heartbeat loop (interval: {HEARTBEAT_INTERVAL}s)")
        while True:
            await heartbeat_loop(client, node_id)

            # If heartbeat loop exits (node not found), re-enroll
            print("[AGENT] Heartbeat loop exited. Re-enrolling...")
            node_id = None
            while not node_id:
                node_id = await enroll(client)
                if not node_id:
                    await asyncio.sleep(30)
            await register_model(client, node_id)


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("[AGENT] Shutting down...")
        sys.exit(0)