|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| param(
| # IP address of the MAC control node; used to build the control URL (port 8000) and the admin-panel hint.
| [string]$ControlNodeIP = "10.10.13.30",
| # Enrollment token for this worker; prompted for interactively when left empty.
| [string]$EnrollmentToken = "",
| # Name this worker registers under; when empty the user is prompted and "worker-<COMPUTERNAME>" is the fallback.
| [string]$NodeName = "",
| # HuggingFace model id to serve; when empty an interactive menu picks one.
| [string]$Model = "",
| # TCP port for vLLM; used for the firewall rule, the published container port and VLLM_PORT.
| [int]$VllmPort = 8001,
| # Written to GPU_MEM_UTIL, which the compose file passes to vLLM as --gpu-memory-utilization.
| [float]$GpuMemUtil = 0.85,
| # Written to MAX_MODEL_LEN, which the compose file passes to vLLM as --max-model-len.
| [int]$MaxModelLen = 8192
|
| )
|
|
|
| $ErrorActionPreference = "Stop"
|
|
|
| function Write-Step {
|     # Prints a cyan "[MAC]" step heading, preceded by a blank line.
|     param([string]$msg)
|
|     Write-Host "`n[MAC] $msg" -ForegroundColor Cyan
| }
|
| function Write-Ok {
|     # Prints a green "[OK]" success line.
|     param([string]$msg)
|
|     Write-Host "[OK] $msg" -ForegroundColor Green
| }
|
| function Write-Warn {
|     # Prints a yellow "[!]" warning line.
|     param([string]$msg)
|
|     Write-Host "[!] $msg" -ForegroundColor Yellow
| }
|
| # Startup banner, printed in magenta from an expandable here-string.
| # NOTE(review): the "β" runs look like mis-decoded box-drawing/dash characters - confirm the file's encoding.
| Write-Host @"
|
|
|
| ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| β MAC Worker Node Setup β MBM AI Cloud β
|
| β Setting up GPU inference worker β
|
| ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
| "@ -ForegroundColor Magenta
|
|
|
|
|
| # Require elevation: resolve the current Windows identity and stop unless it
| # holds the built-in Administrator role.
| $currentIdentity = [Security.Principal.WindowsIdentity]::GetCurrent()
| $currentPrincipal = New-Object Security.Principal.WindowsPrincipal($currentIdentity)
| $isAdmin = $currentPrincipal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)
|
| if (-not $isAdmin) {
|     Write-Host "ERROR: This script must be run as Administrator" -ForegroundColor Red
|     Write-Host "Right-click PowerShell -> Run as Administrator" -ForegroundColor Yellow
|     exit 1
| }
|
|
|
|
|
| # Verify that an NVIDIA GPU and driver are usable on the host; abort otherwise.
| Write-Step "Checking NVIDIA GPU..."
| try {
|     # A missing nvidia-smi binary raises a command-not-found error, handled by the catch below.
|     $gpu = & nvidia-smi --query-gpu=name,memory.total,driver_version --format=csv,noheader 2>$null
|
|     # Non-empty output alone is not proof of a working driver: nvidia-smi can
|     # print diagnostic text while exiting non-zero, so check the exit code too.
|     if ($LASTEXITCODE -ne 0 -or -not $gpu) {
|         throw "No GPU"
|     }
|
|     # One output line per GPU; join them so multi-GPU hosts stay readable.
|     Write-Ok "GPU found: $(@($gpu) -join '; ')"
| } catch {
|     Write-Host "ERROR: NVIDIA GPU or driver not found. Install latest NVIDIA drivers first:" -ForegroundColor Red
|     Write-Host " https://www.nvidia.com/Download/index.aspx" -ForegroundColor Yellow
|     exit 1
| }
|
|
|
|
|
| # Make sure WSL2 is present, installing it (without a distribution) when it is not.
| Write-Step "Checking WSL2..."
|
| # Probe by asking WSL for its version: a missing binary (caught below) or a
| # non-zero exit code both count as "not installed".
| $wslInstalled = $false
| try {
|     $wslVersion = wsl --version 2>$null
|     $wslInstalled = ($LASTEXITCODE -eq 0)
| } catch {}
|
| if ($wslInstalled) {
|     Write-Ok "WSL2 is already installed"
| } else {
|     Write-Step "Installing WSL2 (this may require a reboot)..."
|     wsl --install --no-distribution
|     Write-Warn "WSL2 installed. If prompted, REBOOT and re-run this script."
|     Write-Host "After reboot, run: .\setup-worker.ps1" -ForegroundColor Yellow
|     Read-Host "Press Enter to continue (or Ctrl+C to reboot first)"
| }
|
|
|
|
|
| # Ensure a running Docker engine; download and install Docker Desktop when absent.
| Write-Step "Checking Docker..."
| $dockerInstalled = $false
| try {
|     # The server version is only returned when the daemon is reachable.
|     $dockerVer = docker version --format '{{.Server.Version}}' 2>$null
|     if ($LASTEXITCODE -eq 0 -and $dockerVer) { $dockerInstalled = $true }
| } catch {}
|
| if (-not $dockerInstalled) {
|     Write-Step "Downloading Docker Desktop..."
|     $dockerUrl = "https://desktop.docker.com/win/main/amd64/Docker%20Desktop%20Installer.exe"
|     $installerPath = "$env:TEMP\DockerDesktopInstaller.exe"
|
|     if (-not (Test-Path $installerPath)) {
|         # Download under a temporary name so an interrupted transfer is never
|         # mistaken for a complete installer on the next run.
|         $partialPath = "$installerPath.partial"
|         Invoke-WebRequest -Uri $dockerUrl -OutFile $partialPath -UseBasicParsing
|         Move-Item -Path $partialPath -Destination $installerPath -Force
|     }
|
|     Write-Step "Installing Docker Desktop (this takes a few minutes)..."
|     $installer = Start-Process -FilePath $installerPath -ArgumentList "install","--quiet","--accept-license" -Wait -NoNewWindow -PassThru
|     if ($installer.ExitCode -ne 0) {
|         # Surface the failure but keep going: the re-check below decides whether Docker is usable.
|         Write-Warn "Docker Desktop installer exited with code $($installer.ExitCode)"
|     }
|
|     Write-Warn "Docker Desktop installed. You need to:"
|     Write-Host " 1. Start Docker Desktop from Start Menu" -ForegroundColor Yellow
|     Write-Host " 2. Wait for it to finish starting (whale icon in taskbar)" -ForegroundColor Yellow
|     Write-Host " 3. Re-run this script" -ForegroundColor Yellow
|     Read-Host "Press Enter when Docker Desktop is running"
|
|     # This session still holds the pre-install PATH; reload it from the registry
|     # so a freshly installed docker CLI can be resolved without a new shell.
|     $env:Path = [Environment]::GetEnvironmentVariable("Path", "Machine") + ";" + [Environment]::GetEnvironmentVariable("Path", "User")
|
|     try {
|         $dockerVer = docker version --format '{{.Server.Version}}' 2>$null
|         if ($LASTEXITCODE -ne 0) { throw "Docker not ready" }
|         $dockerInstalled = $true
|     } catch {
|         Write-Host "Docker is not running yet. Start Docker Desktop and re-run." -ForegroundColor Red
|         exit 1
|     }
| } else {
|     Write-Ok "Docker is installed: $dockerVer"
| }
|
|
|
|
|
| # Smoke-test GPU passthrough by running nvidia-smi inside a CUDA base container.
| Write-Step "Checking Docker GPU support..."
| try {
|     # docker reports image-pull progress on stderr. Under Windows PowerShell with
|     # $ErrorActionPreference = "Stop", redirected native stderr is raised as a
|     # terminating error, which would flag a healthy setup as failed on first run.
|     # Relax the preference for this one call and rely on the exit code instead.
|     $savedPreference = $ErrorActionPreference
|     $ErrorActionPreference = "Continue"
|     try {
|         $gpuTest = docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi 2>$null
|     } finally {
|         $ErrorActionPreference = $savedPreference
|     }
|
|     if ($LASTEXITCODE -eq 0) {
|         Write-Ok "Docker GPU (NVIDIA Container Toolkit) is working"
|     } else {
|         throw "GPU test failed"
|     }
| } catch {
|     # Deliberately non-fatal: the user can fix the Docker settings and continue.
|     Write-Warn "Docker GPU test failed. Ensure Docker Desktop has WSL2 backend enabled."
|     Write-Host " Docker Desktop -> Settings -> General -> Use WSL2 based engine = ON" -ForegroundColor Yellow
|     Write-Host " Docker Desktop -> Settings -> Resources -> WSL Integration -> Enable" -ForegroundColor Yellow
|     Read-Host "Fix the settings and press Enter to continue"
| }
|
|
|
|
|
| # Open the vLLM port for inbound TCP on Private/Domain networks (idempotent:
| # an existing rule with the same display name is left untouched).
| Write-Step "Configuring firewall..."
| $fwRule = "MAC vLLM Worker ($VllmPort)"
| $existing = Get-NetFirewallRule -DisplayName $fwRule -ErrorAction SilentlyContinue
|
| if ($existing) {
|     Write-Ok "Firewall rule already exists for port $VllmPort"
| } else {
|     $ruleSpec = @{
|         DisplayName = $fwRule
|         Direction   = "Inbound"
|         Protocol    = "TCP"
|         LocalPort   = $VllmPort
|         Action      = "Allow"
|         Profile     = "Private", "Domain"
|     }
|     $null = New-NetFirewallRule @ruleSpec
|     Write-Ok "Firewall rule created for port $VllmPort"
| }
|
|
|
|
|
| # Collect any settings that were not supplied as parameters: node name,
| # enrollment token and model. Pasted/typed values are trimmed so stray
| # whitespace does not end up in the generated .env file.
| Write-Step "Configuring worker node..."
|
| if (-not $NodeName) {
|     $hostname = $env:COMPUTERNAME
|     $NodeName = "$(Read-Host "Enter node name (default: worker-$hostname)")".Trim()
|     # An empty answer accepts the default.
|     if (-not $NodeName) { $NodeName = "worker-$hostname" }
| }
|
| if (-not $EnrollmentToken) {
|     Write-Host "`nYou need an enrollment token from the MAC admin panel." -ForegroundColor Yellow
|     Write-Host "Ask the admin to generate one at: http://$ControlNodeIP/admin -> Cluster -> Generate Token`n" -ForegroundColor Yellow
|     $EnrollmentToken = "$(Read-Host "Paste enrollment token")".Trim()
|     if (-not $EnrollmentToken) {
|         Write-Host "ERROR: Enrollment token is required" -ForegroundColor Red
|         exit 1
|     }
| }
|
| if (-not $Model) {
|     Write-Host "`nChoose a model for this worker:" -ForegroundColor Yellow
|     Write-Host " [1] Qwen2.5-Coder-7B - Code generation & debugging (recommended for PC2)" -ForegroundColor White
|     Write-Host " [2] DeepSeek-R1-7B - Math & reasoning (recommended for PC3)" -ForegroundColor White
|     Write-Host " [3] Qwen2.5-7B-Instruct - General chat (same as PC1)" -ForegroundColor White
|     Write-Host " [4] Custom model - Enter HuggingFace model name" -ForegroundColor White
|     $choice = "$(Read-Host "Select (1-4)")".Trim()
|     switch ($choice) {
|         "1" { $Model = "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ" }
|         "2" { $Model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" }
|         "3" { $Model = "Qwen/Qwen2.5-7B-Instruct-AWQ" }
|         "4" {
|             $Model = "$(Read-Host "Enter full HuggingFace model name")".Trim()
|             # An empty name would reach vLLM as a bare "--model" flag; stop here instead.
|             if (-not $Model) {
|                 Write-Host "ERROR: Model name is required" -ForegroundColor Red
|                 exit 1
|             }
|         }
|         default {
|             # Any other answer (including just Enter) selects option 1.
|             $Model = "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
|             if ($choice) { Write-Warn "Unrecognized choice '$choice'; using $Model" }
|         }
|     }
| }
|
|
|
|
|
| # Determine this PC's IPv4 address for display in the summary.
| # Preference order: DHCP address on a Wi-Fi adapter, any DHCP address, then
| # any other usable address (covers statically configured hosts).
| $ipv4Addresses = @(Get-NetIPAddress -AddressFamily IPv4)
|
| $localIP = ($ipv4Addresses | Where-Object { $_.InterfaceAlias -like "*Wi-Fi*" -and $_.PrefixOrigin -eq "Dhcp" } | Select-Object -First 1).IPAddress
| if (-not $localIP) {
|     $localIP = ($ipv4Addresses | Where-Object { $_.PrefixOrigin -eq "Dhcp" } | Select-Object -First 1).IPAddress
| }
| if (-not $localIP) {
|     # Skip loopback, link-local (169.254.x.x) and virtual-switch adapters.
|     $localIP = ($ipv4Addresses | Where-Object {
|         $_.IPAddress -notlike "127.*" -and
|         $_.IPAddress -notlike "169.254.*" -and
|         $_.InterfaceAlias -notlike "vEthernet*"
|     } | Select-Object -First 1).IPAddress
| }
|
| if ($localIP) {
|     Write-Ok "This PC's IP: $localIP"
| } else {
|     Write-Warn "Could not determine this PC's IPv4 address"
| }
|
|
|
|
|
| # Gather host hardware facts that are written to .env and reported by the agent.
|
| # Win32_Processor returns one instance per physical socket; sum them so
| # multi-socket hosts produce a single integer rather than a list like "16 16".
| $cpuCores = [int](Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum
|
| $ramMB = [math]::Round((Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory / 1MB)
|
| # nvidia-smi prints one "name, memory" line per GPU. Use the first line only,
| # matching the single GPU (count: 1) reserved in the compose file.
| $gpuInfo = @(& nvidia-smi --query-gpu=name,memory.total --format=csv,noheader)[0].Trim()
| $gpuFields = $gpuInfo -split ","
| $gpuName = $gpuFields[0].Trim()
| # memory.total carries a unit suffix (e.g. "12288 MiB"); keep the digits only.
| $gpuVram = [int]($gpuFields[1].Trim() -replace '[^0-9]','')
|
|
|
| # Echo the resolved configuration so the operator can review it before files are written.
| Write-Host "`n Configuration Summary:" -ForegroundColor Cyan
|
| $summaryLines = @(
|     " Node Name: $NodeName",
|     " Control Node: $ControlNodeIP",
|     " This PC IP: $localIP",
|     " GPU: $gpuName ($gpuVram MB)",
|     " RAM: $ramMB MB",
|     " CPU Cores: $cpuCores",
|     " Model: $Model",
|     " vLLM Port: $VllmPort"
| )
| foreach ($summaryLine in $summaryLines) {
|     Write-Host $summaryLine
| }
|
|
|
|
|
| # Create the per-user working directory that will hold .env, docker-compose.yml and the agent.
| Write-Step "Setting up worker directory..."
| $workerDir = "$env:USERPROFILE\mac-worker"
| $workerDirExists = Test-Path $workerDir
| if (-not $workerDirExists) {
|     $null = New-Item -ItemType Directory -Path $workerDir
| }
|
|
| # Build the worker's .env file from the resolved parameters and detected hardware.
| # The here-string is expandable, so every ${...} below is replaced by PowerShell at generation time.
| # The file is written into $workerDir, where the compose file references it via env_file.
| $envContent = @"
|
| # MAC Worker Node Configuration
|
| # Generated: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")
|
|
|
| CONTROL_NODE_URL=http://${ControlNodeIP}:8000
|
| ENROLLMENT_TOKEN=${EnrollmentToken}
|
| NODE_NAME=${NodeName}
|
| VLLM_MODEL=${Model}
|
| VLLM_PORT=${VllmPort}
|
| GPU_MEM_UTIL=${GpuMemUtil}
|
| MAX_MODEL_LEN=${MaxModelLen}
|
| GPU_NAME=${gpuName}
|
| GPU_VRAM_MB=${gpuVram}
|
| RAM_TOTAL_MB=${ramMB}
|
| CPU_CORES=${cpuCores}
|
| HEARTBEAT_INTERVAL=30
|
| "@
|
|
|
| Set-Content -Path "$workerDir\.env" -Value $envContent
|
| Write-Ok "Created .env at $workerDir\.env"
|
|
|
|
|
| # Generate docker-compose.yml with two services on a private bridge network:
| #   vllm         - the OpenAI-compatible vLLM server, with one NVIDIA GPU reserved
| #   worker-agent - the Python enrollment/heartbeat agent, started once vllm is healthy
| # Unescaped ${...} (e.g. ${VllmPort}) is expanded by PowerShell now; backtick-escaped
| # `${...} is written literally and resolved by Docker Compose from .env at run time.
| # Comment text is kept ASCII-only so the YAML is valid whatever encoding Set-Content uses.
| $composeContent = @"
| # MAC GPU Worker Node - $NodeName
| # Model: $Model
| # Generated: $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")
|
| services:
|
|   vllm:
|     image: vllm/vllm-openai:latest
|     container_name: mac-vllm-worker
|     ports:
|       - "${VllmPort}:${VllmPort}"
|     environment:
|       - HF_HOME=/root/.cache/huggingface
|     volumes:
|       - hf-cache:/root/.cache/huggingface
|     command: >
|       --model `${VLLM_MODEL}
|       --port `${VLLM_PORT}
|       --gpu-memory-utilization `${GPU_MEM_UTIL}
|       --max-model-len `${MAX_MODEL_LEN}
|       --trust-remote-code
|       --enforce-eager
|     deploy:
|       resources:
|         reservations:
|           devices:
|             - driver: nvidia
|               count: 1
|               capabilities: [gpu]
|     restart: unless-stopped
|     networks:
|       - worker-net
|     healthcheck:
|       test: ["CMD-SHELL", "curl -f http://localhost:`${VLLM_PORT}/health || exit 1"]
|       interval: 30s
|       timeout: 10s
|       retries: 5
|       # The first start downloads the model (15-30 minutes per the setup notes).
|       # Failures inside start_period do not count toward retries, so a long grace
|       # period keeps vllm from being marked unhealthy, which would block worker-agent.
|       start_period: 1800s
|
|   worker-agent:
|     image: python:3.11-slim
|     container_name: mac-worker-agent
|     env_file: .env
|     volumes:
|       - ./worker-agent.py:/app/agent.py:ro
|       - agent-state:/tmp
|     command: >
|       bash -c "pip install httpx psutil --quiet && python /app/agent.py"
|     depends_on:
|       vllm:
|         condition: service_healthy
|     restart: unless-stopped
|     networks:
|       - worker-net
|
| volumes:
|   hf-cache:
|   agent-state:
|
| networks:
|   worker-net:
|     driver: bridge
| "@
|
| Set-Content -Path "$workerDir\docker-compose.yml" -Value $composeContent
| Write-Ok "Created docker-compose.yml"
|
|
|
|
|
| # Generate worker-agent.py: the Python agent that waits for vLLM, enrolls this
| # node with the control node and then posts periodic heartbeats with resource
| # metrics. The here-string is literal (single-quoted), so nothing inside it is
| # expanded by PowerShell; all settings arrive through the container environment.
| # The script text is ASCII-only so it stays valid UTF-8 whatever encoding Set-Content uses.
| $agentScript = @'
| #!/usr/bin/env python3
| """MAC Worker Agent - Enrolls with control node and sends periodic heartbeats."""
|
| import asyncio, json, os, socket, sys, time
| import httpx
|
| CONTROL_URL = os.environ.get("CONTROL_NODE_URL", "http://10.10.13.30:8000")
| ENROLLMENT_TOKEN = os.environ.get("ENROLLMENT_TOKEN", "")
| NODE_NAME = os.environ.get("NODE_NAME", f"worker-{socket.gethostname()}")
| # The agent runs in its own container, where "localhost" is the agent itself.
| # "vllm" is the Compose service name on the shared network; override with
| # VLLM_HOST when running the agent elsewhere.
| VLLM_HOST = os.environ.get("VLLM_HOST", "vllm")
| VLLM_PORT = int(os.environ.get("VLLM_PORT", 8001))
| GPU_NAME = os.environ.get("GPU_NAME", "NVIDIA GPU")
| GPU_VRAM_MB = int(os.environ.get("GPU_VRAM_MB", 12288))
| RAM_TOTAL_MB = int(os.environ.get("RAM_TOTAL_MB", 16384))
| CPU_CORES = int(os.environ.get("CPU_CORES", 8))
| HEARTBEAT_INTERVAL = int(os.environ.get("HEARTBEAT_INTERVAL", 30))
|
| API = f"{CONTROL_URL}/api/v1"
| STATE_FILE = "/tmp/mac_worker_state.json"
|
|
| def get_local_ip():
|     """Return the source IP of the default route, or 127.0.0.1 on failure."""
|     # NOTE(review): inside a bridge-network container this is the container's
|     # address, not the host's - confirm what the control node expects.
|     try:
|         with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
|             # Connecting a UDP socket sends nothing; it only selects the outbound interface.
|             s.connect(("8.8.8.8", 80))
|             return s.getsockname()[0]
|     except Exception:
|         return "127.0.0.1"
|
|
| def load_state():
|     """Load persisted enrollment state; a missing or corrupt file yields {}."""
|     try:
|         with open(STATE_FILE, "r") as f:
|             return json.load(f)
|     except (FileNotFoundError, json.JSONDecodeError):
|         return {}
|
|
| def save_state(data):
|     """Persist enrollment state as JSON."""
|     with open(STATE_FILE, "w") as f:
|         json.dump(data, f)
|
|
| def get_resource_metrics():
|     """Collect CPU/RAM (psutil) and GPU (nvidia-smi) metrics, best effort.
|
|     Any metric whose source is unavailable stays at zero.
|     """
|     metrics = {"cpu_util_pct": 0.0, "ram_used_mb": 0, "gpu_util_pct": 0.0, "gpu_vram_used_mb": 0}
|     try:
|         import psutil
|         metrics["cpu_util_pct"] = psutil.cpu_percent(interval=1)
|         metrics["ram_used_mb"] = int(psutil.virtual_memory().used / 1024 / 1024)
|     except ImportError:
|         pass
|     try:
|         import subprocess
|         result = subprocess.run(
|             ["nvidia-smi", "--query-gpu=utilization.gpu,memory.used", "--format=csv,noheader,nounits"],
|             capture_output=True, text=True, timeout=5
|         )
|         if result.returncode == 0:
|             parts = result.stdout.strip().split(",")
|             if len(parts) >= 2:
|                 metrics["gpu_util_pct"] = float(parts[0].strip())
|                 metrics["gpu_vram_used_mb"] = int(float(parts[1].strip()))
|     except Exception:
|         # Deliberate best effort: nvidia-smi may be absent, slow, or print something unparsable.
|         pass
|     return metrics
|
|
| async def enroll(client):
|     """Return this node's id, enrolling with the control node if needed.
|
|     Returns None when enrollment is not possible right now.
|     """
|     state = load_state()
|     if state.get("node_id"):
|         print(f"[AGENT] Already enrolled as node {state['node_id']}")
|         return state["node_id"]
|     if not ENROLLMENT_TOKEN:
|         print("[AGENT] ERROR: No ENROLLMENT_TOKEN set")
|         return None
|     ip = get_local_ip()
|     payload = {
|         "enrollment_token": ENROLLMENT_TOKEN, "name": NODE_NAME,
|         "hostname": socket.gethostname(), "ip_address": ip, "port": VLLM_PORT,
|         "gpu_name": GPU_NAME, "gpu_vram_mb": GPU_VRAM_MB,
|         "ram_total_mb": RAM_TOTAL_MB, "cpu_cores": CPU_CORES,
|     }
|     try:
|         resp = await client.post(f"{API}/nodes/enroll", json=payload)
|     except httpx.RequestError as e:
|         print(f"[AGENT] Connection error: {e}")
|         return None
|     if resp.status_code != 200:
|         print(f"[AGENT] Enrollment failed: {resp.status_code} {resp.text}")
|         return None
|     node_id = resp.json().get("id")
|     if not node_id:
|         # Do not persist an empty id; the caller will retry.
|         print("[AGENT] Enrollment response did not contain a node id")
|         return None
|     save_state({"node_id": node_id, "name": NODE_NAME})
|     print(f"[AGENT] Enrolled! Node ID: {node_id}")
|     return node_id
|
|
| async def heartbeat_loop(client, node_id):
|     """Post metrics every HEARTBEAT_INTERVAL seconds.
|
|     Returns only when the control node answers 404 (node unknown), after
|     clearing the saved state so the caller can enroll again.
|     """
|     consecutive_failures = 0
|     while True:
|         try:
|             metrics = get_resource_metrics()
|             resp = await client.post(f"{API}/nodes/heartbeat/{node_id}", json=metrics)
|             if resp.status_code == 200:
|                 consecutive_failures = 0
|                 warnings = resp.json().get("warnings", [])
|                 if warnings:
|                     print(f"[AGENT] Warnings: {warnings}")
|             elif resp.status_code == 404:
|                 print("[AGENT] Node not found - re-enrolling...")
|                 save_state({})
|                 return
|             else:
|                 consecutive_failures += 1
|                 print(f"[AGENT] Heartbeat rejected: {resp.status_code}")
|         except httpx.RequestError as e:
|             consecutive_failures += 1
|             print(f"[AGENT] Heartbeat error: {e}")
|         if consecutive_failures >= 10:
|             print("[AGENT] Too many failures, waiting 60s...")
|             await asyncio.sleep(60)
|             consecutive_failures = 0
|         else:
|             await asyncio.sleep(HEARTBEAT_INTERVAL)
|
|
| async def wait_for_vllm():
|     """Poll vLLM's /health endpoint every 5s for up to 10 minutes."""
|     print(f"[AGENT] Waiting for vLLM at {VLLM_HOST}:{VLLM_PORT}...")
|     async with httpx.AsyncClient(timeout=5) as client:
|         for _ in range(120):
|             try:
|                 resp = await client.get(f"http://{VLLM_HOST}:{VLLM_PORT}/health")
|                 if resp.status_code == 200:
|                     print("[AGENT] vLLM ready!")
|                     return True
|             except httpx.RequestError:
|                 pass
|             await asyncio.sleep(5)
|     print("[AGENT] WARNING: vLLM not ready after 10 min")
|     return False
|
|
| async def enroll_until_success(client):
|     """Retry enrollment every 30s until a node id is obtained."""
|     while True:
|         node_id = await enroll(client)
|         if node_id:
|             return node_id
|         print("[AGENT] Retrying in 30s...")
|         await asyncio.sleep(30)
|
|
| async def main():
|     print(f"[AGENT] MAC Worker Agent - {NODE_NAME}")
|     print(f"[AGENT] Control: {CONTROL_URL}")
|     await wait_for_vllm()
|     async with httpx.AsyncClient(timeout=30) as client:
|         while True:
|             node_id = await enroll_until_success(client)
|             print(f"[AGENT] Starting heartbeat loop (every {HEARTBEAT_INTERVAL}s)")
|             # Returns only when the control node no longer knows this node; enroll again.
|             await heartbeat_loop(client, node_id)
|
|
| if __name__ == "__main__":
|     asyncio.run(main())
| '@
|
| Set-Content -Path "$workerDir\worker-agent.py" -Value $agentScript
| Write-Ok "Created worker-agent.py"
|
|
|
|
|
| # Optionally bring the stack up now; otherwise print the command to run later.
| Write-Step "Starting MAC worker..."
| Write-Host "`n This will pull the vLLM Docker image (~8GB) and the model." -ForegroundColor Yellow
| Write-Host " First run may take 15-30 minutes depending on internet speed.`n" -ForegroundColor Yellow
|
| $startNow = Read-Host "Start the worker now? (Y/n)"
| # -ne is case-insensitive for strings, so this covers both "n" and "N".
| if ($startNow -ne "n") {
|     Push-Location $workerDir
|     try {
|         docker compose up -d
|         $composeExitCode = $LASTEXITCODE
|     } finally {
|         # Restore the caller's location even if docker cannot be invoked.
|         Pop-Location
|     }
|
|     if ($composeExitCode -ne 0) {
|         # Do not report success when Compose failed to start the stack.
|         Write-Host "ERROR: 'docker compose up -d' failed (exit code $composeExitCode). See the output above." -ForegroundColor Red
|         exit 1
|     }
|
|     Write-Host "`n" -NoNewline
|     Write-Ok "Worker is starting! Monitor with:"
|     Write-Host " cd $workerDir" -ForegroundColor White
|     Write-Host " docker compose logs -f # Watch all logs" -ForegroundColor White
|     Write-Host " docker compose logs -f vllm # Watch model loading" -ForegroundColor White
|     Write-Host " docker compose logs -f worker-agent # Watch enrollment" -ForegroundColor White
| } else {
|     Write-Ok "Setup complete. When ready, run:"
|     Write-Host " cd $workerDir" -ForegroundColor White
|     Write-Host " docker compose up -d" -ForegroundColor White
| }
|
| # Closing banner, printed in green; $NodeName, $Model and ${ControlNodeIP} are expanded by the here-string.
| # NOTE(review): the "β" runs look like mis-decoded box-drawing characters - confirm the file's encoding.
| Write-Host @"
|
|
|
| ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| β Setup complete! Worker: $NodeName
|
| β Model: $Model
|
| β Control: http://${ControlNodeIP}:8000
|
| β β
|
| β The worker will auto-enroll with the control β
|
| β node and start sending heartbeats. β
|
| ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
| "@ -ForegroundColor Green
|
|
|