Update app.py

app.py
CHANGED

@@ -1,9 +1,7 @@
 """
-SAM-Z-1 Distributed Compute Cluster Head Node
+SAM-Z-1 Distributed Compute Cluster Head Node
 - Smart load balancing with distributed compute
 - Real-time status dashboard
-- Auto-detects worker version (v4 vs v5)
-- Supports 4 new models with backward compatibility
 """
 
 from fastapi import FastAPI, HTTPException, WebSocket
@@ -17,7 +15,7 @@ from typing import List, Optional, Dict
 from collections import deque
 import random
 
-app = FastAPI(title="SAM-Z-1 Distributed Cluster", version="5.0.0")
+app = FastAPI(title="SAM-Z-1 Distributed Cluster", version="4.0.0")
 
 # ============================================================================
 # Configuration
@@ -26,27 +24,15 @@ app = FastAPI(title="SAM-Z-1 Distributed Cluster", version="5.0.0")
 WORKER_URLS = [
     "https://bc-ai-worker-2.hf.space",
     "https://bc-ai-worker-sam-z-api.hf.space",
-    "https://bc-ai-worker-sam-z-api.hf.space",
-    "https://bc-ai-worker-3.hf.space",
-    "https://bc-ai-worker-4.hf.space",
-    "https://bc-ai-worker-5.hf.space"
 ]
 
-HEALTH_CHECK_INTERVAL = 5
+HEALTH_CHECK_INTERVAL = 5  # faster checks for real-time dashboard
 LOAD_CHECK_WINDOW = 10
 
 LIGHT_LOAD_THRESHOLD = 2
 HEAVY_LOAD_THRESHOLD = 5
 
-#
-NEW_MODELS = [
-    "SAM-X-1-Large",
-    "SAM-X-1-Fast",
-    "SAM-X-1-Mini",
-    "SAM-X-1-Nano"
-]
-
-# Worker state with version detection
+# Worker state
 worker_health = {
     url: {
         "healthy": True,
@@ -55,14 +41,12 @@ worker_health = {
         "total_requests": 0,
         "total_tokens": 0,
         "avg_latency": 0,
-        "role": "idle",
-        "version": None,  # Will be auto-detected: "v4" or "v5"
-        "supports_models": []  # Models this worker supports
+        "role": "idle"  # "generator", "decoder", "full", "idle"
     } for url in WORKER_URLS
 }
 
 request_timestamps = deque(maxlen=100)
-current_load_mode = "light"
+current_load_mode = "light"  # "light", "medium", "heavy"
 cluster_stats = {
     "total_requests": 0,
     "successful_requests": 0,
@@ -70,6 +54,7 @@
     "uptime_start": time.time()
 }
 
+# Active WebSocket connections for real-time updates
 active_connections = set()
 
 # ============================================================================
@@ -84,7 +69,6 @@ class GenerateRequest(BaseModel):
     top_p: float = 0.9
     repetition_penalty: float = 1.1
     stream: bool = True
-    model: Optional[str] = None  # NEW: Model selection
 
 class ChatMessage(BaseModel):
     role: str
@@ -98,138 +82,6 @@ class ChatRequest(BaseModel):
     top_p: float = 0.9
     repetition_penalty: float = 1.1
     stream: bool = True
-    model: Optional[str] = None  # NEW: Model selection
-
-# ============================================================================
-# Worker Version Detection
-# ============================================================================
-
-async def detect_worker_version(worker_url: str) -> tuple:
-    """
-    Detect worker version and supported models
-    Returns: (version: str, supported_models: List[str])
-    """
-    try:
-        async with httpx.AsyncClient(timeout=10.0) as client:
-            # Try to get worker info endpoint (v5 feature)
-            try:
-                response = await client.get(f"{worker_url}/info")
-                if response.status_code == 200:
-                    data = response.json()
-                    version = data.get("version", "v5")
-                    models = data.get("models", NEW_MODELS)
-                    return version, models
-            except:
-                pass
-
-            # Try to get models list (v5 feature)
-            try:
-                response = await client.get(f"{worker_url}/models")
-                if response.status_code == 200:
-                    data = response.json()
-                    models = data.get("models", [])
-                    if models:
-                        return "v5", models
-            except:
-                pass
-
-            # Fallback: worker is v4 (no model selection)
-            return "v4", []
-
-    except Exception as e:
-        print(f"⚠️ Version detection failed for {worker_url}: {e}")
-        return "v4", []
-
-async def check_worker_health(worker_url: str) -> bool:
-    """Check if worker is healthy"""
-    try:
-        async with httpx.AsyncClient(timeout=5.0) as client:
-            response = await client.get(f"{worker_url}/health")
-            return response.status_code == 200
-    except:
-        return False
-
-async def health_check_loop():
-    """Health check with version detection"""
-    while True:
-        for worker_url in WORKER_URLS:
-            # Check health
-            healthy = await check_worker_health(worker_url)
-            worker_health[worker_url]["healthy"] = healthy
-            worker_health[worker_url]["last_check"] = time.time()
-
-            # Detect version if not yet detected
-            if worker_health[worker_url]["version"] is None:
-                version, models = await detect_worker_version(worker_url)
-                worker_health[worker_url]["version"] = version
-                worker_health[worker_url]["supports_models"] = models
-
-            status = "✅" if healthy else "❌"
-            print(f"{status} Worker: {worker_url.split('//')[1].split('.')[0]} | Version: {version} | Models: {len(models)}")
-
-        await broadcast_stats()
-        await asyncio.sleep(HEALTH_CHECK_INTERVAL)
-
-@app.on_event("startup")
-async def startup_event():
-    asyncio.create_task(health_check_loop())
-
-# ============================================================================
-# Smart Worker Selection
-# ============================================================================
-
-def get_workers_for_model(model_name: Optional[str]) -> List[str]:
-    """Get workers that support the requested model"""
-    healthy = get_healthy_workers()
-
-    if not model_name:
-        # No specific model requested, use any healthy worker
-        return healthy
-
-    # Filter workers by model support
-    compatible = []
-    for url in healthy:
-        version = worker_health[url]["version"]
-        models = worker_health[url]["supports_models"]
-
-        if version == "v5" and model_name in models:
-            # v5 worker with explicit model support
-            compatible.append(url)
-        elif version == "v4":
-            # v4 workers don't support model selection but work with default
-            compatible.append(url)
-
-    return compatible if compatible else healthy
-
-def get_healthy_workers() -> List[str]:
-    return [url for url, status in worker_health.items() if status["healthy"]]
-
-def get_least_busy_worker(worker_list: List[str] = None) -> Optional[str]:
-    workers = worker_list if worker_list is not None else get_healthy_workers()
-    if not workers:
-        return None
-    return min(workers, key=lambda url: worker_health[url]["active_requests"])
-
-def select_distributed_workers(model_name: Optional[str] = None) -> tuple:
-    """
-    Select workers for distributed compute with model compatibility
-    Returns: (generators: List[str], decoders: List[str])
-    """
-    compatible = get_workers_for_model(model_name)
-
-    if len(compatible) < 2:
-        return ([compatible[0]], []) if len(compatible) == 1 else ([], [])
-
-    sorted_workers = sorted(compatible, key=lambda url: worker_health[url]["active_requests"])
-
-    if len(compatible) >= 5:
-        return ([sorted_workers[0]], sorted_workers[1:5])
-    elif len(compatible) == 4:
-        return ([sorted_workers[0]], sorted_workers[1:4])
-    elif len(compatible) == 3:
-        return ([sorted_workers[0]], sorted_workers[1:3])
-    else:
-        return ([sorted_workers[0]], [sorted_workers[1]])
 
 # ============================================================================
 # Load Management
@@ -244,20 +96,21 @@ def update_load_mode():
     load = get_current_load()
     healthy_count = len(get_healthy_workers())
 
+    # Adjust thresholds based on available workers
     if healthy_count >= 5:
         if load <= LIGHT_LOAD_THRESHOLD:
-            current_load_mode = "light"
-        elif load <= MEDIUM_LOAD_THRESHOLD:
-            current_load_mode = "medium"
+            current_load_mode = "light"  # 1 gen + 4 decoders
+        elif load <= MEDIUM_LOAD_THRESHOLD:
+            current_load_mode = "medium"  # 2 gens + 3 decoders OR parallel requests
         else:
-            current_load_mode = "heavy"
+            current_load_mode = "heavy"  # all workers independent
     elif healthy_count >= 3:
         if load <= 2:
-            current_load_mode = "light"
+            current_load_mode = "light"  # 1 gen + 2 decoders
         else:
-            current_load_mode = "heavy"
+            current_load_mode = "heavy"  # distribute requests
     else:
-        current_load_mode = "heavy"
+        current_load_mode = "heavy"  # fallback to simple distribution
 
     return current_load_mode, load
 
@@ -265,11 +118,42 @@ def track_request():
     request_timestamps.append(time.time())
     cluster_stats["total_requests"] += 1
 
-
-
-
+def get_healthy_workers() -> List[str]:
+    return [url for url, status in worker_health.items() if status["healthy"]]
+
+def get_least_busy_worker() -> Optional[str]:
+    healthy = get_healthy_workers()
+    if not healthy:
+        return None
+    return min(healthy, key=lambda url: worker_health[url]["active_requests"])
+
+def select_distributed_workers() -> tuple:
+    """
+    Select workers for distributed compute
+    Returns: (generators: List[str], decoders: List[str])
+    """
+    healthy = get_healthy_workers()
+    if len(healthy) < 2:
+        return ([healthy[0]], []) if len(healthy) == 1 else ([], [])
+
+    # Sort by least busy
+    sorted_workers = sorted(healthy, key=lambda url: worker_health[url]["active_requests"])
+
+    if len(healthy) >= 5:
+        # OPTIMAL: 1 generator, 4 decoders
+        return ([sorted_workers[0]], sorted_workers[1:5])
+    elif len(healthy) == 4:
+        # 1 generator, 3 decoders
+        return ([sorted_workers[0]], sorted_workers[1:4])
+    elif len(healthy) == 3:
+        # 1 generator, 2 decoders
+        return ([sorted_workers[0]], sorted_workers[1:3])
+    else:
+        # 1 generator, 1 decoder
+        return ([sorted_workers[0]], [sorted_workers[1]])
 
 async def broadcast_stats():
+    """Broadcast stats to all connected WebSocket clients"""
     if not active_connections:
         return
 
@@ -282,15 +166,13 @@ async def broadcast_stats():
             "load": load,
             "workers": [
                 {
-                    "url": url.split("//")[1].split(".")[0],
+                    "url": url.split("//")[1].split(".")[0],  # shorter name
                     "healthy": status["healthy"],
                     "active": status["active_requests"],
                     "total": status["total_requests"],
                     "tokens": status["total_tokens"],
                     "latency": round(status["avg_latency"], 2),
-                    "role": status["role"],
-                    "version": status["version"] or "detecting...",
-                    "models": len(status["supports_models"])
+                    "role": status["role"]
                 }
                 for url, status in worker_health.items()
             ],
@@ -303,6 +185,7 @@ async def broadcast_stats():
         }
     }
 
+    # Broadcast to all connections
     disconnected = set()
     for ws in active_connections:
         try:
@@ -310,24 +193,36 @@ async def broadcast_stats():
         except:
             disconnected.add(ws)
 
+    # Remove disconnected
     active_connections.difference_update(disconnected)
 
-
-async def websocket_endpoint(websocket: WebSocket):
-    await websocket.accept()
-    active_connections.add(websocket)
-
+async def check_worker_health(worker_url: str) -> bool:
     try:
-
-
-
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            response = await client.get(f"{worker_url}/health")
+            return response.status_code == 200
     except:
-
-
-
+        return False
+
+async def health_check_loop():
+    while True:
+        # Check all workers
+        for worker_url in WORKER_URLS:
+            healthy = await check_worker_health(worker_url)
+            worker_health[worker_url]["healthy"] = healthy
+            worker_health[worker_url]["last_check"] = time.time()
+
+        # Always broadcast stats to connected clients
+        await broadcast_stats()
+
+        await asyncio.sleep(HEALTH_CHECK_INTERVAL)
+
+@app.on_event("startup")
+async def startup_event():
+    asyncio.create_task(health_check_loop())
 
 # ============================================================================
-# Distributed Generation
+# Distributed Compute Generation
 # ============================================================================
 
 async def distributed_generation(
@@ -336,7 +231,11 @@ async def distributed_generation(
     request_data: dict,
     endpoint: str = "generate"
 ):
-    """
+    """
+    DISTRIBUTED COMPUTE MODE
+    - Generator(s) produce token IDs
+    - Multiple decoders process in parallel (load balanced)
+    """
 
     if not generators or not decoders:
         return
@@ -344,13 +243,15 @@ async def distributed_generation(
     token_queue = asyncio.Queue(maxsize=50)
     text_queue = asyncio.Queue(maxsize=50)
 
+    # Mark roles
     for gen_url in generators:
         worker_health[gen_url]["role"] = "generator"
     for dec_url in decoders:
         worker_health[dec_url]["role"] = "decoder"
 
     async def generate_tokens():
-
+        """Generator worker(s)"""
+        gen_url = generators[0]  # primary generator
         try:
             worker_health[gen_url]["active_requests"] += 1
             request_data_tokens = {**request_data, "return_token_ids": True}
@@ -368,6 +269,7 @@ async def distributed_generation(
                         if "token_id" in data:
                             await token_queue.put(data["token_id"])
                         elif "done" in data:
+                            # Send done signal for each decoder
                            for _ in decoders:
                                await token_queue.put(None)
                            break
@@ -382,16 +284,18 @@ async def distributed_generation(
             worker_health[gen_url]["role"] = "idle"
 
     async def decode_tokens(decoder_url: str, decoder_id: int):
+        """Decoder worker - processes tokens from shared queue"""
         try:
             worker_health[decoder_url]["active_requests"] += 1
             batch = []
-            batch_size = 2
+            batch_size = 2  # smaller batches for faster streaming
 
             while True:
                 try:
                     token_id = await asyncio.wait_for(token_queue.get(), timeout=2.0)
 
                     if token_id is None:
+                        # Decode remaining batch
                         if batch:
                             async with httpx.AsyncClient(timeout=10.0) as client:
                                 response = await client.post(
@@ -407,6 +311,7 @@ async def distributed_generation(
 
                     batch.append(token_id)
 
+                    # Decode when batch is full
                     if len(batch) >= batch_size:
                         async with httpx.AsyncClient(timeout=10.0) as client:
                             response = await client.post(
@@ -429,12 +334,16 @@ async def distributed_generation(
             worker_health[decoder_url]["active_requests"] -= 1
             worker_health[decoder_url]["role"] = "idle"
 
+    # Start generator
     gen_task = asyncio.create_task(generate_tokens())
+
+    # Start all decoders
     decoder_tasks = [
         asyncio.create_task(decode_tokens(dec_url, i))
         for i, dec_url in enumerate(decoders)
     ]
 
+    # Stream results
     accumulated_text = ""
     decoders_done = 0
     total_decoders = len(decoders)
@@ -480,19 +389,24 @@ async def heavy_load_generation(worker_url: str, request_data: dict, endpoint: s
         worker_health[worker_url]["role"] = "idle"
 
 # ============================================================================
-#
+# Dashboard
 # ============================================================================
 
 @app.get("/", response_class=HTMLResponse)
 async def dashboard():
-    """Real-time dashboard"""
+    """Real-time futuristic dashboard"""
     return """
 <!DOCTYPE html>
 <html>
 <head>
-    <title>SAM-Z-1 Cluster Control</title>
+    <title>SAM-Z-1 Cluster Control</title>
     <style>
-        * {
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
         body {
             font-family: 'Courier New', monospace;
             background: linear-gradient(135deg, #0a0e27 0%, #1a1f3a 100%);
@@ -501,12 +415,14 @@ async def dashboard():
             overflow-x: hidden;
             overflow-y: auto;
         }
+
         .container {
             padding: 20px;
             max-width: 1400px;
             margin: 0 auto;
             padding-bottom: 40px;
         }
+
         .header {
             text-align: center;
             margin-bottom: 30px;
@@ -516,6 +432,7 @@ async def dashboard():
             border-radius: 10px;
             box-shadow: 0 0 20px rgba(0, 255, 136, 0.3);
         }
+
         .header h1 {
             font-size: 2.5em;
             text-transform: uppercase;
@@ -523,33 +440,18 @@ async def dashboard():
             text-shadow: 0 0 10px #00ff88;
             animation: glow 2s ease-in-out infinite alternate;
         }
+
         @keyframes glow {
             from { text-shadow: 0 0 10px #00ff88, 0 0 20px #00ff88; }
             to { text-shadow: 0 0 20px #00ff88, 0 0 30px #00ff88, 0 0 40px #00ff88; }
         }
-        .version-badge {
-            display: inline-block;
-            padding: 3px 10px;
-            border-radius: 12px;
-            font-size: 10px;
-            margin-left: 8px;
-            font-weight: bold;
-        }
-        .version-v5 {
-            background: rgba(0, 255, 136, 0.2);
-            border: 1px solid #00ff88;
-            color: #00ff88;
-        }
-        .version-v4 {
-            background: rgba(255, 165, 0, 0.2);
-            border: 1px solid #ffa500;
-            color: #ffa500;
-        }
+
         .status-bar {
             display: flex;
             gap: 20px;
             margin-bottom: 30px;
         }
+
         .stat-card {
             flex: 1;
             background: rgba(0, 255, 136, 0.05);
@@ -559,22 +461,83 @@ async def dashboard():
             position: relative;
             overflow: hidden;
         }
+
+        .stat-card::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: -100%;
+            width: 100%;
+            height: 100%;
+            background: linear-gradient(90deg, transparent, rgba(0, 255, 136, 0.2), transparent);
+            animation: scan 3s infinite;
+        }
+
+        @keyframes scan {
+            0% { left: -100%; }
+            100% { left: 100%; }
+        }
+
         .stat-label {
             font-size: 0.8em;
             opacity: 0.7;
             text-transform: uppercase;
         }
+
         .stat-value {
             font-size: 2em;
             font-weight: bold;
             margin-top: 5px;
         }
+
+        .mode-badge {
+            display: inline-block;
+            padding: 5px 15px;
+            border-radius: 20px;
+            font-size: 0.9em;
+            font-weight: bold;
+            text-transform: uppercase;
+            margin-top: 10px;
+        }
+
+        .mode-light {
+            background: rgba(0, 255, 136, 0.2);
+            border: 1px solid #00ff88;
+            color: #00ff88;
+        }
+
+        .mode-heavy {
+            background: rgba(255, 68, 68, 0.2);
+            border: 1px solid #ff4444;
+            color: #ff4444;
+        }
+
         .workers-grid {
             display: grid;
             grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
             gap: 20px;
             margin-bottom: 30px;
         }
+
+        @media (max-width: 768px) {
+            .workers-grid {
+                grid-template-columns: 1fr;
+            }
+
+            .status-bar {
+                flex-direction: column;
+            }
+
+            .info-grid {
+                grid-template-columns: repeat(2, 1fr);
+            }
+
+            .header h1 {
+                font-size: 1.5em;
+                letter-spacing: 2px;
+            }
+        }
+
         .worker-card {
             background: rgba(10, 14, 39, 0.8);
             border: 2px solid #00ff88;
@@ -583,89 +546,166 @@ async def dashboard():
             position: relative;
             transition: all 0.3s;
         }
+
         .worker-card:hover {
             transform: translateY(-5px);
             box-shadow: 0 5px 30px rgba(0, 255, 136, 0.4);
         }
+
         .worker-card.offline {
             border-color: #ff4444;
             opacity: 0.6;
         }
+
         .worker-header {
             display: flex;
             justify-content: space-between;
             align-items: center;
             margin-bottom: 15px;
         }
+
         .worker-name {
             font-size: 1.2em;
             font-weight: bold;
         }
+
         .status-dot {
             width: 12px;
             height: 12px;
             border-radius: 50%;
             animation: pulse 2s infinite;
         }
+
         .status-dot.online {
             background: #00ff88;
             box-shadow: 0 0 10px #00ff88;
         }
+
         .status-dot.offline {
             background: #ff4444;
             box-shadow: 0 0 10px #ff4444;
         }
+
         @keyframes pulse {
             0%, 100% { opacity: 1; }
             50% { opacity: 0.5; }
         }
+
         .worker-stats {
             display: grid;
             grid-template-columns: repeat(2, 1fr);
             gap: 10px;
             margin-top: 15px;
         }
+
         .worker-stat {
             background: rgba(0, 255, 136, 0.05);
             padding: 10px;
             border-radius: 5px;
         }
+
         .worker-stat-label {
             font-size: 0.7em;
             opacity: 0.7;
         }
+
         .worker-stat-value {
             font-size: 1.3em;
             font-weight: bold;
             margin-top: 3px;
         }
+
+        .role-badge {
+            display: inline-block;
+            padding: 3px 10px;
+            border-radius: 12px;
+            font-size: 0.75em;
+            margin-top: 10px;
+            font-weight: bold;
+        }
+
+        .role-generator {
+            background: rgba(255, 165, 0, 0.2);
+            border: 1px solid #ffa500;
+            color: #ffa500;
+        }
+
+        .role-decoder {
+            background: rgba(0, 191, 255, 0.2);
+            border: 1px solid #00bfff;
+            color: #00bfff;
+        }
+
+        .role-full {
+            background: rgba(138, 43, 226, 0.2);
+            border: 1px solid #8a2be2;
+            color: #8a2be2;
+        }
+
+        .role-idle {
+            background: rgba(128, 128, 128, 0.2);
+            border: 1px solid #808080;
+            color: #808080;
+        }
+
+        .progress-bar {
+            width: 100%;
+            height: 4px;
+            background: rgba(0, 255, 136, 0.1);
+            border-radius: 2px;
+            margin-top: 10px;
+            overflow: hidden;
+        }
+
+        .progress-fill {
+            height: 100%;
+            background: linear-gradient(90deg, #00ff88, #00ffff);
+            transition: width 0.3s;
+            box-shadow: 0 0 10px #00ff88;
+        }
+
+        .cluster-info {
+            background: rgba(0, 255, 136, 0.05);
+            border: 1px solid #00ff88;
+            border-radius: 8px;
+            padding: 20px;
+        }
+
+        .info-grid {
+            display: grid;
+            grid-template-columns: repeat(4, 1fr);
+            gap: 20px;
+        }
+
+        .info-item {
+            text-align: center;
+        }
+
         .timestamp {
             text-align: center;
             margin-top: 20px;
             opacity: 0.5;
             font-size: 0.9em;
         }
-        @media (max-width: 768px) {
-            .workers-grid { grid-template-columns: 1fr; }
-            .status-bar { flex-direction: column; }
-        }
     </style>
 </head>
 <body>
     <div class="container">
         <div class="header">
             <h1>⚡ SAM-Z-1 CLUSTER ⚡</h1>
-            <div>DISTRIBUTED COMPUTE SYSTEM
+            <div>DISTRIBUTED COMPUTE SYSTEM v4.0</div>
         </div>
 
         <div class="status-bar">
             <div class="stat-card">
                 <div class="stat-label">Load Mode</div>
                 <div class="stat-value" id="mode">--</div>
+                <div class="mode-badge" id="mode-badge">INITIALIZING</div>
             </div>
             <div class="stat-card">
                 <div class="stat-label">Current Load</div>
                 <div class="stat-value" id="load">0</div>
+                <div class="stat-label">requests / 10s</div>
             </div>
             <div class="stat-card">
                 <div class="stat-label">Total Requests</div>
@@ -677,47 +717,149 @@ async def dashboard():
             </div>
         </div>
 
-        <div class="workers-grid" id="workers"></div>
+        <div class="workers-grid" id="workers">
+            <!-- Workers populated by JS -->
+        </div>
+
+        <div class="cluster-info">
+            <div class="stat-label" style="margin-bottom: 15px;">CLUSTER STATISTICS</div>
+            <div class="info-grid">
+                <div class="info-item">
+                    <div class="stat-label">Successful</div>
+                    <div class="stat-value" style="font-size: 1.5em;" id="success">0</div>
+                </div>
+                <div class="info-item">
+                    <div class="stat-label">Failed</div>
+                    <div class="stat-value" style="font-size: 1.5em;" id="failed">0</div>
+                </div>
+                <div class="info-item">
+                    <div class="stat-label">Uptime</div>
+                    <div class="stat-value" style="font-size: 1.5em;" id="uptime">0s</div>
+                </div>
+                <div class="info-item">
+                    <div class="stat-label">Healthy Workers</div>
+                    <div class="stat-value" style="font-size: 1.5em;" id="healthy">0</div>
+                </div>
+            </div>
+        </div>
 
         <div class="timestamp" id="timestamp">Last update: --</div>
     </div>
 
     <script>
+        // Use wss:// for HTTPS, ws:// for HTTP
        const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
        let ws;
+        let usePolling = false;
 
        function connectWebSocket() {
            try {
                ws = new WebSocket(`${protocol}//${window.location.host}/ws`);
 
-                ws.onopen = () =>
-
-
-
+                ws.onopen = () => {
+                    console.log('✅ WebSocket connected');
+                    usePolling = false;
+                };
+
+                ws.onmessage = (event) => {
+                    const data = JSON.parse(event.data);
+                    updateDashboard(data);
+                };
+
+                ws.onerror = (error) => {
+                    console.error('❌ WebSocket error, switching to polling');
+                    usePolling = true;
+                    startPolling();
+                };
+
+                ws.onclose = () => {
+                    console.log('🔌 WebSocket disconnected');
+                    if (!usePolling) {
+                        setTimeout(connectWebSocket, 3000);
+                    }
+                };
            } catch (e) {
-                console.error('Failed to connect WebSocket');
+                console.error('Failed to connect WebSocket, using polling');
+                usePolling = true;
+                startPolling();
            }
        }
 
+        async function pollStats() {
+            if (!usePolling) return;
+
+            try {
+                const response = await fetch('/api/status');
+                const data = await response.json();
+
+                // Fetch worker stats too
+                const workersRes = await fetch('/workers');
+                const workersData = await workersRes.json();
+
+                // Format data like WebSocket
+                const formattedData = {
+                    timestamp: Date.now() / 1000,
+                    mode: data.mode,
+                    load: data.current_load,
+                    workers: workersData.workers.map(w => ({
+                        url: w.url.split("//")[1].split(".")[0],
+                        healthy: w.healthy,
+                        active: w.active_requests || 0,
+                        total: 0,
+                        tokens: 0,
+                        latency: 0,
+                        role: "idle"
+                    })),
+                    cluster: {
+                        total_requests: 0,
+                        successful: 0,
+                        failed: 0,
+                        uptime: 0,
+                        rps: 0
+                    }
+                };
+
+                updateDashboard(formattedData);
+            } catch (e) {
+                console.error('Polling error:', e);
+            }
+        }
+
+        function startPolling() {
+            pollStats();
+            setInterval(pollStats, 1000);
+        }
+
+        // Try WebSocket first
        connectWebSocket();
 
        function updateDashboard(data) {
+            // Mode
            document.getElementById('mode').textContent = data.mode.toUpperCase();
+            const modeBadge = document.getElementById('mode-badge');
+            modeBadge.textContent = `${data.mode.toUpperCase()} MODE`;
+            modeBadge.className = `mode-badge mode-${data.mode}`;
+
+            // Stats
            document.getElementById('load').textContent = data.load;
            document.getElementById('total-req').textContent = data.cluster.total_requests;
            document.getElementById('rps').textContent = data.cluster.rps;
+            document.getElementById('success').textContent = data.cluster.successful;
+            document.getElementById('failed').textContent = data.cluster.failed;
+            document.getElementById('uptime').textContent = formatUptime(data.cluster.uptime);
 
+            // Workers
            const workersDiv = document.getElementById('workers');
+            const healthyCount = data.workers.filter(w => w.healthy).length;
+            document.getElementById('healthy').textContent = `${healthyCount}/${data.workers.length}`;
+
            workersDiv.innerHTML = data.workers.map(worker => `
                <div class="worker-card ${worker.healthy ? '' : 'offline'}">
                    <div class="worker-header">
-                        <div>
-                            <div class="worker-name">${worker.url}</div>
-                            <span class="version-badge version-${worker.version}">${worker.version.toUpperCase()}</span>
-                            ${worker.models > 0 ? `<span style="font-size:0.8em;opacity:0.7;margin-left:5px">${worker.models} models</span>` : ''}
-                        </div>
+                        <div class="worker-name">${worker.url}</div>
                        <div class="status-dot ${worker.healthy ? 'online' : 'offline'}"></div>
                    </div>
+                    <div class="role-badge role-${worker.role}">${worker.role.toUpperCase()}</div>
                    <div class="worker-stats">
                        <div class="worker-stat">
                            <div class="worker-stat-label">Active</div>
@@ -732,46 +874,69 @@ async def dashboard():
                            <div class="worker-stat-value">${worker.tokens}</div>
                        </div>
                        <div class="worker-stat">
-                            <div class="worker-stat-label">
-                            <div class="worker-stat-value"
+                            <div class="worker-stat-label">Latency</div>
+                            <div class="worker-stat-value">${worker.latency}ms</div>
                        </div>
                    </div>
+                    <div class="progress-bar">
+                        <div class="progress-fill" style="width: ${Math.min(worker.active * 33, 100)}%"></div>
+                    </div>
                </div>
            `).join('');
 
+            // Timestamp
+            const now = new Date();
            document.getElementById('timestamp').textContent =
-                `Last update: ${
+                `Last update: ${now.toLocaleTimeString()}`;
+        }
+
+        function formatUptime(seconds) {
+            const h = Math.floor(seconds / 3600);
+            const m = Math.floor((seconds % 3600) / 60);
+            const s = Math.floor(seconds % 60);
+            return `${h}h ${m}m ${s}s`;
        }
    </script>
 </body>
 </html>
 """
 
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    """WebSocket for real-time dashboard updates"""
+    await websocket.accept()
+    active_connections.add(websocket)
+
+    try:
+        # Send initial data
+        await broadcast_stats()
+
+        # Keep connection alive
+        while True:
+            await websocket.receive_text()
+    except:
+        pass
+    finally:
+        active_connections.discard(websocket)
+
+# ============================================================================
+# API Endpoints
+# ============================================================================
+
 @app.get("/api/status")
 async def api_status():
+    """JSON API for status"""
     mode, load = update_load_mode()
     healthy_count = len(get_healthy_workers())
 
-    # Count v4 vs v5 workers
-    v4_count = sum(1 for w in worker_health.values() if w["version"] == "v4")
-    v5_count = sum(1 for w in worker_health.values() if w["version"] == "v5")
-
     return {
         "name": "SAM-Z-1 Distributed Cluster",
-        "version": "5.0.0",
+        "version": "4.0.0",
         "mode": mode,
         "current_load": load,
         "workers": len(WORKER_URLS),
         "healthy_workers": healthy_count,
-        "v4_workers": v4_count,
-        "v5_workers": v5_count,
-        "features": [
-            "distributed_compute",
-            "smart_load_balancing",
-            "auto_version_detection",
-            "multi_model_support",
-            "real_time_dashboard"
-        ]
+        "features": ["distributed_compute", "smart_load_balancing", "real_time_dashboard"]
     }
 
 @app.get("/health")
@@ -782,34 +947,16 @@ async def health():
         "workers_healthy": healthy_count
     }
 
-@app.get("/models")
-async def list_models():
-    """List all available models across all workers"""
-    all_models = set()
-    for url, status in worker_health.items():
-        if status["healthy"] and status["version"] == "v5":
-            all_models.update(status["supports_models"])
-
-    return {
-        "models": sorted(list(all_models)),
-        "default": "SAM-X-1-Nano" if "SAM-X-1-Nano" in all_models else None
-    }
-
 @app.post("/v1/generate")
 async def generate(request: GenerateRequest):
-    """Generate text with
+    """Generate text with distributed compute"""
     track_request()
     mode, load = update_load_mode()
 
-
-
-
-    if not compatible:
+    healthy = get_healthy_workers()
+    if not healthy:
         cluster_stats["failed_requests"] += 1
-        raise HTTPException(
-            status_code=503,
-            detail=f"No workers available for model: {request.model or 'default'}"
-        )
+        raise HTTPException(status_code=503, detail="No healthy workers")
 
     request_data = {
         "prompt": request.prompt,
@@ -821,15 +968,12 @@ async def generate(request: GenerateRequest):
         "stream": True
     }
 
-
-    if request.model:
-        request_data["model"] = request.model
-
-    print(f"🎯 {mode.upper()} | Load: {load} | Model: {request.model or 'default'} | Workers: {len(compatible)}")
+    print(f"🎯 {mode.upper()} | Load: {load} | Workers: {len(healthy)}")
 
     try:
-        if mode == "light" and len(compatible) >= 2:
-
+        if mode == "light" and len(healthy) >= 2:
+            # DISTRIBUTED MODE - 1 gen + multiple decoders
+            generators, decoders = select_distributed_workers()
             if decoders:
                 cluster_stats["successful_requests"] += 1
                 return StreamingResponse(
@@ -837,7 +981,8 @@ async def generate(request: GenerateRequest):
                     media_type="text/event-stream"
                 )
 
-
+        # HEAVY/FALLBACK - single worker
+        worker = get_least_busy_worker()
         cluster_stats["successful_requests"] += 1
         return StreamingResponse(
             heavy_load_generation(worker, request_data, "generate"),
@@ -849,19 +994,14 @@ async def chat(request: ChatRequest):
 
 @app.post("/v1/chat")
 async def chat(request: ChatRequest):
-    """Chat with
+    """Chat with distributed compute"""
     track_request()
     mode, load = update_load_mode()
 
-
-
-
-    if not compatible:
+    healthy = get_healthy_workers()
+    if not healthy:
         cluster_stats["failed_requests"] += 1
-        raise HTTPException(
-            status_code=503,
-            detail=f"No workers available for model: {request.model or 'default'}"
-        )
+        raise HTTPException(status_code=503, detail="No healthy workers")
 
     request_data = {
         "messages": [{"role": m.role, "content": m.content} for m in request.messages],
@@ -873,15 +1013,12 @@ async def chat(request: ChatRequest):
         "stream": True
     }
 
-
-    if request.model:
-        request_data["model"] = request.model
-
-    print(f"💬 {mode.upper()} | Load: {load} | Model: {request.model or 'default'} | Workers: {len(compatible)}")
+    print(f"💬 {mode.upper()} | Load: {load} | Workers: {len(healthy)}")
 
     try:
-        if mode == "light" and len(compatible) >= 2:
-
+        if mode == "light" and len(healthy) >= 2:
+            # DISTRIBUTED MODE - 1 gen + multiple decoders
+            generators, decoders = select_distributed_workers()
             if decoders:
                 cluster_stats["successful_requests"] += 1
                 return StreamingResponse(
@@ -889,7 +1026,8 @@ async def chat(request: ChatRequest):
                     media_type="text/event-stream"
                 )
 
-
+        # HEAVY/FALLBACK - single worker
+        worker = get_least_busy_worker()
         cluster_stats["successful_requests"] += 1
         return StreamingResponse(
             heavy_load_generation(worker, request_data, "chat"),