Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
-
SAM-Z-1 Distributed Compute Cluster Head Node
|
| 3 |
- Smart load balancing with distributed compute
|
| 4 |
- Real-time status dashboard
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
from fastapi import FastAPI, HTTPException, WebSocket
|
|
@@ -15,7 +17,7 @@ from typing import List, Optional, Dict
|
|
| 15 |
from collections import deque
|
| 16 |
import random
|
| 17 |
|
| 18 |
-
app = FastAPI(title="SAM-Z-1 Distributed Cluster", version="
|
| 19 |
|
| 20 |
# ============================================================================
|
| 21 |
# Configuration
|
|
@@ -30,13 +32,21 @@ WORKER_URLS = [
|
|
| 30 |
"https://bc-ai-worker-5.hf.space"
|
| 31 |
]
|
| 32 |
|
| 33 |
-
HEALTH_CHECK_INTERVAL = 5
|
| 34 |
LOAD_CHECK_WINDOW = 10
|
| 35 |
|
| 36 |
LIGHT_LOAD_THRESHOLD = 2
|
| 37 |
HEAVY_LOAD_THRESHOLD = 5
|
| 38 |
|
| 39 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
worker_health = {
|
| 41 |
url: {
|
| 42 |
"healthy": True,
|
|
@@ -45,12 +55,14 @@ worker_health = {
|
|
| 45 |
"total_requests": 0,
|
| 46 |
"total_tokens": 0,
|
| 47 |
"avg_latency": 0,
|
| 48 |
-
"role": "idle"
|
|
|
|
|
|
|
| 49 |
} for url in WORKER_URLS
|
| 50 |
}
|
| 51 |
|
| 52 |
request_timestamps = deque(maxlen=100)
|
| 53 |
-
current_load_mode = "light"
|
| 54 |
cluster_stats = {
|
| 55 |
"total_requests": 0,
|
| 56 |
"successful_requests": 0,
|
|
@@ -58,7 +70,6 @@ cluster_stats = {
|
|
| 58 |
"uptime_start": time.time()
|
| 59 |
}
|
| 60 |
|
| 61 |
-
# Active WebSocket connections for real-time updates
|
| 62 |
active_connections = set()
|
| 63 |
|
| 64 |
# ============================================================================
|
|
@@ -73,6 +84,7 @@ class GenerateRequest(BaseModel):
|
|
| 73 |
top_p: float = 0.9
|
| 74 |
repetition_penalty: float = 1.1
|
| 75 |
stream: bool = True
|
|
|
|
| 76 |
|
| 77 |
class ChatMessage(BaseModel):
|
| 78 |
role: str
|
|
@@ -86,6 +98,138 @@ class ChatRequest(BaseModel):
|
|
| 86 |
top_p: float = 0.9
|
| 87 |
repetition_penalty: float = 1.1
|
| 88 |
stream: bool = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
# ============================================================================
|
| 91 |
# Load Management
|
|
@@ -100,21 +244,20 @@ def update_load_mode():
|
|
| 100 |
load = get_current_load()
|
| 101 |
healthy_count = len(get_healthy_workers())
|
| 102 |
|
| 103 |
-
# Adjust thresholds based on available workers
|
| 104 |
if healthy_count >= 5:
|
| 105 |
if load <= LIGHT_LOAD_THRESHOLD:
|
| 106 |
-
current_load_mode = "light"
|
| 107 |
-
elif load <=
|
| 108 |
-
current_load_mode = "medium"
|
| 109 |
else:
|
| 110 |
-
current_load_mode = "heavy"
|
| 111 |
elif healthy_count >= 3:
|
| 112 |
if load <= 2:
|
| 113 |
-
current_load_mode = "light"
|
| 114 |
else:
|
| 115 |
-
current_load_mode = "heavy"
|
| 116 |
else:
|
| 117 |
-
current_load_mode = "heavy"
|
| 118 |
|
| 119 |
return current_load_mode, load
|
| 120 |
|
|
@@ -122,42 +265,11 @@ def track_request():
|
|
| 122 |
request_timestamps.append(time.time())
|
| 123 |
cluster_stats["total_requests"] += 1
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
def get_least_busy_worker() -> Optional[str]:
|
| 129 |
-
healthy = get_healthy_workers()
|
| 130 |
-
if not healthy:
|
| 131 |
-
return None
|
| 132 |
-
return min(healthy, key=lambda url: worker_health[url]["active_requests"])
|
| 133 |
-
|
| 134 |
-
def select_distributed_workers() -> tuple:
|
| 135 |
-
"""
|
| 136 |
-
Select workers for distributed compute
|
| 137 |
-
Returns: (generators: List[str], decoders: List[str])
|
| 138 |
-
"""
|
| 139 |
-
healthy = get_healthy_workers()
|
| 140 |
-
if len(healthy) < 2:
|
| 141 |
-
return ([healthy[0]], []) if len(healthy) == 1 else ([], [])
|
| 142 |
-
|
| 143 |
-
# Sort by least busy
|
| 144 |
-
sorted_workers = sorted(healthy, key=lambda url: worker_health[url]["active_requests"])
|
| 145 |
-
|
| 146 |
-
if len(healthy) >= 5:
|
| 147 |
-
# OPTIMAL: 1 generator, 4 decoders
|
| 148 |
-
return ([sorted_workers[0]], sorted_workers[1:5])
|
| 149 |
-
elif len(healthy) == 4:
|
| 150 |
-
# 1 generator, 3 decoders
|
| 151 |
-
return ([sorted_workers[0]], sorted_workers[1:4])
|
| 152 |
-
elif len(healthy) == 3:
|
| 153 |
-
# 1 generator, 2 decoders
|
| 154 |
-
return ([sorted_workers[0]], sorted_workers[1:3])
|
| 155 |
-
else:
|
| 156 |
-
# 1 generator, 1 decoder
|
| 157 |
-
return ([sorted_workers[0]], [sorted_workers[1]])
|
| 158 |
|
| 159 |
async def broadcast_stats():
|
| 160 |
-
"""Broadcast stats to all connected WebSocket clients"""
|
| 161 |
if not active_connections:
|
| 162 |
return
|
| 163 |
|
|
@@ -170,13 +282,15 @@ async def broadcast_stats():
|
|
| 170 |
"load": load,
|
| 171 |
"workers": [
|
| 172 |
{
|
| 173 |
-
"url": url.split("//")[1].split(".")[0],
|
| 174 |
"healthy": status["healthy"],
|
| 175 |
"active": status["active_requests"],
|
| 176 |
"total": status["total_requests"],
|
| 177 |
"tokens": status["total_tokens"],
|
| 178 |
"latency": round(status["avg_latency"], 2),
|
| 179 |
-
"role": status["role"]
|
|
|
|
|
|
|
| 180 |
}
|
| 181 |
for url, status in worker_health.items()
|
| 182 |
],
|
|
@@ -189,7 +303,6 @@ async def broadcast_stats():
|
|
| 189 |
}
|
| 190 |
}
|
| 191 |
|
| 192 |
-
# Broadcast to all connections
|
| 193 |
disconnected = set()
|
| 194 |
for ws in active_connections:
|
| 195 |
try:
|
|
@@ -197,36 +310,24 @@ async def broadcast_stats():
|
|
| 197 |
except:
|
| 198 |
disconnected.add(ws)
|
| 199 |
|
| 200 |
-
# Remove disconnected
|
| 201 |
active_connections.difference_update(disconnected)
|
| 202 |
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
try:
|
| 205 |
-
async with httpx.AsyncClient(timeout=5.0) as client:
|
| 206 |
-
response = await client.get(f"{worker_url}/health")
|
| 207 |
-
return response.status_code == 200
|
| 208 |
-
except:
|
| 209 |
-
return False
|
| 210 |
-
|
| 211 |
-
async def health_check_loop():
|
| 212 |
-
while True:
|
| 213 |
-
# Check all workers
|
| 214 |
-
for worker_url in WORKER_URLS:
|
| 215 |
-
healthy = await check_worker_health(worker_url)
|
| 216 |
-
worker_health[worker_url]["healthy"] = healthy
|
| 217 |
-
worker_health[worker_url]["last_check"] = time.time()
|
| 218 |
-
|
| 219 |
-
# Always broadcast stats to connected clients
|
| 220 |
await broadcast_stats()
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
|
| 228 |
# ============================================================================
|
| 229 |
-
# Distributed
|
| 230 |
# ============================================================================
|
| 231 |
|
| 232 |
async def distributed_generation(
|
|
@@ -235,11 +336,7 @@ async def distributed_generation(
|
|
| 235 |
request_data: dict,
|
| 236 |
endpoint: str = "generate"
|
| 237 |
):
|
| 238 |
-
"""
|
| 239 |
-
DISTRIBUTED COMPUTE MODE
|
| 240 |
-
- Generator(s) produce token IDs
|
| 241 |
-
- Multiple decoders process in parallel (load balanced)
|
| 242 |
-
"""
|
| 243 |
|
| 244 |
if not generators or not decoders:
|
| 245 |
return
|
|
@@ -247,15 +344,13 @@ async def distributed_generation(
|
|
| 247 |
token_queue = asyncio.Queue(maxsize=50)
|
| 248 |
text_queue = asyncio.Queue(maxsize=50)
|
| 249 |
|
| 250 |
-
# Mark roles
|
| 251 |
for gen_url in generators:
|
| 252 |
worker_health[gen_url]["role"] = "generator"
|
| 253 |
for dec_url in decoders:
|
| 254 |
worker_health[dec_url]["role"] = "decoder"
|
| 255 |
|
| 256 |
async def generate_tokens():
|
| 257 |
-
|
| 258 |
-
gen_url = generators[0] # primary generator
|
| 259 |
try:
|
| 260 |
worker_health[gen_url]["active_requests"] += 1
|
| 261 |
request_data_tokens = {**request_data, "return_token_ids": True}
|
|
@@ -273,7 +368,6 @@ async def distributed_generation(
|
|
| 273 |
if "token_id" in data:
|
| 274 |
await token_queue.put(data["token_id"])
|
| 275 |
elif "done" in data:
|
| 276 |
-
# Send done signal for each decoder
|
| 277 |
for _ in decoders:
|
| 278 |
await token_queue.put(None)
|
| 279 |
break
|
|
@@ -288,18 +382,16 @@ async def distributed_generation(
|
|
| 288 |
worker_health[gen_url]["role"] = "idle"
|
| 289 |
|
| 290 |
async def decode_tokens(decoder_url: str, decoder_id: int):
|
| 291 |
-
"""Decoder worker - processes tokens from shared queue"""
|
| 292 |
try:
|
| 293 |
worker_health[decoder_url]["active_requests"] += 1
|
| 294 |
batch = []
|
| 295 |
-
batch_size = 2
|
| 296 |
|
| 297 |
while True:
|
| 298 |
try:
|
| 299 |
token_id = await asyncio.wait_for(token_queue.get(), timeout=2.0)
|
| 300 |
|
| 301 |
if token_id is None:
|
| 302 |
-
# Decode remaining batch
|
| 303 |
if batch:
|
| 304 |
async with httpx.AsyncClient(timeout=10.0) as client:
|
| 305 |
response = await client.post(
|
|
@@ -315,7 +407,6 @@ async def distributed_generation(
|
|
| 315 |
|
| 316 |
batch.append(token_id)
|
| 317 |
|
| 318 |
-
# Decode when batch is full
|
| 319 |
if len(batch) >= batch_size:
|
| 320 |
async with httpx.AsyncClient(timeout=10.0) as client:
|
| 321 |
response = await client.post(
|
|
@@ -338,16 +429,12 @@ async def distributed_generation(
|
|
| 338 |
worker_health[decoder_url]["active_requests"] -= 1
|
| 339 |
worker_health[decoder_url]["role"] = "idle"
|
| 340 |
|
| 341 |
-
# Start generator
|
| 342 |
gen_task = asyncio.create_task(generate_tokens())
|
| 343 |
-
|
| 344 |
-
# Start all decoders
|
| 345 |
decoder_tasks = [
|
| 346 |
asyncio.create_task(decode_tokens(dec_url, i))
|
| 347 |
for i, dec_url in enumerate(decoders)
|
| 348 |
]
|
| 349 |
|
| 350 |
-
# Stream results
|
| 351 |
accumulated_text = ""
|
| 352 |
decoders_done = 0
|
| 353 |
total_decoders = len(decoders)
|
|
@@ -393,24 +480,19 @@ async def heavy_load_generation(worker_url: str, request_data: dict, endpoint: s
|
|
| 393 |
worker_health[worker_url]["role"] = "idle"
|
| 394 |
|
| 395 |
# ============================================================================
|
| 396 |
-
#
|
| 397 |
# ============================================================================
|
| 398 |
|
| 399 |
@app.get("/", response_class=HTMLResponse)
|
| 400 |
async def dashboard():
|
| 401 |
-
"""Real-time
|
| 402 |
return """
|
| 403 |
<!DOCTYPE html>
|
| 404 |
<html>
|
| 405 |
<head>
|
| 406 |
-
<title>SAM-Z-1 Cluster Control</title>
|
| 407 |
<style>
|
| 408 |
-
* {
|
| 409 |
-
margin: 0;
|
| 410 |
-
padding: 0;
|
| 411 |
-
box-sizing: border-box;
|
| 412 |
-
}
|
| 413 |
-
|
| 414 |
body {
|
| 415 |
font-family: 'Courier New', monospace;
|
| 416 |
background: linear-gradient(135deg, #0a0e27 0%, #1a1f3a 100%);
|
|
@@ -419,14 +501,12 @@ async def dashboard():
|
|
| 419 |
overflow-x: hidden;
|
| 420 |
overflow-y: auto;
|
| 421 |
}
|
| 422 |
-
|
| 423 |
.container {
|
| 424 |
padding: 20px;
|
| 425 |
max-width: 1400px;
|
| 426 |
margin: 0 auto;
|
| 427 |
padding-bottom: 40px;
|
| 428 |
}
|
| 429 |
-
|
| 430 |
.header {
|
| 431 |
text-align: center;
|
| 432 |
margin-bottom: 30px;
|
|
@@ -436,7 +516,6 @@ async def dashboard():
|
|
| 436 |
border-radius: 10px;
|
| 437 |
box-shadow: 0 0 20px rgba(0, 255, 136, 0.3);
|
| 438 |
}
|
| 439 |
-
|
| 440 |
.header h1 {
|
| 441 |
font-size: 2.5em;
|
| 442 |
text-transform: uppercase;
|
|
@@ -444,18 +523,33 @@ async def dashboard():
|
|
| 444 |
text-shadow: 0 0 10px #00ff88;
|
| 445 |
animation: glow 2s ease-in-out infinite alternate;
|
| 446 |
}
|
| 447 |
-
|
| 448 |
@keyframes glow {
|
| 449 |
from { text-shadow: 0 0 10px #00ff88, 0 0 20px #00ff88; }
|
| 450 |
to { text-shadow: 0 0 20px #00ff88, 0 0 30px #00ff88, 0 0 40px #00ff88; }
|
| 451 |
}
|
| 452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
.status-bar {
|
| 454 |
display: flex;
|
| 455 |
gap: 20px;
|
| 456 |
margin-bottom: 30px;
|
| 457 |
}
|
| 458 |
-
|
| 459 |
.stat-card {
|
| 460 |
flex: 1;
|
| 461 |
background: rgba(0, 255, 136, 0.05);
|
|
@@ -465,83 +559,22 @@ async def dashboard():
|
|
| 465 |
position: relative;
|
| 466 |
overflow: hidden;
|
| 467 |
}
|
| 468 |
-
|
| 469 |
-
.stat-card::before {
|
| 470 |
-
content: '';
|
| 471 |
-
position: absolute;
|
| 472 |
-
top: 0;
|
| 473 |
-
left: -100%;
|
| 474 |
-
width: 100%;
|
| 475 |
-
height: 100%;
|
| 476 |
-
background: linear-gradient(90deg, transparent, rgba(0, 255, 136, 0.2), transparent);
|
| 477 |
-
animation: scan 3s infinite;
|
| 478 |
-
}
|
| 479 |
-
|
| 480 |
-
@keyframes scan {
|
| 481 |
-
0% { left: -100%; }
|
| 482 |
-
100% { left: 100%; }
|
| 483 |
-
}
|
| 484 |
-
|
| 485 |
.stat-label {
|
| 486 |
font-size: 0.8em;
|
| 487 |
opacity: 0.7;
|
| 488 |
text-transform: uppercase;
|
| 489 |
}
|
| 490 |
-
|
| 491 |
.stat-value {
|
| 492 |
font-size: 2em;
|
| 493 |
font-weight: bold;
|
| 494 |
margin-top: 5px;
|
| 495 |
}
|
| 496 |
-
|
| 497 |
-
.mode-badge {
|
| 498 |
-
display: inline-block;
|
| 499 |
-
padding: 5px 15px;
|
| 500 |
-
border-radius: 20px;
|
| 501 |
-
font-size: 0.9em;
|
| 502 |
-
font-weight: bold;
|
| 503 |
-
text-transform: uppercase;
|
| 504 |
-
margin-top: 10px;
|
| 505 |
-
}
|
| 506 |
-
|
| 507 |
-
.mode-light {
|
| 508 |
-
background: rgba(0, 255, 136, 0.2);
|
| 509 |
-
border: 1px solid #00ff88;
|
| 510 |
-
color: #00ff88;
|
| 511 |
-
}
|
| 512 |
-
|
| 513 |
-
.mode-heavy {
|
| 514 |
-
background: rgba(255, 68, 68, 0.2);
|
| 515 |
-
border: 1px solid #ff4444;
|
| 516 |
-
color: #ff4444;
|
| 517 |
-
}
|
| 518 |
-
|
| 519 |
.workers-grid {
|
| 520 |
display: grid;
|
| 521 |
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
| 522 |
gap: 20px;
|
| 523 |
margin-bottom: 30px;
|
| 524 |
}
|
| 525 |
-
|
| 526 |
-
@media (max-width: 768px) {
|
| 527 |
-
.workers-grid {
|
| 528 |
-
grid-template-columns: 1fr;
|
| 529 |
-
}
|
| 530 |
-
|
| 531 |
-
.status-bar {
|
| 532 |
-
flex-direction: column;
|
| 533 |
-
}
|
| 534 |
-
|
| 535 |
-
.info-grid {
|
| 536 |
-
grid-template-columns: repeat(2, 1fr);
|
| 537 |
-
}
|
| 538 |
-
|
| 539 |
-
.header h1 {
|
| 540 |
-
font-size: 1.5em;
|
| 541 |
-
letter-spacing: 2px;
|
| 542 |
-
}
|
| 543 |
-
}
|
| 544 |
-
|
| 545 |
.worker-card {
|
| 546 |
background: rgba(10, 14, 39, 0.8);
|
| 547 |
border: 2px solid #00ff88;
|
|
@@ -550,166 +583,89 @@ async def dashboard():
|
|
| 550 |
position: relative;
|
| 551 |
transition: all 0.3s;
|
| 552 |
}
|
| 553 |
-
|
| 554 |
.worker-card:hover {
|
| 555 |
transform: translateY(-5px);
|
| 556 |
box-shadow: 0 5px 30px rgba(0, 255, 136, 0.4);
|
| 557 |
}
|
| 558 |
-
|
| 559 |
.worker-card.offline {
|
| 560 |
border-color: #ff4444;
|
| 561 |
opacity: 0.6;
|
| 562 |
}
|
| 563 |
-
|
| 564 |
.worker-header {
|
| 565 |
display: flex;
|
| 566 |
justify-content: space-between;
|
| 567 |
align-items: center;
|
| 568 |
margin-bottom: 15px;
|
| 569 |
}
|
| 570 |
-
|
| 571 |
.worker-name {
|
| 572 |
font-size: 1.2em;
|
| 573 |
font-weight: bold;
|
| 574 |
}
|
| 575 |
-
|
| 576 |
.status-dot {
|
| 577 |
width: 12px;
|
| 578 |
height: 12px;
|
| 579 |
border-radius: 50%;
|
| 580 |
animation: pulse 2s infinite;
|
| 581 |
}
|
| 582 |
-
|
| 583 |
.status-dot.online {
|
| 584 |
background: #00ff88;
|
| 585 |
box-shadow: 0 0 10px #00ff88;
|
| 586 |
}
|
| 587 |
-
|
| 588 |
.status-dot.offline {
|
| 589 |
background: #ff4444;
|
| 590 |
box-shadow: 0 0 10px #ff4444;
|
| 591 |
}
|
| 592 |
-
|
| 593 |
@keyframes pulse {
|
| 594 |
0%, 100% { opacity: 1; }
|
| 595 |
50% { opacity: 0.5; }
|
| 596 |
}
|
| 597 |
-
|
| 598 |
.worker-stats {
|
| 599 |
display: grid;
|
| 600 |
grid-template-columns: repeat(2, 1fr);
|
| 601 |
gap: 10px;
|
| 602 |
margin-top: 15px;
|
| 603 |
}
|
| 604 |
-
|
| 605 |
.worker-stat {
|
| 606 |
background: rgba(0, 255, 136, 0.05);
|
| 607 |
padding: 10px;
|
| 608 |
border-radius: 5px;
|
| 609 |
}
|
| 610 |
-
|
| 611 |
.worker-stat-label {
|
| 612 |
font-size: 0.7em;
|
| 613 |
opacity: 0.7;
|
| 614 |
}
|
| 615 |
-
|
| 616 |
.worker-stat-value {
|
| 617 |
font-size: 1.3em;
|
| 618 |
font-weight: bold;
|
| 619 |
margin-top: 3px;
|
| 620 |
}
|
| 621 |
-
|
| 622 |
-
.role-badge {
|
| 623 |
-
display: inline-block;
|
| 624 |
-
padding: 3px 10px;
|
| 625 |
-
border-radius: 12px;
|
| 626 |
-
font-size: 0.75em;
|
| 627 |
-
margin-top: 10px;
|
| 628 |
-
font-weight: bold;
|
| 629 |
-
}
|
| 630 |
-
|
| 631 |
-
.role-generator {
|
| 632 |
-
background: rgba(255, 165, 0, 0.2);
|
| 633 |
-
border: 1px solid #ffa500;
|
| 634 |
-
color: #ffa500;
|
| 635 |
-
}
|
| 636 |
-
|
| 637 |
-
.role-decoder {
|
| 638 |
-
background: rgba(0, 191, 255, 0.2);
|
| 639 |
-
border: 1px solid #00bfff;
|
| 640 |
-
color: #00bfff;
|
| 641 |
-
}
|
| 642 |
-
|
| 643 |
-
.role-full {
|
| 644 |
-
background: rgba(138, 43, 226, 0.2);
|
| 645 |
-
border: 1px solid #8a2be2;
|
| 646 |
-
color: #8a2be2;
|
| 647 |
-
}
|
| 648 |
-
|
| 649 |
-
.role-idle {
|
| 650 |
-
background: rgba(128, 128, 128, 0.2);
|
| 651 |
-
border: 1px solid #808080;
|
| 652 |
-
color: #808080;
|
| 653 |
-
}
|
| 654 |
-
|
| 655 |
-
.progress-bar {
|
| 656 |
-
width: 100%;
|
| 657 |
-
height: 4px;
|
| 658 |
-
background: rgba(0, 255, 136, 0.1);
|
| 659 |
-
border-radius: 2px;
|
| 660 |
-
margin-top: 10px;
|
| 661 |
-
overflow: hidden;
|
| 662 |
-
}
|
| 663 |
-
|
| 664 |
-
.progress-fill {
|
| 665 |
-
height: 100%;
|
| 666 |
-
background: linear-gradient(90deg, #00ff88, #00ffff);
|
| 667 |
-
transition: width 0.3s;
|
| 668 |
-
box-shadow: 0 0 10px #00ff88;
|
| 669 |
-
}
|
| 670 |
-
|
| 671 |
-
.cluster-info {
|
| 672 |
-
background: rgba(0, 255, 136, 0.05);
|
| 673 |
-
border: 1px solid #00ff88;
|
| 674 |
-
border-radius: 8px;
|
| 675 |
-
padding: 20px;
|
| 676 |
-
}
|
| 677 |
-
|
| 678 |
-
.info-grid {
|
| 679 |
-
display: grid;
|
| 680 |
-
grid-template-columns: repeat(4, 1fr);
|
| 681 |
-
gap: 20px;
|
| 682 |
-
}
|
| 683 |
-
|
| 684 |
-
.info-item {
|
| 685 |
-
text-align: center;
|
| 686 |
-
}
|
| 687 |
-
|
| 688 |
.timestamp {
|
| 689 |
text-align: center;
|
| 690 |
margin-top: 20px;
|
| 691 |
opacity: 0.5;
|
| 692 |
font-size: 0.9em;
|
| 693 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 694 |
</style>
|
| 695 |
</head>
|
| 696 |
<body>
|
| 697 |
<div class="container">
|
| 698 |
<div class="header">
|
| 699 |
<h1>⚡ SAM-Z-1 CLUSTER ⚡</h1>
|
| 700 |
-
<div>DISTRIBUTED COMPUTE SYSTEM
|
| 701 |
</div>
|
| 702 |
|
| 703 |
<div class="status-bar">
|
| 704 |
<div class="stat-card">
|
| 705 |
<div class="stat-label">Load Mode</div>
|
| 706 |
<div class="stat-value" id="mode">--</div>
|
| 707 |
-
<div class="mode-badge" id="mode-badge">INITIALIZING</div>
|
| 708 |
</div>
|
| 709 |
<div class="stat-card">
|
| 710 |
<div class="stat-label">Current Load</div>
|
| 711 |
<div class="stat-value" id="load">0</div>
|
| 712 |
-
<div class="stat-label">requests / 10s</div>
|
| 713 |
</div>
|
| 714 |
<div class="stat-card">
|
| 715 |
<div class="stat-label">Total Requests</div>
|
|
@@ -721,149 +677,47 @@ async def dashboard():
|
|
| 721 |
</div>
|
| 722 |
</div>
|
| 723 |
|
| 724 |
-
<div class="workers-grid" id="workers">
|
| 725 |
-
<!-- Workers populated by JS -->
|
| 726 |
-
</div>
|
| 727 |
-
|
| 728 |
-
<div class="cluster-info">
|
| 729 |
-
<div class="stat-label" style="margin-bottom: 15px;">CLUSTER STATISTICS</div>
|
| 730 |
-
<div class="info-grid">
|
| 731 |
-
<div class="info-item">
|
| 732 |
-
<div class="stat-label">Successful</div>
|
| 733 |
-
<div class="stat-value" style="font-size: 1.5em;" id="success">0</div>
|
| 734 |
-
</div>
|
| 735 |
-
<div class="info-item">
|
| 736 |
-
<div class="stat-label">Failed</div>
|
| 737 |
-
<div class="stat-value" style="font-size: 1.5em;" id="failed">0</div>
|
| 738 |
-
</div>
|
| 739 |
-
<div class="info-item">
|
| 740 |
-
<div class="stat-label">Uptime</div>
|
| 741 |
-
<div class="stat-value" style="font-size: 1.5em;" id="uptime">0s</div>
|
| 742 |
-
</div>
|
| 743 |
-
<div class="info-item">
|
| 744 |
-
<div class="stat-label">Healthy Workers</div>
|
| 745 |
-
<div class="stat-value" style="font-size: 1.5em;" id="healthy">0</div>
|
| 746 |
-
</div>
|
| 747 |
-
</div>
|
| 748 |
-
</div>
|
| 749 |
|
| 750 |
<div class="timestamp" id="timestamp">Last update: --</div>
|
| 751 |
</div>
|
| 752 |
|
| 753 |
<script>
|
| 754 |
-
// Use wss:// for HTTPS, ws:// for HTTP
|
| 755 |
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
| 756 |
let ws;
|
| 757 |
-
let usePolling = false;
|
| 758 |
|
| 759 |
function connectWebSocket() {
|
| 760 |
try {
|
| 761 |
ws = new WebSocket(`${protocol}//${window.location.host}/ws`);
|
| 762 |
|
| 763 |
-
ws.onopen = () =>
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
ws.onmessage = (event) => {
|
| 769 |
-
const data = JSON.parse(event.data);
|
| 770 |
-
updateDashboard(data);
|
| 771 |
-
};
|
| 772 |
-
|
| 773 |
-
ws.onerror = (error) => {
|
| 774 |
-
console.error('❌ WebSocket error, switching to polling');
|
| 775 |
-
usePolling = true;
|
| 776 |
-
startPolling();
|
| 777 |
-
};
|
| 778 |
-
|
| 779 |
-
ws.onclose = () => {
|
| 780 |
-
console.log('🔌 WebSocket disconnected');
|
| 781 |
-
if (!usePolling) {
|
| 782 |
-
setTimeout(connectWebSocket, 3000);
|
| 783 |
-
}
|
| 784 |
-
};
|
| 785 |
} catch (e) {
|
| 786 |
-
console.error('Failed to connect WebSocket
|
| 787 |
-
usePolling = true;
|
| 788 |
-
startPolling();
|
| 789 |
}
|
| 790 |
}
|
| 791 |
|
| 792 |
-
async function pollStats() {
|
| 793 |
-
if (!usePolling) return;
|
| 794 |
-
|
| 795 |
-
try {
|
| 796 |
-
const response = await fetch('/api/status');
|
| 797 |
-
const data = await response.json();
|
| 798 |
-
|
| 799 |
-
// Fetch worker stats too
|
| 800 |
-
const workersRes = await fetch('/workers');
|
| 801 |
-
const workersData = await workersRes.json();
|
| 802 |
-
|
| 803 |
-
// Format data like WebSocket
|
| 804 |
-
const formattedData = {
|
| 805 |
-
timestamp: Date.now() / 1000,
|
| 806 |
-
mode: data.mode,
|
| 807 |
-
load: data.current_load,
|
| 808 |
-
workers: workersData.workers.map(w => ({
|
| 809 |
-
url: w.url.split("//")[1].split(".")[0],
|
| 810 |
-
healthy: w.healthy,
|
| 811 |
-
active: w.active_requests || 0,
|
| 812 |
-
total: 0,
|
| 813 |
-
tokens: 0,
|
| 814 |
-
latency: 0,
|
| 815 |
-
role: "idle"
|
| 816 |
-
})),
|
| 817 |
-
cluster: {
|
| 818 |
-
total_requests: 0,
|
| 819 |
-
successful: 0,
|
| 820 |
-
failed: 0,
|
| 821 |
-
uptime: 0,
|
| 822 |
-
rps: 0
|
| 823 |
-
}
|
| 824 |
-
};
|
| 825 |
-
|
| 826 |
-
updateDashboard(formattedData);
|
| 827 |
-
} catch (e) {
|
| 828 |
-
console.error('Polling error:', e);
|
| 829 |
-
}
|
| 830 |
-
}
|
| 831 |
-
|
| 832 |
-
function startPolling() {
|
| 833 |
-
pollStats();
|
| 834 |
-
setInterval(pollStats, 1000);
|
| 835 |
-
}
|
| 836 |
-
|
| 837 |
-
// Try WebSocket first
|
| 838 |
connectWebSocket();
|
| 839 |
|
| 840 |
function updateDashboard(data) {
|
| 841 |
-
// Mode
|
| 842 |
document.getElementById('mode').textContent = data.mode.toUpperCase();
|
| 843 |
-
const modeBadge = document.getElementById('mode-badge');
|
| 844 |
-
modeBadge.textContent = `${data.mode.toUpperCase()} MODE`;
|
| 845 |
-
modeBadge.className = `mode-badge mode-${data.mode}`;
|
| 846 |
-
|
| 847 |
-
// Stats
|
| 848 |
document.getElementById('load').textContent = data.load;
|
| 849 |
document.getElementById('total-req').textContent = data.cluster.total_requests;
|
| 850 |
document.getElementById('rps').textContent = data.cluster.rps;
|
| 851 |
-
document.getElementById('success').textContent = data.cluster.successful;
|
| 852 |
-
document.getElementById('failed').textContent = data.cluster.failed;
|
| 853 |
-
document.getElementById('uptime').textContent = formatUptime(data.cluster.uptime);
|
| 854 |
|
| 855 |
-
// Workers
|
| 856 |
const workersDiv = document.getElementById('workers');
|
| 857 |
-
const healthyCount = data.workers.filter(w => w.healthy).length;
|
| 858 |
-
document.getElementById('healthy').textContent = `${healthyCount}/${data.workers.length}`;
|
| 859 |
-
|
| 860 |
workersDiv.innerHTML = data.workers.map(worker => `
|
| 861 |
<div class="worker-card ${worker.healthy ? '' : 'offline'}">
|
| 862 |
<div class="worker-header">
|
| 863 |
-
<div
|
|
|
|
|
|
|
|
|
|
|
|
|
| 864 |
<div class="status-dot ${worker.healthy ? 'online' : 'offline'}"></div>
|
| 865 |
</div>
|
| 866 |
-
<div class="role-badge role-${worker.role}">${worker.role.toUpperCase()}</div>
|
| 867 |
<div class="worker-stats">
|
| 868 |
<div class="worker-stat">
|
| 869 |
<div class="worker-stat-label">Active</div>
|
|
@@ -878,69 +732,46 @@ async def dashboard():
|
|
| 878 |
<div class="worker-stat-value">${worker.tokens}</div>
|
| 879 |
</div>
|
| 880 |
<div class="worker-stat">
|
| 881 |
-
<div class="worker-stat-label">
|
| 882 |
-
<div class="worker-stat-value">${worker.
|
| 883 |
</div>
|
| 884 |
</div>
|
| 885 |
-
<div class="progress-bar">
|
| 886 |
-
<div class="progress-fill" style="width: ${Math.min(worker.active * 33, 100)}%"></div>
|
| 887 |
-
</div>
|
| 888 |
</div>
|
| 889 |
`).join('');
|
| 890 |
|
| 891 |
-
// Timestamp
|
| 892 |
-
const now = new Date();
|
| 893 |
document.getElementById('timestamp').textContent =
|
| 894 |
-
`Last update: ${
|
| 895 |
-
}
|
| 896 |
-
|
| 897 |
-
function formatUptime(seconds) {
|
| 898 |
-
const h = Math.floor(seconds / 3600);
|
| 899 |
-
const m = Math.floor((seconds % 3600) / 60);
|
| 900 |
-
const s = Math.floor(seconds % 60);
|
| 901 |
-
return `${h}h ${m}m ${s}s`;
|
| 902 |
}
|
| 903 |
</script>
|
| 904 |
</body>
|
| 905 |
</html>
|
| 906 |
"""
|
| 907 |
|
| 908 |
-
@app.websocket("/ws")
|
| 909 |
-
async def websocket_endpoint(websocket: WebSocket):
|
| 910 |
-
"""WebSocket for real-time dashboard updates"""
|
| 911 |
-
await websocket.accept()
|
| 912 |
-
active_connections.add(websocket)
|
| 913 |
-
|
| 914 |
-
try:
|
| 915 |
-
# Send initial data
|
| 916 |
-
await broadcast_stats()
|
| 917 |
-
|
| 918 |
-
# Keep connection alive
|
| 919 |
-
while True:
|
| 920 |
-
await websocket.receive_text()
|
| 921 |
-
except:
|
| 922 |
-
pass
|
| 923 |
-
finally:
|
| 924 |
-
active_connections.discard(websocket)
|
| 925 |
-
|
| 926 |
-
# ============================================================================
|
| 927 |
-
# API Endpoints
|
| 928 |
-
# ============================================================================
|
| 929 |
-
|
| 930 |
@app.get("/api/status")
|
| 931 |
async def api_status():
|
| 932 |
-
"""JSON API for status"""
|
| 933 |
mode, load = update_load_mode()
|
| 934 |
healthy_count = len(get_healthy_workers())
|
| 935 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 936 |
return {
|
| 937 |
"name": "SAM-Z-1 Distributed Cluster",
|
| 938 |
-
"version": "
|
| 939 |
"mode": mode,
|
| 940 |
"current_load": load,
|
| 941 |
"workers": len(WORKER_URLS),
|
| 942 |
"healthy_workers": healthy_count,
|
| 943 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 944 |
}
|
| 945 |
|
| 946 |
@app.get("/health")
|
|
@@ -951,16 +782,34 @@ async def health():
|
|
| 951 |
"workers_healthy": healthy_count
|
| 952 |
}
|
| 953 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 954 |
@app.post("/v1/generate")
|
| 955 |
async def generate(request: GenerateRequest):
|
| 956 |
-
"""Generate text with
|
| 957 |
track_request()
|
| 958 |
mode, load = update_load_mode()
|
| 959 |
|
| 960 |
-
|
| 961 |
-
|
|
|
|
|
|
|
| 962 |
cluster_stats["failed_requests"] += 1
|
| 963 |
-
raise HTTPException(
|
|
|
|
|
|
|
|
|
|
| 964 |
|
| 965 |
request_data = {
|
| 966 |
"prompt": request.prompt,
|
|
@@ -972,12 +821,15 @@ async def generate(request: GenerateRequest):
|
|
| 972 |
"stream": True
|
| 973 |
}
|
| 974 |
|
| 975 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 976 |
|
| 977 |
try:
|
| 978 |
-
if mode == "light" and len(
|
| 979 |
-
|
| 980 |
-
generators, decoders = select_distributed_workers()
|
| 981 |
if decoders:
|
| 982 |
cluster_stats["successful_requests"] += 1
|
| 983 |
return StreamingResponse(
|
|
@@ -985,8 +837,7 @@ async def generate(request: GenerateRequest):
|
|
| 985 |
media_type="text/event-stream"
|
| 986 |
)
|
| 987 |
|
| 988 |
-
|
| 989 |
-
worker = get_least_busy_worker()
|
| 990 |
cluster_stats["successful_requests"] += 1
|
| 991 |
return StreamingResponse(
|
| 992 |
heavy_load_generation(worker, request_data, "generate"),
|
|
@@ -998,14 +849,19 @@ async def generate(request: GenerateRequest):
|
|
| 998 |
|
| 999 |
@app.post("/v1/chat")
|
| 1000 |
async def chat(request: ChatRequest):
|
| 1001 |
-
"""Chat with
|
| 1002 |
track_request()
|
| 1003 |
mode, load = update_load_mode()
|
| 1004 |
|
| 1005 |
-
|
| 1006 |
-
|
|
|
|
|
|
|
| 1007 |
cluster_stats["failed_requests"] += 1
|
| 1008 |
-
raise HTTPException(
|
|
|
|
|
|
|
|
|
|
| 1009 |
|
| 1010 |
request_data = {
|
| 1011 |
"messages": [{"role": m.role, "content": m.content} for m in request.messages],
|
|
@@ -1017,12 +873,15 @@ async def chat(request: ChatRequest):
|
|
| 1017 |
"stream": True
|
| 1018 |
}
|
| 1019 |
|
| 1020 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1021 |
|
| 1022 |
try:
|
| 1023 |
-
if mode == "light" and len(
|
| 1024 |
-
|
| 1025 |
-
generators, decoders = select_distributed_workers()
|
| 1026 |
if decoders:
|
| 1027 |
cluster_stats["successful_requests"] += 1
|
| 1028 |
return StreamingResponse(
|
|
@@ -1030,8 +889,7 @@ async def chat(request: ChatRequest):
|
|
| 1030 |
media_type="text/event-stream"
|
| 1031 |
)
|
| 1032 |
|
| 1033 |
-
|
| 1034 |
-
worker = get_least_busy_worker()
|
| 1035 |
cluster_stats["successful_requests"] += 1
|
| 1036 |
return StreamingResponse(
|
| 1037 |
heavy_load_generation(worker, request_data, "chat"),
|
|
|
|
| 1 |
"""
|
| 2 |
+
SAM-Z-1 Distributed Compute Cluster Head Node v5.0
|
| 3 |
- Smart load balancing with distributed compute
|
| 4 |
- Real-time status dashboard
|
| 5 |
+
- Auto-detects worker version (v4 vs v5)
|
| 6 |
+
- Supports 4 new models with backward compatibility
|
| 7 |
"""
|
| 8 |
|
| 9 |
from fastapi import FastAPI, HTTPException, WebSocket
|
|
|
|
| 17 |
from collections import deque
|
| 18 |
import random
|
| 19 |
|
| 20 |
+
app = FastAPI(title="SAM-Z-1 Distributed Cluster", version="5.0.0")
|
| 21 |
|
| 22 |
# ============================================================================
|
| 23 |
# Configuration
|
|
|
|
| 32 |
"https://bc-ai-worker-5.hf.space"
|
| 33 |
]
|
| 34 |
|
| 35 |
+
HEALTH_CHECK_INTERVAL = 5
|
| 36 |
LOAD_CHECK_WINDOW = 10
|
| 37 |
|
| 38 |
LIGHT_LOAD_THRESHOLD = 2
|
| 39 |
HEAVY_LOAD_THRESHOLD = 5
|
| 40 |
|
| 41 |
+
# New models added in v5
|
| 42 |
+
NEW_MODELS = [
|
| 43 |
+
"SAM-X-1-Large",
|
| 44 |
+
"SAM-X-1-Fast",
|
| 45 |
+
"SAM-X-1-Mini",
|
| 46 |
+
"SAM-X-1-Nano"
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
# Worker state with version detection
|
| 50 |
worker_health = {
|
| 51 |
url: {
|
| 52 |
"healthy": True,
|
|
|
|
| 55 |
"total_requests": 0,
|
| 56 |
"total_tokens": 0,
|
| 57 |
"avg_latency": 0,
|
| 58 |
+
"role": "idle",
|
| 59 |
+
"version": None, # Will be auto-detected: "v4" or "v5"
|
| 60 |
+
"supports_models": [] # Models this worker supports
|
| 61 |
} for url in WORKER_URLS
|
| 62 |
}
|
| 63 |
|
| 64 |
request_timestamps = deque(maxlen=100)
|
| 65 |
+
current_load_mode = "light"
|
| 66 |
cluster_stats = {
|
| 67 |
"total_requests": 0,
|
| 68 |
"successful_requests": 0,
|
|
|
|
| 70 |
"uptime_start": time.time()
|
| 71 |
}
|
| 72 |
|
|
|
|
| 73 |
active_connections = set()
|
| 74 |
|
| 75 |
# ============================================================================
|
|
|
|
| 84 |
top_p: float = 0.9
|
| 85 |
repetition_penalty: float = 1.1
|
| 86 |
stream: bool = True
|
| 87 |
+
model: Optional[str] = None # NEW: Model selection
|
| 88 |
|
| 89 |
class ChatMessage(BaseModel):
|
| 90 |
role: str
|
|
|
|
| 98 |
top_p: float = 0.9
|
| 99 |
repetition_penalty: float = 1.1
|
| 100 |
stream: bool = True
|
| 101 |
+
model: Optional[str] = None # NEW: Model selection
|
| 102 |
+
|
| 103 |
+
# ============================================================================
|
| 104 |
+
# Worker Version Detection
|
| 105 |
+
# ============================================================================
|
| 106 |
+
|
| 107 |
+
async def detect_worker_version(worker_url: str) -> tuple:
    """Detect a worker's protocol version and its supported models.

    Probes the worker's ``/info`` endpoint first (a v5 feature), then falls
    back to ``/models`` (also v5-only).  A worker that answers neither is
    assumed to be a legacy v4 node without model-selection support.

    Args:
        worker_url: Base URL of the worker, e.g. "https://bc-ai-worker-1.hf.space".

    Returns:
        (version, supported_models): version is "v4" or "v5"; supported_models
        is a (possibly empty) list of model names.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            # Preferred probe: /info reports both version and model list.
            try:
                response = await client.get(f"{worker_url}/info")
                if response.status_code == 200:
                    data = response.json()
                    version = data.get("version", "v5")
                    models = data.get("models", NEW_MODELS)
                    return version, models
            # FIX: was a bare `except:`, which also swallows
            # asyncio.CancelledError and can wedge task cancellation.
            except Exception:
                pass

            # Secondary probe: /models exists only on v5 workers.
            try:
                response = await client.get(f"{worker_url}/models")
                if response.status_code == 200:
                    data = response.json()
                    models = data.get("models", [])
                    if models:
                        return "v5", models
            except Exception:
                pass

            # Neither v5 endpoint answered: treat as a legacy v4 worker.
            return "v4", []

    except Exception as e:
        print(f"⚠️ Version detection failed for {worker_url}: {e}")
        return "v4", []
|
| 142 |
+
|
| 143 |
+
async def check_worker_health(worker_url: str) -> bool:
    """Return True if the worker's /health endpoint answers 200 within 5s.

    Any transport failure (timeout, connection refused, DNS error) is
    treated as "unhealthy" rather than raised to the caller.
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{worker_url}/health")
            return response.status_code == 200
    # FIX: was a bare `except:`, which also swallows asyncio.CancelledError
    # and would prevent the health-check task from being cancelled cleanly.
    except Exception:
        return False
|
| 151 |
+
|
| 152 |
+
async def health_check_loop():
    """Background task: poll every worker, detect its version once, and
    broadcast refreshed stats to dashboard WebSocket clients.

    Runs forever; pauses HEALTH_CHECK_INTERVAL seconds between sweeps.
    """
    while True:
        for worker_url in WORKER_URLS:
            state = worker_health[worker_url]

            # Liveness probe.
            healthy = await check_worker_health(worker_url)
            state["healthy"] = healthy
            state["last_check"] = time.time()

            # One-time version/model detection (result is sticky).
            if state["version"] is None:
                version, models = await detect_worker_version(worker_url)
                state["version"] = version
                state["supports_models"] = models

            # BUG FIX: the log line previously formatted the loop-local
            # `version`/`models`, which are undefined-or-stale for any worker
            # whose version was detected on an earlier sweep.  Read the
            # per-worker state instead.
            status = "✅" if healthy else "❌"
            name = worker_url.split('//')[1].split('.')[0]
            print(f"{status} Worker: {name} | Version: {state['version']} | Models: {len(state['supports_models'])}")

        await broadcast_stats()
        await asyncio.sleep(HEALTH_CHECK_INTERVAL)
|
| 172 |
+
|
| 173 |
+
@app.on_event("startup")
async def startup_event():
    """Launch the background health-check loop when the app starts.

    NOTE(review): ``@app.on_event`` is deprecated in recent FastAPI in
    favour of lifespan handlers; kept to avoid restructuring app creation.
    """
    # FIX: keep a strong reference to the task (asyncio only holds weak
    # references to running tasks, so an unreferenced task may be
    # garbage-collected mid-flight).
    app.state.health_check_task = asyncio.create_task(health_check_loop())
|
| 176 |
+
|
| 177 |
+
# ============================================================================
|
| 178 |
+
# Smart Worker Selection
|
| 179 |
+
# ============================================================================
|
| 180 |
+
|
| 181 |
+
def get_workers_for_model(model_name: Optional[str]) -> List[str]:
    """Return the healthy workers able to serve *model_name*.

    With no model requested, every healthy worker qualifies.  Otherwise a
    v5 worker must advertise the model, while v4 workers are always kept
    (they serve their default model).  If filtering leaves nothing, the
    full healthy pool is returned rather than failing outright.
    """
    candidates = get_healthy_workers()

    # No specific model: any healthy worker will do.
    if not model_name:
        return candidates

    compatible = [
        url
        for url in candidates
        if worker_health[url]["version"] == "v4"
        or (worker_health[url]["version"] == "v5"
            and model_name in worker_health[url]["supports_models"])
    ]

    # Empty filter result falls back to the whole healthy pool.
    return compatible or candidates
|
| 203 |
+
|
| 204 |
+
def get_healthy_workers() -> List[str]:
    """URLs of all workers currently flagged healthy."""
    return [url for url in worker_health if worker_health[url]["healthy"]]
|
| 206 |
+
|
| 207 |
+
def get_least_busy_worker(worker_list: List[str] = None) -> Optional[str]:
    """Pick the worker with the fewest in-flight requests, or None if empty.

    Ties resolve to the earliest worker in the candidate list.
    """
    candidates = get_healthy_workers() if worker_list is None else worker_list
    if not candidates:
        return None

    best = candidates[0]
    for url in candidates[1:]:
        # Strict comparison keeps the first minimum, matching min() semantics.
        if worker_health[url]["active_requests"] < worker_health[best]["active_requests"]:
            best = url
    return best
|
| 212 |
+
|
| 213 |
+
def select_distributed_workers(model_name: Optional[str] = None) -> tuple:
    """Split the model-compatible workers into generator and decoder roles.

    The least-loaded worker becomes the single generator; the next-least-
    loaded workers (at most four) become decoders.  With fewer than two
    compatible workers the decoder list is empty.

    Returns:
        (generators, decoders) as two lists of worker URLs.
    """
    pool = get_workers_for_model(model_name)

    if not pool:
        return [], []

    by_load = sorted(pool, key=lambda url: worker_health[url]["active_requests"])

    if len(by_load) == 1:
        return [by_load[0]], []

    # Slicing past the end truncates, so one expression covers every pool
    # size >= 2: one generator plus up to four decoders (same result as the
    # original >=5 / ==4 / ==3 / else branch chain).
    return [by_load[0]], by_load[1:5]
|
| 233 |
|
| 234 |
# ============================================================================
|
| 235 |
# Load Management
|
|
|
|
| 244 |
load = get_current_load()
|
| 245 |
healthy_count = len(get_healthy_workers())
|
| 246 |
|
|
|
|
| 247 |
if healthy_count >= 5:
|
| 248 |
if load <= LIGHT_LOAD_THRESHOLD:
|
| 249 |
+
current_load_mode = "light"
|
| 250 |
+
elif load <= HEAVY_LOAD_THRESHOLD:
|
| 251 |
+
current_load_mode = "medium"
|
| 252 |
else:
|
| 253 |
+
current_load_mode = "heavy"
|
| 254 |
elif healthy_count >= 3:
|
| 255 |
if load <= 2:
|
| 256 |
+
current_load_mode = "light"
|
| 257 |
else:
|
| 258 |
+
current_load_mode = "heavy"
|
| 259 |
else:
|
| 260 |
+
current_load_mode = "heavy"
|
| 261 |
|
| 262 |
return current_load_mode, load
|
| 263 |
|
|
|
|
| 265 |
request_timestamps.append(time.time())
|
| 266 |
cluster_stats["total_requests"] += 1
|
| 267 |
|
| 268 |
+
# ============================================================================
|
| 269 |
+
# Dashboard & WebSocket
|
| 270 |
+
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
async def broadcast_stats():
|
|
|
|
| 273 |
if not active_connections:
|
| 274 |
return
|
| 275 |
|
|
|
|
| 282 |
"load": load,
|
| 283 |
"workers": [
|
| 284 |
{
|
| 285 |
+
"url": url.split("//")[1].split(".")[0],
|
| 286 |
"healthy": status["healthy"],
|
| 287 |
"active": status["active_requests"],
|
| 288 |
"total": status["total_requests"],
|
| 289 |
"tokens": status["total_tokens"],
|
| 290 |
"latency": round(status["avg_latency"], 2),
|
| 291 |
+
"role": status["role"],
|
| 292 |
+
"version": status["version"] or "detecting...",
|
| 293 |
+
"models": len(status["supports_models"])
|
| 294 |
}
|
| 295 |
for url, status in worker_health.items()
|
| 296 |
],
|
|
|
|
| 303 |
}
|
| 304 |
}
|
| 305 |
|
|
|
|
| 306 |
disconnected = set()
|
| 307 |
for ws in active_connections:
|
| 308 |
try:
|
|
|
|
| 310 |
except:
|
| 311 |
disconnected.add(ws)
|
| 312 |
|
|
|
|
| 313 |
active_connections.difference_update(disconnected)
|
| 314 |
|
| 315 |
+
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Dashboard WebSocket: push an immediate stats snapshot on connect,
    then hold the socket open until the client disconnects.

    Incoming frames are read and discarded; the server only pushes data
    (via broadcast_stats, triggered by the health-check loop).
    """
    await websocket.accept()
    active_connections.add(websocket)

    try:
        # Immediate snapshot so the dashboard isn't blank until the next
        # periodic broadcast.
        await broadcast_stats()
        # Block on incoming frames; a disconnect raises and exits the loop.
        while True:
            await websocket.receive_text()
    # FIX: was a bare `except:` (also traps CancelledError); narrowed to
    # Exception, which still covers WebSocketDisconnect and socket errors.
    except Exception:
        pass
    finally:
        active_connections.discard(websocket)
|
| 328 |
|
| 329 |
# ============================================================================
|
| 330 |
+
# Distributed Generation
|
| 331 |
# ============================================================================
|
| 332 |
|
| 333 |
async def distributed_generation(
|
|
|
|
| 336 |
request_data: dict,
|
| 337 |
endpoint: str = "generate"
|
| 338 |
):
|
| 339 |
+
"""Distributed compute with v4/v5 compatibility"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
|
| 341 |
if not generators or not decoders:
|
| 342 |
return
|
|
|
|
| 344 |
token_queue = asyncio.Queue(maxsize=50)
|
| 345 |
text_queue = asyncio.Queue(maxsize=50)
|
| 346 |
|
|
|
|
| 347 |
for gen_url in generators:
|
| 348 |
worker_health[gen_url]["role"] = "generator"
|
| 349 |
for dec_url in decoders:
|
| 350 |
worker_health[dec_url]["role"] = "decoder"
|
| 351 |
|
| 352 |
async def generate_tokens():
|
| 353 |
+
gen_url = generators[0]
|
|
|
|
| 354 |
try:
|
| 355 |
worker_health[gen_url]["active_requests"] += 1
|
| 356 |
request_data_tokens = {**request_data, "return_token_ids": True}
|
|
|
|
| 368 |
if "token_id" in data:
|
| 369 |
await token_queue.put(data["token_id"])
|
| 370 |
elif "done" in data:
|
|
|
|
| 371 |
for _ in decoders:
|
| 372 |
await token_queue.put(None)
|
| 373 |
break
|
|
|
|
| 382 |
worker_health[gen_url]["role"] = "idle"
|
| 383 |
|
| 384 |
async def decode_tokens(decoder_url: str, decoder_id: int):
|
|
|
|
| 385 |
try:
|
| 386 |
worker_health[decoder_url]["active_requests"] += 1
|
| 387 |
batch = []
|
| 388 |
+
batch_size = 2
|
| 389 |
|
| 390 |
while True:
|
| 391 |
try:
|
| 392 |
token_id = await asyncio.wait_for(token_queue.get(), timeout=2.0)
|
| 393 |
|
| 394 |
if token_id is None:
|
|
|
|
| 395 |
if batch:
|
| 396 |
async with httpx.AsyncClient(timeout=10.0) as client:
|
| 397 |
response = await client.post(
|
|
|
|
| 407 |
|
| 408 |
batch.append(token_id)
|
| 409 |
|
|
|
|
| 410 |
if len(batch) >= batch_size:
|
| 411 |
async with httpx.AsyncClient(timeout=10.0) as client:
|
| 412 |
response = await client.post(
|
|
|
|
| 429 |
worker_health[decoder_url]["active_requests"] -= 1
|
| 430 |
worker_health[decoder_url]["role"] = "idle"
|
| 431 |
|
|
|
|
| 432 |
gen_task = asyncio.create_task(generate_tokens())
|
|
|
|
|
|
|
| 433 |
decoder_tasks = [
|
| 434 |
asyncio.create_task(decode_tokens(dec_url, i))
|
| 435 |
for i, dec_url in enumerate(decoders)
|
| 436 |
]
|
| 437 |
|
|
|
|
| 438 |
accumulated_text = ""
|
| 439 |
decoders_done = 0
|
| 440 |
total_decoders = len(decoders)
|
|
|
|
| 480 |
worker_health[worker_url]["role"] = "idle"
|
| 481 |
|
| 482 |
# ============================================================================
|
| 483 |
+
# API Endpoints
|
| 484 |
# ============================================================================
|
| 485 |
|
| 486 |
@app.get("/", response_class=HTMLResponse)
|
| 487 |
async def dashboard():
|
| 488 |
+
"""Real-time dashboard with version info"""
|
| 489 |
return """
|
| 490 |
<!DOCTYPE html>
|
| 491 |
<html>
|
| 492 |
<head>
|
| 493 |
+
<title>SAM-Z-1 Cluster Control v5.0</title>
|
| 494 |
<style>
|
| 495 |
+
* { margin: 0; padding: 0; box-sizing: border-box; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
body {
|
| 497 |
font-family: 'Courier New', monospace;
|
| 498 |
background: linear-gradient(135deg, #0a0e27 0%, #1a1f3a 100%);
|
|
|
|
| 501 |
overflow-x: hidden;
|
| 502 |
overflow-y: auto;
|
| 503 |
}
|
|
|
|
| 504 |
.container {
|
| 505 |
padding: 20px;
|
| 506 |
max-width: 1400px;
|
| 507 |
margin: 0 auto;
|
| 508 |
padding-bottom: 40px;
|
| 509 |
}
|
|
|
|
| 510 |
.header {
|
| 511 |
text-align: center;
|
| 512 |
margin-bottom: 30px;
|
|
|
|
| 516 |
border-radius: 10px;
|
| 517 |
box-shadow: 0 0 20px rgba(0, 255, 136, 0.3);
|
| 518 |
}
|
|
|
|
| 519 |
.header h1 {
|
| 520 |
font-size: 2.5em;
|
| 521 |
text-transform: uppercase;
|
|
|
|
| 523 |
text-shadow: 0 0 10px #00ff88;
|
| 524 |
animation: glow 2s ease-in-out infinite alternate;
|
| 525 |
}
|
|
|
|
| 526 |
@keyframes glow {
|
| 527 |
from { text-shadow: 0 0 10px #00ff88, 0 0 20px #00ff88; }
|
| 528 |
to { text-shadow: 0 0 20px #00ff88, 0 0 30px #00ff88, 0 0 40px #00ff88; }
|
| 529 |
}
|
| 530 |
+
.version-badge {
|
| 531 |
+
display: inline-block;
|
| 532 |
+
padding: 3px 10px;
|
| 533 |
+
border-radius: 12px;
|
| 534 |
+
font-size: 10px;
|
| 535 |
+
margin-left: 8px;
|
| 536 |
+
font-weight: bold;
|
| 537 |
+
}
|
| 538 |
+
.version-v5 {
|
| 539 |
+
background: rgba(0, 255, 136, 0.2);
|
| 540 |
+
border: 1px solid #00ff88;
|
| 541 |
+
color: #00ff88;
|
| 542 |
+
}
|
| 543 |
+
.version-v4 {
|
| 544 |
+
background: rgba(255, 165, 0, 0.2);
|
| 545 |
+
border: 1px solid #ffa500;
|
| 546 |
+
color: #ffa500;
|
| 547 |
+
}
|
| 548 |
.status-bar {
|
| 549 |
display: flex;
|
| 550 |
gap: 20px;
|
| 551 |
margin-bottom: 30px;
|
| 552 |
}
|
|
|
|
| 553 |
.stat-card {
|
| 554 |
flex: 1;
|
| 555 |
background: rgba(0, 255, 136, 0.05);
|
|
|
|
| 559 |
position: relative;
|
| 560 |
overflow: hidden;
|
| 561 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
.stat-label {
|
| 563 |
font-size: 0.8em;
|
| 564 |
opacity: 0.7;
|
| 565 |
text-transform: uppercase;
|
| 566 |
}
|
|
|
|
| 567 |
.stat-value {
|
| 568 |
font-size: 2em;
|
| 569 |
font-weight: bold;
|
| 570 |
margin-top: 5px;
|
| 571 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
.workers-grid {
|
| 573 |
display: grid;
|
| 574 |
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
| 575 |
gap: 20px;
|
| 576 |
margin-bottom: 30px;
|
| 577 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
.worker-card {
|
| 579 |
background: rgba(10, 14, 39, 0.8);
|
| 580 |
border: 2px solid #00ff88;
|
|
|
|
| 583 |
position: relative;
|
| 584 |
transition: all 0.3s;
|
| 585 |
}
|
|
|
|
| 586 |
.worker-card:hover {
|
| 587 |
transform: translateY(-5px);
|
| 588 |
box-shadow: 0 5px 30px rgba(0, 255, 136, 0.4);
|
| 589 |
}
|
|
|
|
| 590 |
.worker-card.offline {
|
| 591 |
border-color: #ff4444;
|
| 592 |
opacity: 0.6;
|
| 593 |
}
|
|
|
|
| 594 |
.worker-header {
|
| 595 |
display: flex;
|
| 596 |
justify-content: space-between;
|
| 597 |
align-items: center;
|
| 598 |
margin-bottom: 15px;
|
| 599 |
}
|
|
|
|
| 600 |
.worker-name {
|
| 601 |
font-size: 1.2em;
|
| 602 |
font-weight: bold;
|
| 603 |
}
|
|
|
|
| 604 |
.status-dot {
|
| 605 |
width: 12px;
|
| 606 |
height: 12px;
|
| 607 |
border-radius: 50%;
|
| 608 |
animation: pulse 2s infinite;
|
| 609 |
}
|
|
|
|
| 610 |
.status-dot.online {
|
| 611 |
background: #00ff88;
|
| 612 |
box-shadow: 0 0 10px #00ff88;
|
| 613 |
}
|
|
|
|
| 614 |
.status-dot.offline {
|
| 615 |
background: #ff4444;
|
| 616 |
box-shadow: 0 0 10px #ff4444;
|
| 617 |
}
|
|
|
|
| 618 |
@keyframes pulse {
|
| 619 |
0%, 100% { opacity: 1; }
|
| 620 |
50% { opacity: 0.5; }
|
| 621 |
}
|
|
|
|
| 622 |
.worker-stats {
|
| 623 |
display: grid;
|
| 624 |
grid-template-columns: repeat(2, 1fr);
|
| 625 |
gap: 10px;
|
| 626 |
margin-top: 15px;
|
| 627 |
}
|
|
|
|
| 628 |
.worker-stat {
|
| 629 |
background: rgba(0, 255, 136, 0.05);
|
| 630 |
padding: 10px;
|
| 631 |
border-radius: 5px;
|
| 632 |
}
|
|
|
|
| 633 |
.worker-stat-label {
|
| 634 |
font-size: 0.7em;
|
| 635 |
opacity: 0.7;
|
| 636 |
}
|
|
|
|
| 637 |
.worker-stat-value {
|
| 638 |
font-size: 1.3em;
|
| 639 |
font-weight: bold;
|
| 640 |
margin-top: 3px;
|
| 641 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 642 |
.timestamp {
|
| 643 |
text-align: center;
|
| 644 |
margin-top: 20px;
|
| 645 |
opacity: 0.5;
|
| 646 |
font-size: 0.9em;
|
| 647 |
}
|
| 648 |
+
@media (max-width: 768px) {
|
| 649 |
+
.workers-grid { grid-template-columns: 1fr; }
|
| 650 |
+
.status-bar { flex-direction: column; }
|
| 651 |
+
}
|
| 652 |
</style>
|
| 653 |
</head>
|
| 654 |
<body>
|
| 655 |
<div class="container">
|
| 656 |
<div class="header">
|
| 657 |
<h1>⚡ SAM-Z-1 CLUSTER ⚡</h1>
|
| 658 |
+
<div>DISTRIBUTED COMPUTE SYSTEM v5.0 • AUTO VERSION DETECTION</div>
|
| 659 |
</div>
|
| 660 |
|
| 661 |
<div class="status-bar">
|
| 662 |
<div class="stat-card">
|
| 663 |
<div class="stat-label">Load Mode</div>
|
| 664 |
<div class="stat-value" id="mode">--</div>
|
|
|
|
| 665 |
</div>
|
| 666 |
<div class="stat-card">
|
| 667 |
<div class="stat-label">Current Load</div>
|
| 668 |
<div class="stat-value" id="load">0</div>
|
|
|
|
| 669 |
</div>
|
| 670 |
<div class="stat-card">
|
| 671 |
<div class="stat-label">Total Requests</div>
|
|
|
|
| 677 |
</div>
|
| 678 |
</div>
|
| 679 |
|
| 680 |
+
<div class="workers-grid" id="workers"></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
|
| 682 |
<div class="timestamp" id="timestamp">Last update: --</div>
|
| 683 |
</div>
|
| 684 |
|
| 685 |
<script>
|
|
|
|
| 686 |
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
| 687 |
let ws;
|
|
|
|
| 688 |
|
| 689 |
function connectWebSocket() {
|
| 690 |
try {
|
| 691 |
ws = new WebSocket(`${protocol}//${window.location.host}/ws`);
|
| 692 |
|
| 693 |
+
ws.onopen = () => console.log('✅ WebSocket connected');
|
| 694 |
+
ws.onmessage = (event) => updateDashboard(JSON.parse(event.data));
|
| 695 |
+
ws.onerror = () => console.error('❌ WebSocket error');
|
| 696 |
+
ws.onclose = () => setTimeout(connectWebSocket, 3000);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
} catch (e) {
|
| 698 |
+
console.error('Failed to connect WebSocket');
|
|
|
|
|
|
|
| 699 |
}
|
| 700 |
}
|
| 701 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 702 |
connectWebSocket();
|
| 703 |
|
| 704 |
function updateDashboard(data) {
|
|
|
|
| 705 |
document.getElementById('mode').textContent = data.mode.toUpperCase();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
document.getElementById('load').textContent = data.load;
|
| 707 |
document.getElementById('total-req').textContent = data.cluster.total_requests;
|
| 708 |
document.getElementById('rps').textContent = data.cluster.rps;
|
|
|
|
|
|
|
|
|
|
| 709 |
|
|
|
|
| 710 |
const workersDiv = document.getElementById('workers');
|
|
|
|
|
|
|
|
|
|
| 711 |
workersDiv.innerHTML = data.workers.map(worker => `
|
| 712 |
<div class="worker-card ${worker.healthy ? '' : 'offline'}">
|
| 713 |
<div class="worker-header">
|
| 714 |
+
<div>
|
| 715 |
+
<div class="worker-name">${worker.url}</div>
|
| 716 |
+
<span class="version-badge version-${worker.version}">${worker.version.toUpperCase()}</span>
|
| 717 |
+
${worker.models > 0 ? `<span style="font-size:0.8em;opacity:0.7;margin-left:5px">${worker.models} models</span>` : ''}
|
| 718 |
+
</div>
|
| 719 |
<div class="status-dot ${worker.healthy ? 'online' : 'offline'}"></div>
|
| 720 |
</div>
|
|
|
|
| 721 |
<div class="worker-stats">
|
| 722 |
<div class="worker-stat">
|
| 723 |
<div class="worker-stat-label">Active</div>
|
|
|
|
| 732 |
<div class="worker-stat-value">${worker.tokens}</div>
|
| 733 |
</div>
|
| 734 |
<div class="worker-stat">
|
| 735 |
+
<div class="worker-stat-label">Role</div>
|
| 736 |
+
<div class="worker-stat-value" style="font-size:1em;">${worker.role}</div>
|
| 737 |
</div>
|
| 738 |
</div>
|
|
|
|
|
|
|
|
|
|
| 739 |
</div>
|
| 740 |
`).join('');
|
| 741 |
|
|
|
|
|
|
|
| 742 |
document.getElementById('timestamp').textContent =
|
| 743 |
+
`Last update: ${new Date().toLocaleTimeString()}`;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 744 |
}
|
| 745 |
</script>
|
| 746 |
</body>
|
| 747 |
</html>
|
| 748 |
"""
|
| 749 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 750 |
@app.get("/api/status")
async def api_status():
    """Cluster summary: load mode, worker/version counts, and feature flags."""
    mode, load = update_load_mode()
    healthy_count = len(get_healthy_workers())

    # Tally workers by detected protocol version (undetected count in neither).
    version_tally = {"v4": 0, "v5": 0}
    for state in worker_health.values():
        if state["version"] in version_tally:
            version_tally[state["version"]] += 1

    return {
        "name": "SAM-Z-1 Distributed Cluster",
        "version": "5.0.0",
        "mode": mode,
        "current_load": load,
        "workers": len(WORKER_URLS),
        "healthy_workers": healthy_count,
        "v4_workers": version_tally["v4"],
        "v5_workers": version_tally["v5"],
        "features": [
            "distributed_compute",
            "smart_load_balancing",
            "auto_version_detection",
            "multi_model_support",
            "real_time_dashboard",
        ],
    }
|
| 776 |
|
| 777 |
@app.get("/health")
|
|
|
|
| 782 |
"workers_healthy": healthy_count
|
| 783 |
}
|
| 784 |
|
| 785 |
+
@app.get("/models")
async def list_models():
    """Aggregate the model catalogue across every healthy v5 worker.

    v4 workers are excluded: they expose no model list.
    """
    available = set()
    for state in worker_health.values():
        if state["healthy"] and state["version"] == "v5":
            for model in state["supports_models"]:
                available.add(model)

    default = "SAM-X-1-Nano" if "SAM-X-1-Nano" in available else None
    return {
        "models": sorted(available),
        "default": default
    }
|
| 797 |
+
|
| 798 |
@app.post("/v1/generate")
|
| 799 |
async def generate(request: GenerateRequest):
|
| 800 |
+
"""Generate text with automatic model routing"""
|
| 801 |
track_request()
|
| 802 |
mode, load = update_load_mode()
|
| 803 |
|
| 804 |
+
# Get compatible workers
|
| 805 |
+
compatible = get_workers_for_model(request.model)
|
| 806 |
+
|
| 807 |
+
if not compatible:
|
| 808 |
cluster_stats["failed_requests"] += 1
|
| 809 |
+
raise HTTPException(
|
| 810 |
+
status_code=503,
|
| 811 |
+
detail=f"No workers available for model: {request.model or 'default'}"
|
| 812 |
+
)
|
| 813 |
|
| 814 |
request_data = {
|
| 815 |
"prompt": request.prompt,
|
|
|
|
| 821 |
"stream": True
|
| 822 |
}
|
| 823 |
|
| 824 |
+
# Add model parameter for v5 workers
|
| 825 |
+
if request.model:
|
| 826 |
+
request_data["model"] = request.model
|
| 827 |
+
|
| 828 |
+
print(f"🎯 {mode.upper()} | Load: {load} | Model: {request.model or 'default'} | Workers: {len(compatible)}")
|
| 829 |
|
| 830 |
try:
|
| 831 |
+
if mode == "light" and len(compatible) >= 2:
|
| 832 |
+
generators, decoders = select_distributed_workers(request.model)
|
|
|
|
| 833 |
if decoders:
|
| 834 |
cluster_stats["successful_requests"] += 1
|
| 835 |
return StreamingResponse(
|
|
|
|
| 837 |
media_type="text/event-stream"
|
| 838 |
)
|
| 839 |
|
| 840 |
+
worker = get_least_busy_worker(compatible)
|
|
|
|
| 841 |
cluster_stats["successful_requests"] += 1
|
| 842 |
return StreamingResponse(
|
| 843 |
heavy_load_generation(worker, request_data, "generate"),
|
|
|
|
| 849 |
|
| 850 |
@app.post("/v1/chat")
|
| 851 |
async def chat(request: ChatRequest):
|
| 852 |
+
"""Chat with automatic model routing"""
|
| 853 |
track_request()
|
| 854 |
mode, load = update_load_mode()
|
| 855 |
|
| 856 |
+
# Get compatible workers
|
| 857 |
+
compatible = get_workers_for_model(request.model)
|
| 858 |
+
|
| 859 |
+
if not compatible:
|
| 860 |
cluster_stats["failed_requests"] += 1
|
| 861 |
+
raise HTTPException(
|
| 862 |
+
status_code=503,
|
| 863 |
+
detail=f"No workers available for model: {request.model or 'default'}"
|
| 864 |
+
)
|
| 865 |
|
| 866 |
request_data = {
|
| 867 |
"messages": [{"role": m.role, "content": m.content} for m in request.messages],
|
|
|
|
| 873 |
"stream": True
|
| 874 |
}
|
| 875 |
|
| 876 |
+
# Add model parameter for v5 workers
|
| 877 |
+
if request.model:
|
| 878 |
+
request_data["model"] = request.model
|
| 879 |
+
|
| 880 |
+
print(f"💬 {mode.upper()} | Load: {load} | Model: {request.model or 'default'} | Workers: {len(compatible)}")
|
| 881 |
|
| 882 |
try:
|
| 883 |
+
if mode == "light" and len(compatible) >= 2:
|
| 884 |
+
generators, decoders = select_distributed_workers(request.model)
|
|
|
|
| 885 |
if decoders:
|
| 886 |
cluster_stats["successful_requests"] += 1
|
| 887 |
return StreamingResponse(
|
|
|
|
| 889 |
media_type="text/event-stream"
|
| 890 |
)
|
| 891 |
|
| 892 |
+
worker = get_least_busy_worker(compatible)
|
|
|
|
| 893 |
cluster_stats["successful_requests"] += 1
|
| 894 |
return StreamingResponse(
|
| 895 |
heavy_load_generation(worker, request_data, "chat"),
|