Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,9 +4,8 @@ import httpx
|
|
| 4 |
import asyncio
|
| 5 |
import time
|
| 6 |
from datetime import datetime
|
| 7 |
-
from typing import Dict, List
|
| 8 |
from contextlib import asynccontextmanager
|
| 9 |
-
import json
|
| 10 |
|
| 11 |
# Configuration
|
| 12 |
PING_INTERVAL = 300 # 5 minutes
|
|
@@ -24,39 +23,54 @@ regular_servers = [
|
|
| 24 |
"https://rajhuggingface4253-compressor3pro.hf.space"
|
| 25 |
]
|
| 26 |
|
| 27 |
-
#
|
| 28 |
models_to_warm = [
|
| 29 |
{
|
| 30 |
"name": "Qwen 1",
|
| 31 |
"url": "https://rajhuggingface4253-qwen.hf.space",
|
| 32 |
"endpoint": "/chat",
|
| 33 |
"type": "streaming_chat",
|
| 34 |
-
"warmup_prompt": "Hello, are you ready? Respond with just 'OK'.",
|
| 35 |
"timeout": 30.0,
|
| 36 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"name": "Qwen 2",
|
| 40 |
"url": "https://rajhuggingface4253-qwen2.hf.space",
|
| 41 |
"endpoint": "/chat",
|
| 42 |
"type": "streaming_chat",
|
| 43 |
-
"warmup_prompt": "Say 'ready' if you're working.",
|
| 44 |
"timeout": 30.0,
|
| 45 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"name": "Qwen 3",
|
| 49 |
"url": "https://rajhuggingface4253-qwen3.hf.space",
|
| 50 |
"endpoint": "/chat",
|
| 51 |
"type": "streaming_chat",
|
| 52 |
-
"warmup_prompt": "Ping! Respond with 'pong'.",
|
| 53 |
"timeout": 30.0,
|
| 54 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"name": "Kokoro TTS",
|
| 58 |
"url": "https://rajhuggingface4253-koko.hf.space",
|
| 59 |
-
"endpoint": "/health",
|
| 60 |
"type": "health_check",
|
| 61 |
"timeout": 15.0
|
| 62 |
}
|
|
@@ -91,23 +105,12 @@ async def ping_server(url: str) -> Dict:
|
|
| 91 |
}
|
| 92 |
|
| 93 |
async def warmup_streaming_model(model_config: Dict) -> Dict:
|
| 94 |
-
"""
|
| 95 |
-
Warm up streaming models by calling their chat endpoints
|
| 96 |
-
and reading the streaming response
|
| 97 |
-
"""
|
| 98 |
try:
|
| 99 |
start_time = time.time()
|
| 100 |
async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
|
| 101 |
|
| 102 |
-
|
| 103 |
-
payload = {
|
| 104 |
-
"prompt": model_config['warmup_prompt'],
|
| 105 |
-
"max_new_tokens": 50,
|
| 106 |
-
"temperature": 0.1, # Low temperature for consistent responses
|
| 107 |
-
"enable_code_execution": False,
|
| 108 |
-
"enable_web_search": False
|
| 109 |
-
}
|
| 110 |
-
|
| 111 |
api_url = f"{model_config['url']}{model_config['endpoint']}"
|
| 112 |
|
| 113 |
# Make the streaming request
|
|
@@ -133,15 +136,8 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
|
|
| 133 |
|
| 134 |
# Check if we got a valid response
|
| 135 |
if collected_response:
|
| 136 |
-
# Check for expected response if specified
|
| 137 |
-
expected_response = model_config.get('expected_response', '').lower()
|
| 138 |
-
if expected_response and expected_response in collected_response.lower():
|
| 139 |
-
status = 'success'
|
| 140 |
-
else:
|
| 141 |
-
status = 'success' # Still success if we got any response
|
| 142 |
-
|
| 143 |
return {
|
| 144 |
-
'status':
|
| 145 |
'response_time_ms': response_time,
|
| 146 |
'status_code': response.status_code,
|
| 147 |
'model_response': collected_response[:200] + "..." if len(collected_response) > 200 else collected_response,
|
|
@@ -177,7 +173,7 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
|
|
| 177 |
}
|
| 178 |
|
| 179 |
async def warmup_model_health(model_config: Dict) -> Dict:
|
| 180 |
-
"""Warm up models using health endpoints
|
| 181 |
try:
|
| 182 |
start_time = time.time()
|
| 183 |
async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
|
|
@@ -278,7 +274,7 @@ async def warmup_all_models():
|
|
| 278 |
'health_check': results[i]
|
| 279 |
}
|
| 280 |
|
| 281 |
-
# Log
|
| 282 |
result = results[i]
|
| 283 |
model_name = model['name']
|
| 284 |
|
|
@@ -288,13 +284,8 @@ async def warmup_all_models():
|
|
| 288 |
if result.get('response_length'):
|
| 289 |
response_info += f" ({result['response_length']} chars)"
|
| 290 |
print(f"✅ {model_name}: {response_info}")
|
| 291 |
-
|
| 292 |
-
# Log snippet of model response for streaming models
|
| 293 |
-
if result.get('model_response'):
|
| 294 |
-
snippet = result['model_response'].replace('\n', ' ')[:50]
|
| 295 |
-
print(f" π Response: {snippet}...")
|
| 296 |
else:
|
| 297 |
-
print(f"β
{model_name}: {result['response_time_ms']}ms
|
| 298 |
else:
|
| 299 |
error_msg = result.get('error', 'Unknown error')
|
| 300 |
print(f"❌ {model_name}: {error_msg}")
|
|
@@ -341,13 +332,11 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
|
|
| 341 |
print(f"π Health ping failed for {space_url}: {e}")
|
| 342 |
|
| 343 |
async def continuous_pinging():
|
| 344 |
-
"""Main pinging loop with
|
| 345 |
-
print("π
|
| 346 |
print(f"π Regular servers: {len(regular_servers)}")
|
| 347 |
print(f"🤖 Models to warm: {len(models_to_warm)}")
|
| 348 |
print(f"π Pinger network: {len(pinger_spaces)}")
|
| 349 |
-
print("⏰ Health checks every 5 minutes")
|
| 350 |
-
print("🎯 Using streaming endpoints for model warming")
|
| 351 |
|
| 352 |
last_health_check = 0
|
| 353 |
|
|
@@ -357,14 +346,13 @@ async def continuous_pinging():
|
|
| 357 |
if regular_servers:
|
| 358 |
await ping_all_servers()
|
| 359 |
|
| 360 |
-
# Warm up models
|
| 361 |
if models_to_warm:
|
| 362 |
await warmup_all_models()
|
| 363 |
|
| 364 |
# Ping health endpoints every 30 minutes
|
| 365 |
current_time = time.time()
|
| 366 |
if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
|
| 367 |
-
print("π Pinging other pinger spaces...")
|
| 368 |
await ping_health_endpoints()
|
| 369 |
last_health_check = current_time
|
| 370 |
|
|
@@ -377,13 +365,12 @@ async def continuous_pinging():
|
|
| 377 |
@asynccontextmanager
|
| 378 |
async def lifespan(app: FastAPI):
|
| 379 |
# Startup
|
| 380 |
-
print("Starting up Enhanced Smart Model Warmer...")
|
| 381 |
asyncio.create_task(continuous_pinging())
|
| 382 |
yield
|
| 383 |
# Shutdown
|
| 384 |
print("Shutting down...")
|
| 385 |
|
| 386 |
-
app = FastAPI(title="
|
| 387 |
|
| 388 |
@app.get("/", response_class=HTMLResponse)
|
| 389 |
async def home():
|
|
@@ -397,57 +384,36 @@ async def home():
|
|
| 397 |
for url, data in model_warmup_results.items():
|
| 398 |
model_info = data['model_info']
|
| 399 |
health = data['health_check']
|
| 400 |
-
|
| 401 |
-
status_info = {
|
| 402 |
'name': model_info['name'],
|
| 403 |
'type': model_info.get('type', 'health_check'),
|
| 404 |
'status': health['status'],
|
| 405 |
'response_time': health.get('response_time_ms', 0),
|
| 406 |
'error': health.get('error', '')
|
| 407 |
-
}
|
| 408 |
-
|
| 409 |
-
# Add response info for streaming models
|
| 410 |
-
if health.get('got_response') and health.get('model_response'):
|
| 411 |
-
status_info['response_snippet'] = health['model_response']
|
| 412 |
-
status_info['response_length'] = health.get('response_length', 0)
|
| 413 |
-
|
| 414 |
-
model_statuses.append(status_info)
|
| 415 |
|
| 416 |
model_status_html = "".join([
|
| 417 |
-
f"""
|
| 418 |
-
<li>
|
| 419 |
-
<strong>{m['name']}</strong> ({m['type']}):
|
| 420 |
-
<span class="{'success' if m['status'] == 'success' else 'error'}">{m['status']}</span>
|
| 421 |
-
({m['response_time']}ms)
|
| 422 |
-
{f" - {m['response_length']} chars" if m.get('response_length') else ''}
|
| 423 |
-
{f"<br><small>Error: {m['error']}</small>" if m['error'] else ''}
|
| 424 |
-
{f"<br><small>Snippet: {m.get('response_snippet', '')}</small>" if m.get('response_snippet') else ''}
|
| 425 |
-
</li>
|
| 426 |
-
"""
|
| 427 |
for m in model_statuses
|
| 428 |
])
|
| 429 |
|
| 430 |
html_content = f"""
|
| 431 |
<html>
|
| 432 |
<head>
|
| 433 |
-
<title>
|
| 434 |
<style>
|
| 435 |
body {{ font-family: Arial, sans-serif; margin: 40px; }}
|
| 436 |
.success {{ color: green; font-weight: bold; }}
|
| 437 |
.error {{ color: red; font-weight: bold; }}
|
| 438 |
-
.container {{ max-width:
|
| 439 |
.model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
|
| 440 |
-
.status-grid {{ display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px; }}
|
| 441 |
-
.model-item {{ margin: 10px 0; padding: 10px; background: white; border-radius: 5px; }}
|
| 442 |
-
small {{ color: #666; }}
|
| 443 |
</style>
|
| 444 |
</head>
|
| 445 |
<body>
|
| 446 |
<div class="container">
|
| 447 |
-
<h1
|
| 448 |
-
<p><em>Now with streaming endpoint support for AI models</em></p>
|
| 449 |
|
| 450 |
-
<div
|
| 451 |
<div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
|
| 452 |
<h3>π Regular Servers</h3>
|
| 453 |
<p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
|
|
@@ -463,7 +429,7 @@ async def home():
|
|
| 463 |
</div>
|
| 464 |
|
| 465 |
<div class="model-list">
|
| 466 |
-
<h3>Model
|
| 467 |
<ul>
|
| 468 |
{model_status_html if model_statuses else "<li>No model data yet</li>"}
|
| 469 |
</ul>
|
|
@@ -471,7 +437,6 @@ async def home():
|
|
| 471 |
|
| 472 |
<p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
|
| 473 |
<p><strong>Next check in:</strong> ~5 minutes</p>
|
| 474 |
-
<p><strong>Streaming endpoints:</strong> Using /chat for Qwen models</p>
|
| 475 |
</div>
|
| 476 |
</body>
|
| 477 |
</html>
|
|
@@ -483,10 +448,9 @@ async def health():
|
|
| 483 |
"""Health endpoint for other pingers"""
|
| 484 |
return JSONResponse({
|
| 485 |
"status": "healthy",
|
| 486 |
-
"service": "
|
| 487 |
"regular_servers": len(regular_servers),
|
| 488 |
"ai_models": len(models_to_warm),
|
| 489 |
-
"streaming_models": sum(1 for m in models_to_warm if m.get('type') == 'streaming_chat'),
|
| 490 |
"last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
|
| 491 |
"timestamp": datetime.now().isoformat()
|
| 492 |
})
|
|
@@ -513,20 +477,6 @@ async def ping_now():
|
|
| 513 |
"timestamp": datetime.now().isoformat()
|
| 514 |
})
|
| 515 |
|
| 516 |
-
@app.get("/test-streaming/{model_index}")
|
| 517 |
-
async def test_streaming(model_index: int = 0):
|
| 518 |
-
"""Test a specific streaming model"""
|
| 519 |
-
if model_index >= len(models_to_warm):
|
| 520 |
-
return JSONResponse({"error": "Model index out of range"}, status_code=400)
|
| 521 |
-
|
| 522 |
-
model_config = models_to_warm[model_index]
|
| 523 |
-
result = await warmup_single_model(model_config)
|
| 524 |
-
|
| 525 |
-
return JSONResponse({
|
| 526 |
-
"model": model_config['name'],
|
| 527 |
-
"test_result": result
|
| 528 |
-
})
|
| 529 |
-
|
| 530 |
if __name__ == "__main__":
|
| 531 |
import uvicorn
|
| 532 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 4 |
import asyncio
|
| 5 |
import time
|
| 6 |
from datetime import datetime
|
| 7 |
+
from typing import Dict, List
|
| 8 |
from contextlib import asynccontextmanager
|
|
|
|
| 9 |
|
| 10 |
# Configuration
|
| 11 |
PING_INTERVAL = 300 # 5 minutes
|
|
|
|
| 23 |
"https://rajhuggingface4253-compressor3pro.hf.space"
|
| 24 |
]
|
| 25 |
|
| 26 |
+
# Models to warm with correct payloads
|
| 27 |
models_to_warm = [
|
| 28 |
{
|
| 29 |
"name": "Qwen 1",
|
| 30 |
"url": "https://rajhuggingface4253-qwen.hf.space",
|
| 31 |
"endpoint": "/chat",
|
| 32 |
"type": "streaming_chat",
|
|
|
|
| 33 |
"timeout": 30.0,
|
| 34 |
+
"payload": {
|
| 35 |
+
"prompt": "Hello, are you ready? Respond with just 'OK'.",
|
| 36 |
+
"max_new_tokens": 50,
|
| 37 |
+
"temperature": 0.1,
|
| 38 |
+
"enable_code_execution": False,
|
| 39 |
+
"enable_web_search": False
|
| 40 |
+
}
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"name": "Qwen 2",
|
| 44 |
"url": "https://rajhuggingface4253-qwen2.hf.space",
|
| 45 |
"endpoint": "/chat",
|
| 46 |
"type": "streaming_chat",
|
|
|
|
| 47 |
"timeout": 30.0,
|
| 48 |
+
"payload": {
|
| 49 |
+
"prompt": "Say 'ready' if you're working.",
|
| 50 |
+
"max_new_tokens": 50,
|
| 51 |
+
"temperature": 0.1,
|
| 52 |
+
"enable_code_execution": False,
|
| 53 |
+
"enable_web_search": False
|
| 54 |
+
}
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"name": "Qwen 3",
|
| 58 |
"url": "https://rajhuggingface4253-qwen3.hf.space",
|
| 59 |
"endpoint": "/chat",
|
| 60 |
"type": "streaming_chat",
|
|
|
|
| 61 |
"timeout": 30.0,
|
| 62 |
+
"payload": {
|
| 63 |
+
"prompt": "Ping! Respond with 'pong'.",
|
| 64 |
+
"max_new_tokens": 50,
|
| 65 |
+
"temperature": 0.1,
|
| 66 |
+
"enable_code_execution": False,
|
| 67 |
+
"enable_web_search": False
|
| 68 |
+
}
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"name": "Kokoro TTS",
|
| 72 |
"url": "https://rajhuggingface4253-koko.hf.space",
|
| 73 |
+
"endpoint": "/health",
|
| 74 |
"type": "health_check",
|
| 75 |
"timeout": 15.0
|
| 76 |
}
|
|
|
|
| 105 |
}
|
| 106 |
|
| 107 |
async def warmup_streaming_model(model_config: Dict) -> Dict:
|
| 108 |
+
"""Warm up streaming models by calling their chat endpoints"""
|
|
|
|
|
|
|
|
|
|
| 109 |
try:
|
| 110 |
start_time = time.time()
|
| 111 |
async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
|
| 112 |
|
| 113 |
+
payload = model_config['payload']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
api_url = f"{model_config['url']}{model_config['endpoint']}"
|
| 115 |
|
| 116 |
# Make the streaming request
|
|
|
|
| 136 |
|
| 137 |
# Check if we got a valid response
|
| 138 |
if collected_response:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
return {
|
| 140 |
+
'status': 'success',
|
| 141 |
'response_time_ms': response_time,
|
| 142 |
'status_code': response.status_code,
|
| 143 |
'model_response': collected_response[:200] + "..." if len(collected_response) > 200 else collected_response,
|
|
|
|
| 173 |
}
|
| 174 |
|
| 175 |
async def warmup_model_health(model_config: Dict) -> Dict:
|
| 176 |
+
"""Warm up models using health endpoints"""
|
| 177 |
try:
|
| 178 |
start_time = time.time()
|
| 179 |
async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
|
|
|
|
| 274 |
'health_check': results[i]
|
| 275 |
}
|
| 276 |
|
| 277 |
+
# Log results
|
| 278 |
result = results[i]
|
| 279 |
model_name = model['name']
|
| 280 |
|
|
|
|
| 284 |
if result.get('response_length'):
|
| 285 |
response_info += f" ({result['response_length']} chars)"
|
| 286 |
print(f"✅ {model_name}: {response_info}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
else:
|
| 288 |
+
print(f"✅ {model_name}: {result['response_time_ms']}ms")
|
| 289 |
else:
|
| 290 |
error_msg = result.get('error', 'Unknown error')
|
| 291 |
print(f"❌ {model_name}: {error_msg}")
|
|
|
|
| 332 |
print(f"π Health ping failed for {space_url}: {e}")
|
| 333 |
|
| 334 |
async def continuous_pinging():
|
| 335 |
+
"""Main pinging loop with model warming"""
|
| 336 |
+
print("π Smart Model Warmer Started!")
|
| 337 |
print(f"π Regular servers: {len(regular_servers)}")
|
| 338 |
print(f"🤖 Models to warm: {len(models_to_warm)}")
|
| 339 |
print(f"π Pinger network: {len(pinger_spaces)}")
|
|
|
|
|
|
|
| 340 |
|
| 341 |
last_health_check = 0
|
| 342 |
|
|
|
|
| 346 |
if regular_servers:
|
| 347 |
await ping_all_servers()
|
| 348 |
|
| 349 |
+
# Warm up models
|
| 350 |
if models_to_warm:
|
| 351 |
await warmup_all_models()
|
| 352 |
|
| 353 |
# Ping health endpoints every 30 minutes
|
| 354 |
current_time = time.time()
|
| 355 |
if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
|
|
|
|
| 356 |
await ping_health_endpoints()
|
| 357 |
last_health_check = current_time
|
| 358 |
|
|
|
|
| 365 |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the background pinging loop for the lifetime of the FastAPI app.

    On startup, schedules ``continuous_pinging()`` as a background task.
    On shutdown, prints a message, cancels that task and waits for it to
    finish so the loop is not left running while the server tears down.

    Args:
        app: The FastAPI application this lifespan is attached to (unused
            here, but required by the lifespan protocol).
    """
    # Startup
    # Hold a strong reference to the task: the event loop only keeps weak
    # references, so a task whose result is discarded can be garbage-collected
    # before it finishes.
    pinger_task = asyncio.create_task(continuous_pinging())
    try:
        yield
    finally:
        # Shutdown
        print("Shutting down...")
        pinger_task.cancel()
        # return_exceptions=True absorbs the CancelledError (or any error the
        # loop died with earlier) so shutdown itself never raises from here.
        await asyncio.gather(pinger_task, return_exceptions=True)
|
| 372 |
|
| 373 |
+
app = FastAPI(title="Smart Model Warmer", lifespan=lifespan)
|
| 374 |
|
| 375 |
@app.get("/", response_class=HTMLResponse)
|
| 376 |
async def home():
|
|
|
|
| 384 |
for url, data in model_warmup_results.items():
|
| 385 |
model_info = data['model_info']
|
| 386 |
health = data['health_check']
|
| 387 |
+
model_statuses.append({
|
|
|
|
| 388 |
'name': model_info['name'],
|
| 389 |
'type': model_info.get('type', 'health_check'),
|
| 390 |
'status': health['status'],
|
| 391 |
'response_time': health.get('response_time_ms', 0),
|
| 392 |
'error': health.get('error', '')
|
| 393 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
model_status_html = "".join([
|
| 396 |
+
f"<li>{m['name']} ({m['type']}): <span class={'success' if m['status'] == 'success' else 'error'}>{m['status']}</span> ({m['response_time']}ms){f' - {m[\"error\"]}' if m['error'] else ''}</li>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
for m in model_statuses
|
| 398 |
])
|
| 399 |
|
| 400 |
html_content = f"""
|
| 401 |
<html>
|
| 402 |
<head>
|
| 403 |
+
<title>Smart Model Warmer</title>
|
| 404 |
<style>
|
| 405 |
body {{ font-family: Arial, sans-serif; margin: 40px; }}
|
| 406 |
.success {{ color: green; font-weight: bold; }}
|
| 407 |
.error {{ color: red; font-weight: bold; }}
|
| 408 |
+
.container {{ max-width: 1000px; margin: 0 auto; }}
|
| 409 |
.model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
|
|
|
|
|
|
|
|
|
|
| 410 |
</style>
|
| 411 |
</head>
|
| 412 |
<body>
|
| 413 |
<div class="container">
|
| 414 |
+
<h1>🤖 Smart Model Warmer</h1>
|
|
|
|
| 415 |
|
| 416 |
+
<div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px;">
|
| 417 |
<div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
|
| 418 |
<h3>π Regular Servers</h3>
|
| 419 |
<p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
|
|
|
|
| 429 |
</div>
|
| 430 |
|
| 431 |
<div class="model-list">
|
| 432 |
+
<h3>Model Status</h3>
|
| 433 |
<ul>
|
| 434 |
{model_status_html if model_statuses else "<li>No model data yet</li>"}
|
| 435 |
</ul>
|
|
|
|
| 437 |
|
| 438 |
<p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
|
| 439 |
<p><strong>Next check in:</strong> ~5 minutes</p>
|
|
|
|
| 440 |
</div>
|
| 441 |
</body>
|
| 442 |
</html>
|
|
|
|
| 448 |
"""Health endpoint for other pingers"""
|
| 449 |
return JSONResponse({
|
| 450 |
"status": "healthy",
|
| 451 |
+
"service": "smart-model-warmer",
|
| 452 |
"regular_servers": len(regular_servers),
|
| 453 |
"ai_models": len(models_to_warm),
|
|
|
|
| 454 |
"last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
|
| 455 |
"timestamp": datetime.now().isoformat()
|
| 456 |
})
|
|
|
|
| 477 |
"timestamp": datetime.now().isoformat()
|
| 478 |
})
|
| 479 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
if __name__ == "__main__":
|
| 481 |
import uvicorn
|
| 482 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|