Rajhuggingface4253 committed
Commit 098eb43 · verified · 1 Parent(s): 8dbef7a

Update app.py

Files changed (1)
  1. app.py +169 -195
app.py CHANGED
@@ -16,50 +16,50 @@ pinger_spaces = [
     "https://rajhuggingface4253-ping.hf.space",
 ]
 
-# Regular servers to ping
+# Regular servers to ping (HTTP GET)
 regular_servers = [
     "https://rajhuggingface4253-backend-compressorpro.hf.space",
     "https://rajhuggingface4253-backend-compressorpro2.hf.space",
     "https://rajhuggingface4253-compressor3pro.hf.space"
 ]
 
-# Chat models that need warmup messages
-chat_models = [
+# Models to warm using health endpoints
+models_to_warm = [
     {
+        "name": "Qwen 1",
         "url": "https://rajhuggingface4253-qwen.hf.space",
-        "api_endpoint": "/chat",
-        "warmup_message": "Say 'active' in one word?",
-        "type": "qwen"
+        "endpoint": "/health",
+        "type": "health_check"
     },
     {
-        "url": "https://rajhuggingface4253-qwen2.hf.space",
-        "api_endpoint": "/chat",
-        "warmup_message": "Say 'active' in one word",
-        "type": "qwen2"
+        "name": "Qwen 2",
+        "url": "https://rajhuggingface4253-qwen2.hf.space",
+        "endpoint": "/health",
+        "type": "health_check"
     },
     {
+        "name": "Qwen 3",
         "url": "https://rajhuggingface4253-qwen3.hf.space",
-        "api_endpoint": "/chat",
-        "warmup_message": "Just say OK",
-        "type": "qwen3"
+        "endpoint": "/health",
+        "type": "health_check"
     },
     {
-        "url": "https://rajhuggingface4253-koko.hf.space",
-        "api_endpoint": "/tts",
-        "warmup_message": "Active",
-        "type": "tts"
+        "name": "Kokoro TTS",
+        "url": "https://rajhuggingface4253-koko.hf.space",
+        "endpoint": "/health",  # Assuming it has a health endpoint
+        "type": "health_check"
     }
 ]
 
 # Global state
 ping_results: Dict[str, Dict] = {}
-chat_warmup_results: Dict[str, Dict] = {}
+model_warmup_results: Dict[str, Dict] = {}
 health_results: Dict[str, Dict] = {}
 last_ping_run: datetime = None
-last_chat_warmup: datetime = None
+last_model_warmup: datetime = None
 
 async def ping_server(url: str) -> Dict:
-    """Ping a regular server"""
+    """Ping a regular server with HTTP GET"""
     try:
         start_time = time.time()
         async with httpx.AsyncClient(timeout=10.0) as client:
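If another Space ever needs warming, a new entry follows the same shape as the config above. A minimal sketch; the name and URL below are hypothetical:

# Hypothetical extra entry; the keys match what warmup_model_health reads.
models_to_warm.append({
    "name": "Example Model",
    "url": "https://example-space.hf.space",  # hypothetical Space URL
    "endpoint": "/health",
    "type": "health_check"
})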
@@ -79,110 +79,54 @@ async def ping_server(url: str) -> Dict:
             'timestamp': datetime.now().isoformat()
         }
 
-async def warmup_chat_model(model_config: Dict) -> Dict:
-    """Send a warmup message to a chat model with streaming support"""
+async def warmup_model_health(model_config: Dict) -> Dict:
+    """Warm up a model by calling its health endpoint"""
     try:
         start_time = time.time()
-
-        # Try both streaming and non-streaming approaches
-        async with httpx.AsyncClient(timeout=25.0) as client:
-
-            # First try: Non-streaming request (preferred for warming)
-            payload = {
-                "messages": [{"role": "user", "content": model_config["warmup_message"]}],
-                "max_tokens": 15,
-                "stream": False  # Explicitly disable streaming for warming
-            }
-
-            # Adjust payload for specific model types
-            if model_config["type"].startswith("qwen"):
-                payload = {
-                    "message": model_config["warmup_message"],
-                    "max_tokens": 15,
-                    "stream": False
-                }
+        async with httpx.AsyncClient(timeout=15.0) as client:
 
-            api_url = f"{model_config['url']}{model_config['api_endpoint']}"
+            api_url = f"{model_config['url']}{model_config['endpoint']}"
+            response = await client.get(api_url)
+            response_time = round((time.time() - start_time) * 1000, 1)
 
-            try:
-                response = await client.post(
-                    api_url,
-                    json=payload,
-                    headers={"Content-Type": "application/json"}
-                )
-
-                response_time = round((time.time() - start_time) * 1000, 1)
-                response_text = await response.atext() if response.status_code == 200 else ""
-
-                # Check if we actually got a meaningful response
-                got_valid_response = len(response_text.strip()) > 0 and response.status_code == 200
-
-                result = {
-                    'status': 'success' if got_valid_response else 'error',
-                    'response_time_ms': response_time,
-                    'status_code': response.status_code,
-                    'response_preview': response_text[:100] if got_valid_response else "No response",
-                    'got_response': got_valid_response,
-                    'method': 'non-streaming',
-                    'timestamp': datetime.now().isoformat()
-                }
-
-                if got_valid_response:
-                    return result
+            # Check if health endpoint returned successful status
+            if response.status_code == 200:
+                try:
+                    # Try to parse JSON response
+                    health_data = response.json()
+                    model_status = health_data.get('status', 'unknown')
+                    model_name = health_data.get('model', 'Unknown')
 
-            except (httpx.ReadTimeout, asyncio.TimeoutError):
-                # Non-streaming failed, try streaming approach
-                pass
-
-            # Second try: Streaming approach (if non-streaming fails)
-            print(f"🔄 Trying streaming approach for {model_config['url']}")
-            try:
-                streaming_start = time.time()
-                streaming_payload = payload.copy()
-                streaming_payload["stream"] = True  # Enable streaming
-
-                async with client.stream(
-                    'POST',
-                    api_url,
-                    json=streaming_payload,
-                    timeout=20.0
-                ) as response:
-
-                    first_chunk_received = False
-                    full_response = ""
-
-                    async for line in response.aiter_lines():
-                        if line.strip() and 'data:' in line:
-                            first_chunk_received = True
-                            # Try to extract actual content from streaming format
-                            if '"content":"' in line:
-                                try:
-                                    content_start = line.find('"content":"') + 10
-                                    content_end = line.find('"', content_start)
-                                    if content_end > content_start:
-                                        content = line[content_start:content_end]
-                                        full_response += content
-                                except:
-                                    pass
-                            break  # Got at least one chunk
-
-                    streaming_time = round((time.time() - streaming_start) * 1000, 1)
+                    is_healthy = model_status in ['ok', 'healthy', 'ready']
 
                     return {
-                        'status': 'success' if first_chunk_received else 'error',
-                        'response_time_ms': streaming_time,
+                        'status': 'success' if is_healthy else 'error',
+                        'response_time_ms': response_time,
                         'status_code': response.status_code,
-                        'response_preview': full_response[:100] if full_response else "Streaming chunk received",
-                        'got_response': first_chunk_received,
-                        'method': 'streaming',
+                        'model_status': model_status,
+                        'model_name': model_name,
+                        'got_response': True,
                         'timestamp': datetime.now().isoformat()
                     }
-
-            except Exception as stream_error:
+                except:
+                    # If JSON parsing fails but status is 200, consider it successful
+                    return {
+                        'status': 'success',
+                        'response_time_ms': response_time,
+                        'status_code': response.status_code,
+                        'model_status': 'ok',
+                        'model_name': 'Unknown',
+                        'got_response': True,
+                        'timestamp': datetime.now().isoformat()
+                    }
+            else:
                 return {
                     'status': 'error',
-                    'error': f"Both methods failed: {str(stream_error)}",
-                    'response_time_ms': round((time.time() - start_time) * 1000, 1),
+                    'response_time_ms': response_time,
+                    'status_code': response.status_code,
+                    'model_status': 'unhealthy',
+                    'model_name': 'Unknown',
+                    'got_response': False,
                     'timestamp': datetime.now().isoformat()
                 }
 
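For context, the new warmup_model_health treats a model as healthy when its /health JSON reports a status of 'ok', 'healthy', or 'ready'. A minimal sketch of a payload that parser would accept; the commit does not show the Spaces' actual responses, so the values are hypothetical:

# Hypothetical /health payload; 'status' and 'model' are the keys the parser reads.
example_health_payload = {
    "status": "ok",      # counted as healthy ('ok', 'healthy', or 'ready')
    "model": "Qwen-7B"   # surfaced as 'model_name' in the warmup result
}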
@@ -190,55 +134,13 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
         return {
             'status': 'error',
             'error': str(e),
+            'response_time_ms': round((time.time() - start_time) * 1000, 1),
+            'model_status': 'connection_failed',
+            'model_name': 'Unknown',
+            'got_response': False,
             'timestamp': datetime.now().isoformat()
         }
 
-async def warmup_tts_model(model_config: Dict) -> Dict:
-    """Warm up TTS model with proper response handling"""
-    try:
-        start_time = time.time()
-        async with httpx.AsyncClient(timeout=15.0) as client:
-
-            payload = {
-                "text": model_config["warmup_message"],
-                "voice": "default"
-            }
-
-            api_url = f"{model_config['url']}{model_config['api_endpoint']}"
-            response = await client.post(
-                api_url,
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            )
-
-            response_time = round((time.time() - start_time) * 1000, 1)
-
-            # For TTS, success means we got any response (audio data or confirmation)
-            success = response.status_code == 200 and len(response.content) > 0
-
-            return {
-                'status': 'success' if success else 'error',
-                'response_time_ms': response_time,
-                'status_code': response.status_code,
-                'response_preview': f"TTS response: {len(response.content)} bytes" if success else "No audio data",
-                'got_response': success,
-                'timestamp': datetime.now().isoformat()
-            }
-
-    except Exception as e:
-        return {
-            'status': 'error',
-            'error': str(e),
-            'timestamp': datetime.now().isoformat()
-        }
-
-async def warmup_single_model(model_config: Dict) -> Dict:
-    """Route to appropriate warming function based on model type"""
-    if model_config["type"] == "tts":
-        return await warmup_tts_model(model_config)
-    else:
-        return await warmup_chat_model(model_config)
-
 async def ping_all_servers():
     """Ping all regular servers"""
     global ping_results, last_ping_run
@@ -259,30 +161,34 @@ async def ping_all_servers():
 
     return results
 
-async def warmup_all_chat_models():
-    """Warm up all chat models with actual messages and response verification"""
-    global chat_warmup_results, last_chat_warmup
+async def warmup_all_models():
+    """Warm up all models using health endpoints"""
+    global model_warmup_results, last_model_warmup
 
-    if not chat_models:
+    if not models_to_warm:
         return []
 
-    tasks = [warmup_single_model(model) for model in chat_models]
+    tasks = [warmup_model_health(model) for model in models_to_warm]
     results = await asyncio.gather(*tasks)
 
-    for i, model in enumerate(chat_models):
-        chat_warmup_results[model['url']] = results[i]
+    for i, model in enumerate(models_to_warm):
+        model_warmup_results[model['url']] = {
+            'model_info': model,
+            'health_check': results[i]
+        }
 
-        # Log detailed results for debugging
-        if results[i]['status'] == 'success' and results[i].get('got_response'):
-            print(f"✅ {model['url']}: {results[i]['response_time_ms']}ms - {results[i]['response_preview']}")
+        # Log detailed results
+        result = results[i]
+        if result['status'] == 'success' and result.get('got_response'):
+            print(f"✅ {model['name']}: {result['response_time_ms']}ms - {result['model_name']} ({result['model_status']})")
         else:
-            print(f"❌ {model['url']}: {results[i].get('error', 'No response')}")
+            print(f"❌ {model['name']}: {result.get('error', 'Health check failed')}")
 
-    last_chat_warmup = datetime.now()
+    last_model_warmup = datetime.now()
 
-    # Only count as success if we actually got a response
-    success_count = sum(1 for result in results if result.get('got_response', False))
-    print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Chat models: {success_count}/{len(chat_models)} Actually responded")
+    # Count as success only if health check passed
+    success_count = sum(1 for result in results if result['status'] == 'success')
+    print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Models: {success_count}/{len(models_to_warm)} Healthy")
 
     return results
 
@@ -311,20 +217,22 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
             'status_code': response.status_code,
             'last_ping': datetime.now().isoformat()
         }
+        print(f"🔗 Health ping to {space_url}: {response_time}ms")
     except Exception as e:
         health_results[space_url] = {
             'status': 'error',
             'error': str(e),
             'last_ping': datetime.now().isoformat()
         }
+        print(f"🔗 Health ping failed for {space_url}: {e}")
 
 async def continuous_pinging():
-    """Main pinging loop with chat model warming"""
-    print("🚀 Enhanced Chat Model Warmer Started!")
+    """Main pinging loop with model health checking"""
+    print("🚀 Smart Model Warmer Started!")
     print(f"🌐 Regular servers: {len(regular_servers)}")
-    print(f"🤖 Chat models: {len(chat_models)}")
+    print(f"🤖 Models to warm: {len(models_to_warm)}")
     print(f"🔗 Pinger network: {len(pinger_spaces)}")
-    print("⏰ Warming with response verification every 5 minutes")
+    print("⏰ Health checks every 5 minutes")
 
     last_health_check = 0
 
@@ -334,13 +242,14 @@ async def continuous_pinging():
         if regular_servers:
             await ping_all_servers()
 
-        # Warm up chat models with response verification
-        if chat_models:
-            await warmup_all_chat_models()
+        # Warm up models using health endpoints
+        if models_to_warm:
+            await warmup_all_models()
 
         # Ping health endpoints every 30 minutes
         current_time = time.time()
         if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
+            print("🔄 Pinging other pinger spaces...")
            await ping_health_endpoints()
            last_health_check = current_time
 
@@ -353,31 +262,80 @@
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
-    print("Starting up Enhanced Chat Model Warmer...")
+    print("Starting up Smart Model Warmer...")
     asyncio.create_task(continuous_pinging())
     yield
     # Shutdown
     print("Shutting down...")
 
-app = FastAPI(title="Enhanced Chat Model Warmer", lifespan=lifespan)
+app = FastAPI(title="Smart Model Warmer", lifespan=lifespan)
 
 @app.get("/", response_class=HTMLResponse)
 async def home():
-    """Minimal dashboard"""
+    """Dashboard showing warming status"""
     regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
-    # Only count chat models that actually responded
-    chat_success = sum(1 for r in chat_warmup_results.values() if r.get('got_response', False))
+    model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
     health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
 
+    # Get model details for display
+    model_statuses = []
+    for url, data in model_warmup_results.items():
+        model_info = data['model_info']
+        health = data['health_check']
+        model_statuses.append({
+            'name': model_info['name'],
+            'status': health['status'],
+            'response_time': health.get('response_time_ms', 0),
+            'model_name': health.get('model_name', 'Unknown')
+        })
+
+    model_status_html = "".join([
+        f"<li>{m['name']}: <span class={'success' if m['status'] == 'success' else 'error'}>{m['status']}</span> "
+        f"({m['response_time']}ms) - {m['model_name']}</li>"
+        for m in model_statuses
+    ])
+
     html_content = f"""
     <html>
-    <head><title>Enhanced Chat Model Warmer</title></head>
+    <head>
+        <title>Smart Model Warmer</title>
+        <style>
+            body {{ font-family: Arial, sans-serif; margin: 40px; }}
+            .success {{ color: green; font-weight: bold; }}
+            .error {{ color: red; font-weight: bold; }}
+            .container {{ max-width: 1000px; margin: 0 auto; }}
+            .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
+        </style>
+    </head>
     <body>
-        <h1>🤖 Enhanced Chat Model Warmer</h1>
-        <p><strong>Regular Servers:</strong> {regular_success}/{len(regular_servers)} OK</p>
-        <p><strong>Chat Models (Responded):</strong> {chat_success}/{len(chat_models)} Actually Warm</p>
-        <p><strong>Last Chat Warmup:</strong> {last_chat_warmup.strftime('%H:%M:%S') if last_chat_warmup else 'Never'}</p>
-        <p><strong>Network:</strong> {health_success}/{len(pinger_spaces)} OK</p>
+        <div class="container">
+            <h1>🤖 Smart Model Warmer</h1>
+
+            <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px;">
+                <div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
+                    <h3>🌐 Regular Servers</h3>
+                    <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
+                </div>
+                <div style="background: #e3f2fd; padding: 15px; border-radius: 8px;">
+                    <h3>🤖 AI Models</h3>
+                    <p><strong>{model_success}/{len(models_to_warm)} Healthy</strong></p>
+                </div>
+                <div style="background: #fff3e0; padding: 15px; border-radius: 8px;">
+                    <h3>🔗 Pinger Network</h3>
+                    <p><strong>{health_success}/{len(pinger_spaces)} OK</strong></p>
+                </div>
+            </div>
+
+            <div class="model-list">
+                <h3>Model Health Status</h3>
+                <ul>
+                    {model_status_html if model_statuses else "<li>No model data yet</li>"}
+                </ul>
+            </div>
+
+            <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
+            <p><strong>Next check in:</strong> ~5 minutes</p>
+        </div>
     </body>
     </html>
     """
@@ -385,19 +343,35 @@ async def home():
 
 @app.get("/health")
 async def health():
+    """Health endpoint for other pingers"""
     return JSONResponse({
         "status": "healthy",
-        "service": "enhanced-chat-model-warmer",
+        "service": "smart-model-warmer",
         "regular_servers": len(regular_servers),
-        "chat_models": len(chat_models),
-        "last_chat_warmup": last_chat_warmup.isoformat() if last_chat_warmup else None
+        "ai_models": len(models_to_warm),
+        "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
+        "timestamp": datetime.now().isoformat()
     })
 
 @app.get("/status")
 async def status():
+    """Detailed status endpoint"""
     return JSONResponse({
         "regular_servers": ping_results,
-        "chat_models": chat_warmup_results,
+        "ai_models": model_warmup_results,
+        "pinger_network": health_results,
+        "timestamp": datetime.now().isoformat()
+    })
+
+@app.get("/ping-now")
+async def ping_now():
+    """Manually trigger immediate warming"""
+    results = await warmup_all_models()
+    success_count = sum(1 for result in results if result['status'] == 'success')
+
+    return JSONResponse({
+        "message": "Manual warming completed",
+        "models_healthy": f"{success_count}/{len(models_to_warm)}",
         "timestamp": datetime.now().isoformat()
     })
 
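For reference, a minimal client sketch for the endpoints this version exposes (/health, /status, and the new /ping-now). The base URL is an assumption for whichever host serves the app; the JSON keys come from the handlers in the diff above:

import asyncio
import httpx

BASE_URL = "http://localhost:7860"  # assumption: default local port for a HF Space

async def poke_warmer():
    async with httpx.AsyncClient(timeout=30.0) as client:
        # Liveness summary used by the pinger network
        health = (await client.get(f"{BASE_URL}/health")).json()
        print(health["status"], health.get("last_model_warmup"))

        # Trigger an immediate warming pass instead of waiting ~5 minutes
        ping_now = (await client.get(f"{BASE_URL}/ping-now")).json()
        print(ping_now["models_healthy"])

        # Full per-server and per-model detail
        status = (await client.get(f"{BASE_URL}/status")).json()
        print(list(status.keys()))

asyncio.run(poke_warmer())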