Rajhuggingface4253 committed on
Commit
908cd1e
·
verified ·
1 Parent(s): d1ef9f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -80
app.py CHANGED
@@ -16,50 +16,50 @@ pinger_spaces = [
16
  "https://rajhuggingface4253-ping2.hf.space",
17
  ]
18
 
19
- # Regular servers to ping
20
  regular_servers = [
21
  "https://rajhuggingface4253-backend-compressorpro.hf.space",
22
  "https://rajhuggingface4253-backend-compressorpro2.hf.space",
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
- # Chat models that need warmup messages
27
- chat_models = [
28
  {
 
29
  "url": "https://rajhuggingface4253-qwen.hf.space",
30
- "api_endpoint": "/chat", # Adjust based on your API
31
- "warmup_message": "Say 'active' in one word?",
32
- "type": "qwen"
33
  },
34
  {
35
- "url": "https://rajhuggingface4253-qwen2.hf.space",
36
- "api_endpoint": "/chat",
37
- "warmup_message": "Say 'active' in one word",
38
- "type": "qwen2"
39
  },
40
  {
 
41
  "url": "https://rajhuggingface4253-qwen3.hf.space",
42
- "api_endpoint": "/chat",
43
- "warmup_message": "Just say OK",
44
- "type": "qwen3"
45
  },
46
  {
47
- "url": "https://rajhuggingface4253-koko.hf.space",
48
- "api_endpoint": "/tts",
49
- "warmup_message": "Active",
50
- "type": "tts"
51
  }
52
  ]
53
 
54
  # Global state
55
  ping_results: Dict[str, Dict] = {}
56
- chat_warmup_results: Dict[str, Dict] = {}
57
  health_results: Dict[str, Dict] = {}
58
  last_ping_run: datetime = None
59
- last_chat_warmup: datetime = None
60
 
61
  async def ping_server(url: str) -> Dict:
62
- """Ping a regular server"""
63
  try:
64
  start_time = time.time()
65
  async with httpx.AsyncClient(timeout=10.0) as client:
@@ -79,43 +79,65 @@ async def ping_server(url: str) -> Dict:
79
  'timestamp': datetime.now().isoformat()
80
  }
81
 
82
- async def warmup_chat_model(model_config: Dict) -> Dict:
83
- """Send a warmup message to a chat model"""
84
  try:
85
  start_time = time.time()
86
- async with httpx.AsyncClient(timeout=30.0) as client:
87
- if model_config["type"].startswith("qwen"):
88
- payload = {
89
- "message": model_config["warmup_message"],
90
- "max_tokens": 10
91
- }
92
- else:
93
- payload = {
94
- "messages": [{"role": "user", "content": model_config["warmup_message"]}],
95
- "max_tokens": 10
96
- }
97
-
98
- api_url = f"{model_config['url']}{model_config['api_endpoint']}"
99
- response = await client.post(
100
- api_url,
101
- json=payload,
102
- headers={"Content-Type": "application/json"}
103
- )
104
 
 
 
105
  response_time = round((time.time() - start_time) * 1000, 1)
106
 
107
- return {
108
- 'status': 'success',
109
- 'response_time_ms': response_time,
110
- 'status_code': response.status_code,
111
- 'response_preview': str(response.text)[:100],
112
- 'timestamp': datetime.now().isoformat()
113
- }
114
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  except Exception as e:
116
  return {
117
  'status': 'error',
118
  'error': str(e),
 
 
 
 
119
  'timestamp': datetime.now().isoformat()
120
  }
121
 
@@ -139,23 +161,34 @@ async def ping_all_servers():
139
 
140
  return results
141
 
142
- async def warmup_all_chat_models():
143
- """Warm up all chat models with actual messages"""
144
- global chat_warmup_results, last_chat_warmup
145
 
146
- if not chat_models:
147
  return []
148
 
149
- tasks = [warmup_chat_model(model) for model in chat_models]
150
  results = await asyncio.gather(*tasks)
151
 
152
- for i, model in enumerate(chat_models):
153
- chat_warmup_results[model['url']] = results[i]
 
 
 
 
 
 
 
 
 
 
154
 
155
- last_chat_warmup = datetime.now()
156
 
 
157
  success_count = sum(1 for result in results if result['status'] == 'success')
158
- print(f"πŸ€– {datetime.now().strftime('%H:%M:%S')} - Chat models: {success_count}/{len(chat_models)} Warmed up")
159
 
160
  return results
161
 
@@ -184,20 +217,22 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
184
  'status_code': response.status_code,
185
  'last_ping': datetime.now().isoformat()
186
  }
 
187
  except Exception as e:
188
  health_results[space_url] = {
189
  'status': 'error',
190
  'error': str(e),
191
  'last_ping': datetime.now().isoformat()
192
  }
 
193
 
194
  async def continuous_pinging():
195
- """Main pinging loop with chat model warming"""
196
- print("πŸš€ Chat Model Warmer Started!")
197
  print(f"🌐 Regular servers: {len(regular_servers)}")
198
- print(f"πŸ€– Chat models: {len(chat_models)}")
199
  print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
200
- print("⏰ Chat warmup every 5 minutes")
201
 
202
  last_health_check = 0
203
 
@@ -207,49 +242,100 @@ async def continuous_pinging():
207
  if regular_servers:
208
  await ping_all_servers()
209
 
210
- # Warm up chat models (most important!)
211
- if chat_models:
212
- await warmup_all_chat_models()
213
 
214
- # Ping health endpoints every 30 minutes (FIXED: use the defined constant)
215
  current_time = time.time()
216
  if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
 
217
  await ping_health_endpoints()
218
  last_health_check = current_time
219
 
220
  await asyncio.sleep(PING_INTERVAL)
221
 
222
  except Exception as e:
223
- print(f"❌ Error: {e}")
224
  await asyncio.sleep(60)
225
 
226
  @asynccontextmanager
227
  async def lifespan(app: FastAPI):
228
  # Startup
229
- print("Starting up Chat Model Warmer...")
230
  asyncio.create_task(continuous_pinging())
231
  yield
232
  # Shutdown
233
  print("Shutting down...")
234
 
235
- app = FastAPI(title="Chat Model Warmer", lifespan=lifespan)
236
 
237
  @app.get("/", response_class=HTMLResponse)
238
  async def home():
239
- """Minimal dashboard"""
240
  regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
241
- chat_success = sum(1 for r in chat_warmup_results.values() if r.get('status') == 'success')
242
  health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  html_content = f"""
245
  <html>
246
- <head><title>Chat Model Warmer</title></head>
 
 
 
 
 
 
 
 
 
247
  <body>
248
- <h1>πŸ€– Chat Model Warmer</h1>
249
- <p><strong>Regular Servers:</strong> {regular_success}/{len(regular_servers)} OK</p>
250
- <p><strong>Chat Models:</strong> {chat_success}/{len(chat_models)} Warmed up</p>
251
- <p><strong>Last Chat Warmup:</strong> {last_chat_warmup.strftime('%H:%M:%S') if last_chat_warmup else 'Never'}</p>
252
- <p><strong>Network:</strong> {health_success}/{len(pinger_spaces)} OK</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  </body>
254
  </html>
255
  """
@@ -257,19 +343,35 @@ async def home():
257
 
258
  @app.get("/health")
259
  async def health():
 
260
  return JSONResponse({
261
  "status": "healthy",
262
- "service": "chat-model-warmer",
263
  "regular_servers": len(regular_servers),
264
- "chat_models": len(chat_models),
265
- "last_chat_warmup": last_chat_warmup.isoformat() if last_chat_warmup else None
 
266
  })
267
 
268
  @app.get("/status")
269
  async def status():
 
270
  return JSONResponse({
271
  "regular_servers": ping_results,
272
- "chat_models": chat_warmup_results,
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  "timestamp": datetime.now().isoformat()
274
  })
275
 
 
16
  "https://rajhuggingface4253-ping2.hf.space",
17
  ]
18
 
19
# Servers that are kept awake with a plain HTTP GET.
regular_servers = [
    "https://rajhuggingface4253-backend-compressorpro.hf.space",
    "https://rajhuggingface4253-backend-compressorpro2.hf.space",
    "https://rajhuggingface4253-compressor3pro.hf.space",
]

# (display name, base URL) for every model Space warmed through its health
# endpoint. The Kokoro TTS Space is assumed to expose /health like the others.
_MODEL_SPACES = (
    ("Qwen 1", "https://rajhuggingface4253-qwen.hf.space"),
    ("Qwen 2", "https://rajhuggingface4253-qwen2.hf.space"),
    ("Qwen 3", "https://rajhuggingface4253-qwen3.hf.space"),
    ("Kokoro TTS", "https://rajhuggingface4253-koko.hf.space"),
)

# Every entry shares the same endpoint and check type, so the dicts are
# generated from the table above instead of being spelled out one by one.
models_to_warm = [
    {"name": name, "url": url, "endpoint": "/health", "type": "health_check"}
    for name, url in _MODEL_SPACES
]

# Global state: latest result per URL, plus the time of the last run.
ping_results: Dict[str, Dict] = {}
model_warmup_results: Dict[str, Dict] = {}
health_results: Dict[str, Dict] = {}
last_ping_run: datetime = None
last_model_warmup: datetime = None
60
 
61
  async def ping_server(url: str) -> Dict:
62
+ """Ping a regular server with HTTP GET"""
63
  try:
64
  start_time = time.time()
65
  async with httpx.AsyncClient(timeout=10.0) as client:
 
79
  'timestamp': datetime.now().isoformat()
80
  }
81
 
82
async def warmup_model_health(model_config: Dict) -> Dict:
    """Warm up a model by calling its health endpoint.

    Sends a GET request to ``model_config['url'] + model_config['endpoint']``
    with a 15 second timeout and classifies the outcome.

    Args:
        model_config: Mapping that provides the ``url`` and ``endpoint`` keys.

    Returns:
        A dict with ``status`` ('success' or 'error'), ``response_time_ms``,
        ``model_status``, ``model_name``, ``got_response`` and ``timestamp``.
        ``status_code`` is included when an HTTP response arrived; ``error``
        is included when the request (or building it) raised.
    """
    start_time = time.time()
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            api_url = f"{model_config['url']}{model_config['endpoint']}"
            response = await client.get(api_url)
            response_time = round((time.time() - start_time) * 1000, 1)

        # Defaults describe a non-200 answer.
        # NOTE(review): got_response stays False for non-200 replies even
        # though a response arrived; presumably it means "usable response".
        status = 'error'
        model_status = 'unhealthy'
        model_name = 'Unknown'
        got_response = False

        if response.status_code == 200:
            got_response = True
            try:
                health_data = response.json()
            except ValueError:
                # Body is not valid JSON (JSONDecodeError / UnicodeDecodeError
                # are both ValueError subclasses).
                health_data = None

            if isinstance(health_data, dict):
                model_status = health_data.get('status', 'unknown')
                model_name = health_data.get('model', 'Unknown')
                if model_status in ('ok', 'healthy', 'ready'):
                    status = 'success'
            else:
                # A 200 without a parseable JSON object still counts as a
                # successful warmup.
                status = 'success'
                model_status = 'ok'

        return {
            'status': status,
            'response_time_ms': response_time,
            'status_code': response.status_code,
            'model_status': model_status,
            'model_name': model_name,
            'got_response': got_response,
            'timestamp': datetime.now().isoformat()
        }

    except Exception as e:
        # Best-effort probe: report the failure instead of raising so one bad
        # model cannot break the gather() in the caller.
        return {
            'status': 'error',
            'error': str(e),
            'response_time_ms': round((time.time() - start_time) * 1000, 1),
            'model_status': 'connection_failed',
            'model_name': 'Unknown',
            'got_response': False,
            'timestamp': datetime.now().isoformat()
        }
143
 
 
161
 
162
  return results
163
 
164
async def warmup_all_models():
    """Warm up all models using health endpoints.

    Runs ``warmup_model_health`` concurrently for every entry in
    ``models_to_warm``, stores each outcome in ``model_warmup_results`` keyed
    by the model URL, updates ``last_model_warmup`` and prints one log line
    per model plus a summary line.

    Returns:
        The list of per-model result dicts in ``models_to_warm`` order, or an
        empty list when no models are configured.
    """
    global model_warmup_results, last_model_warmup

    if not models_to_warm:
        return []

    tasks = [warmup_model_health(model) for model in models_to_warm]
    results = await asyncio.gather(*tasks)

    # gather() returns results in argument order, so they pair up with
    # models_to_warm one-to-one.
    for model, result in zip(models_to_warm, results):
        model_warmup_results[model['url']] = {
            'model_info': model,
            'health_check': result
        }

        # Log detailed results
        if result['status'] == 'success' and result.get('got_response'):
            print(f"✅ {model['name']}: {result['response_time_ms']}ms - {result['model_name']} ({result['model_status']})")
        else:
            print(f"❌ {model['name']}: {result.get('error', 'Health check failed')}")

    last_model_warmup = datetime.now()

    # Count as success only if health check passed
    success_count = sum(1 for result in results if result['status'] == 'success')
    print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Models: {success_count}/{len(models_to_warm)} Healthy")

    return results
194
 
 
217
  'status_code': response.status_code,
218
  'last_ping': datetime.now().isoformat()
219
  }
220
+ print(f"πŸ”— Health ping to {space_url}: {response_time}ms")
221
  except Exception as e:
222
  health_results[space_url] = {
223
  'status': 'error',
224
  'error': str(e),
225
  'last_ping': datetime.now().isoformat()
226
  }
227
+ print(f"πŸ”— Health ping failed for {space_url}: {e}")
228
 
229
  async def continuous_pinging():
230
+ """Main pinging loop with model health checking"""
231
+ print("πŸš€ Smart Model Warmer Started!")
232
  print(f"🌐 Regular servers: {len(regular_servers)}")
233
+ print(f"πŸ€– Models to warm: {len(models_to_warm)}")
234
  print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
235
+ print("⏰ Health checks every 5 minutes")
236
 
237
  last_health_check = 0
238
 
 
242
  if regular_servers:
243
  await ping_all_servers()
244
 
245
+ # Warm up models using health endpoints
246
+ if models_to_warm:
247
+ await warmup_all_models()
248
 
249
+ # Ping health endpoints every 30 minutes
250
  current_time = time.time()
251
  if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
252
+ print("πŸ”„ Pinging other pinger spaces...")
253
  await ping_health_endpoints()
254
  last_health_check = current_time
255
 
256
  await asyncio.sleep(PING_INTERVAL)
257
 
258
  except Exception as e:
259
+ print(f"❌ Error in main loop: {e}")
260
  await asyncio.sleep(60)
261
 
262
  @asynccontextmanager
263
  async def lifespan(app: FastAPI):
264
  # Startup
265
+ print("Starting up Smart Model Warmer...")
266
  asyncio.create_task(continuous_pinging())
267
  yield
268
  # Shutdown
269
  print("Shutting down...")
270
 
271
+ app = FastAPI(title="Smart Model Warmer", lifespan=lifespan)
272
 
273
  @app.get("/", response_class=HTMLResponse)
274
  async def home():
275
+ """Dashboard showing warming status"""
276
  regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
277
+ model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
278
  health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
279
 
280
+ # Get model details for display
281
+ model_statuses = []
282
+ for url, data in model_warmup_results.items():
283
+ model_info = data['model_info']
284
+ health = data['health_check']
285
+ model_statuses.append({
286
+ 'name': model_info['name'],
287
+ 'status': health['status'],
288
+ 'response_time': health.get('response_time_ms', 0),
289
+ 'model_name': health.get('model_name', 'Unknown')
290
+ })
291
+
292
+ model_status_html = "".join([
293
+ f"<li>{m['name']}: <span class={'success' if m['status'] == 'success' else 'error'}>{m['status']}</span> "
294
+ f"({m['response_time']}ms) - {m['model_name']}</li>"
295
+ for m in model_statuses
296
+ ])
297
+
298
  html_content = f"""
299
  <html>
300
+ <head>
301
+ <title>Smart Model Warmer</title>
302
+ <style>
303
+ body {{ font-family: Arial, sans-serif; margin: 40px; }}
304
+ .success {{ color: green; font-weight: bold; }}
305
+ .error {{ color: red; font-weight: bold; }}
306
+ .container {{ max-width: 1000px; margin: 0 auto; }}
307
+ .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
308
+ </style>
309
+ </head>
310
  <body>
311
+ <div class="container">
312
+ <h1>πŸ€– Smart Model Warmer</h1>
313
+
314
+ <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px;">
315
+ <div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
316
+ <h3>🌐 Regular Servers</h3>
317
+ <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
318
+ </div>
319
+ <div style="background: #e3f2fd; padding: 15px; border-radius: 8px;">
320
+ <h3>πŸ€– AI Models</h3>
321
+ <p><strong>{model_success}/{len(models_to_warm)} Healthy</strong></p>
322
+ </div>
323
+ <div style="background: #fff3e0; padding: 15px; border-radius: 8px;">
324
+ <h3>πŸ”— Pinger Network</h3>
325
+ <p><strong>{health_success}/{len(pinger_spaces)} OK</strong></p>
326
+ </div>
327
+ </div>
328
+
329
+ <div class="model-list">
330
+ <h3>Model Health Status</h3>
331
+ <ul>
332
+ {model_status_html if model_statuses else "<li>No model data yet</li>"}
333
+ </ul>
334
+ </div>
335
+
336
+ <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
337
+ <p><strong>Next check in:</strong> ~5 minutes</p>
338
+ </div>
339
  </body>
340
  </html>
341
  """
 
343
 
344
  @app.get("/health")
345
  async def health():
346
+ """Health endpoint for other pingers"""
347
  return JSONResponse({
348
  "status": "healthy",
349
+ "service": "smart-model-warmer",
350
  "regular_servers": len(regular_servers),
351
+ "ai_models": len(models_to_warm),
352
+ "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
353
+ "timestamp": datetime.now().isoformat()
354
  })
355
 
356
  @app.get("/status")
357
  async def status():
358
+ """Detailed status endpoint"""
359
  return JSONResponse({
360
  "regular_servers": ping_results,
361
+ "ai_models": model_warmup_results,
362
+ "pinger_network": health_results,
363
+ "timestamp": datetime.now().isoformat()
364
+ })
365
+
366
@app.get("/ping-now")
async def ping_now():
    """Run a model warmup pass immediately and report how many are healthy."""
    outcomes = await warmup_all_models()
    healthy = [item for item in outcomes if item['status'] == 'success']

    body = {
        "message": "Manual warming completed",
        "models_healthy": f"{len(healthy)}/{len(models_to_warm)}",
        "timestamp": datetime.now().isoformat(),
    }
    return JSONResponse(body)
377