Rajhuggingface4253 committed on
Commit
a3f3af9
Β·
verified Β·
1 Parent(s): 1eda88b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -93
app.py CHANGED
@@ -4,9 +4,8 @@ import httpx
4
  import asyncio
5
  import time
6
  from datetime import datetime
7
- from typing import Dict, List, Optional
8
  from contextlib import asynccontextmanager
9
- import json
10
 
11
  # Configuration
12
  PING_INTERVAL = 300 # 5 minutes
@@ -24,39 +23,54 @@ regular_servers = [
24
  "https://rajhuggingface4253-compressor3pro.hf.space"
25
  ]
26
 
27
- # Enhanced models configuration with streaming support
28
  models_to_warm = [
29
  {
30
  "name": "Qwen 1",
31
  "url": "https://rajhuggingface4253-qwen.hf.space",
32
  "endpoint": "/chat",
33
  "type": "streaming_chat",
34
- "warmup_prompt": "Hello, are you ready? Respond with just 'OK'.",
35
  "timeout": 30.0,
36
- "expected_response": "OK"
 
 
 
 
 
 
37
  },
38
  {
39
  "name": "Qwen 2",
40
  "url": "https://rajhuggingface4253-qwen2.hf.space",
41
  "endpoint": "/chat",
42
  "type": "streaming_chat",
43
- "warmup_prompt": "Say 'ready' if you're working.",
44
  "timeout": 30.0,
45
- "expected_response": "ready"
 
 
 
 
 
 
46
  },
47
  {
48
  "name": "Qwen 3",
49
  "url": "https://rajhuggingface4253-qwen3.hf.space",
50
  "endpoint": "/chat",
51
  "type": "streaming_chat",
52
- "warmup_prompt": "Ping! Respond with 'pong'.",
53
  "timeout": 30.0,
54
- "expected_response": "pong"
 
 
 
 
 
 
55
  },
56
  {
57
  "name": "Kokoro TTS",
58
  "url": "https://rajhuggingface4253-koko.hf.space",
59
- "endpoint": "/health", # TTS might still use health endpoint
60
  "type": "health_check",
61
  "timeout": 15.0
62
  }
@@ -91,23 +105,12 @@ async def ping_server(url: str) -> Dict:
91
  }
92
 
93
  async def warmup_streaming_model(model_config: Dict) -> Dict:
94
- """
95
- Warm up streaming models by calling their chat endpoints
96
- and reading the streaming response
97
- """
98
  try:
99
  start_time = time.time()
100
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
101
 
102
- # Prepare the request payload for chat endpoint
103
- payload = {
104
- "prompt": model_config['warmup_prompt'],
105
- "max_new_tokens": 50,
106
- "temperature": 0.1, # Low temperature for consistent responses
107
- "enable_code_execution": False,
108
- "enable_web_search": False
109
- }
110
-
111
  api_url = f"{model_config['url']}{model_config['endpoint']}"
112
 
113
  # Make the streaming request
@@ -133,15 +136,8 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
133
 
134
  # Check if we got a valid response
135
  if collected_response:
136
- # Check for expected response if specified
137
- expected_response = model_config.get('expected_response', '').lower()
138
- if expected_response and expected_response in collected_response.lower():
139
- status = 'success'
140
- else:
141
- status = 'success' # Still success if we got any response
142
-
143
  return {
144
- 'status': status,
145
  'response_time_ms': response_time,
146
  'status_code': response.status_code,
147
  'model_response': collected_response[:200] + "..." if len(collected_response) > 200 else collected_response,
@@ -177,7 +173,7 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
177
  }
178
 
179
  async def warmup_model_health(model_config: Dict) -> Dict:
180
- """Warm up models using health endpoints (for non-streaming models)"""
181
  try:
182
  start_time = time.time()
183
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
@@ -278,7 +274,7 @@ async def warmup_all_models():
278
  'health_check': results[i]
279
  }
280
 
281
- # Log detailed results
282
  result = results[i]
283
  model_name = model['name']
284
 
@@ -288,13 +284,8 @@ async def warmup_all_models():
288
  if result.get('response_length'):
289
  response_info += f" ({result['response_length']} chars)"
290
  print(f"βœ… {model_name}: {response_info}")
291
-
292
- # Log snippet of model response for streaming models
293
- if result.get('model_response'):
294
- snippet = result['model_response'].replace('\n', ' ')[:50]
295
- print(f" πŸ“ Response: {snippet}...")
296
  else:
297
- print(f"βœ… {model_name}: {result['response_time_ms']}ms (health check)")
298
  else:
299
  error_msg = result.get('error', 'Unknown error')
300
  print(f"❌ {model_name}: {error_msg}")
@@ -341,13 +332,11 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
341
  print(f"πŸ”— Health ping failed for {space_url}: {e}")
342
 
343
  async def continuous_pinging():
344
- """Main pinging loop with enhanced model warming"""
345
- print("πŸš€ Enhanced Smart Model Warmer Started!")
346
  print(f"🌐 Regular servers: {len(regular_servers)}")
347
  print(f"πŸ€– Models to warm: {len(models_to_warm)}")
348
  print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
349
- print("⏰ Health checks every 5 minutes")
350
- print("🎯 Using streaming endpoints for model warming")
351
 
352
  last_health_check = 0
353
 
@@ -357,14 +346,13 @@ async def continuous_pinging():
357
  if regular_servers:
358
  await ping_all_servers()
359
 
360
- # Warm up models using appropriate endpoints
361
  if models_to_warm:
362
  await warmup_all_models()
363
 
364
  # Ping health endpoints every 30 minutes
365
  current_time = time.time()
366
  if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
367
- print("πŸ”„ Pinging other pinger spaces...")
368
  await ping_health_endpoints()
369
  last_health_check = current_time
370
 
@@ -377,13 +365,12 @@ async def continuous_pinging():
377
  @asynccontextmanager
378
  async def lifespan(app: FastAPI):
379
  # Startup
380
- print("Starting up Enhanced Smart Model Warmer...")
381
  asyncio.create_task(continuous_pinging())
382
  yield
383
  # Shutdown
384
  print("Shutting down...")
385
 
386
- app = FastAPI(title="Enhanced Smart Model Warmer", lifespan=lifespan)
387
 
388
  @app.get("/", response_class=HTMLResponse)
389
  async def home():
@@ -397,57 +384,36 @@ async def home():
397
  for url, data in model_warmup_results.items():
398
  model_info = data['model_info']
399
  health = data['health_check']
400
-
401
- status_info = {
402
  'name': model_info['name'],
403
  'type': model_info.get('type', 'health_check'),
404
  'status': health['status'],
405
  'response_time': health.get('response_time_ms', 0),
406
  'error': health.get('error', '')
407
- }
408
-
409
- # Add response info for streaming models
410
- if health.get('got_response') and health.get('model_response'):
411
- status_info['response_snippet'] = health['model_response']
412
- status_info['response_length'] = health.get('response_length', 0)
413
-
414
- model_statuses.append(status_info)
415
 
416
  model_status_html = "".join([
417
- f"""
418
- <li>
419
- <strong>{m['name']}</strong> ({m['type']}):
420
- <span class="{'success' if m['status'] == 'success' else 'error'}">{m['status']}</span>
421
- ({m['response_time']}ms)
422
- {f" - {m['response_length']} chars" if m.get('response_length') else ''}
423
- {f"<br><small>Error: {m['error']}</small>" if m['error'] else ''}
424
- {f"<br><small>Snippet: {m.get('response_snippet', '')}</small>" if m.get('response_snippet') else ''}
425
- </li>
426
- """
427
  for m in model_statuses
428
  ])
429
 
430
  html_content = f"""
431
  <html>
432
  <head>
433
- <title>Enhanced Smart Model Warmer</title>
434
  <style>
435
  body {{ font-family: Arial, sans-serif; margin: 40px; }}
436
  .success {{ color: green; font-weight: bold; }}
437
  .error {{ color: red; font-weight: bold; }}
438
- .container {{ max-width: 1200px; margin: 0 auto; }}
439
  .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
440
- .status-grid {{ display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px; }}
441
- .model-item {{ margin: 10px 0; padding: 10px; background: white; border-radius: 5px; }}
442
- small {{ color: #666; }}
443
  </style>
444
  </head>
445
  <body>
446
  <div class="container">
447
- <h1>πŸš€ Enhanced Smart Model Warmer</h1>
448
- <p><em>Now with streaming endpoint support for AI models</em></p>
449
 
450
- <div class="status-grid">
451
  <div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
452
  <h3>🌐 Regular Servers</h3>
453
  <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
@@ -463,7 +429,7 @@ async def home():
463
  </div>
464
 
465
  <div class="model-list">
466
- <h3>Model Warming Status</h3>
467
  <ul>
468
  {model_status_html if model_statuses else "<li>No model data yet</li>"}
469
  </ul>
@@ -471,7 +437,6 @@ async def home():
471
 
472
  <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
473
  <p><strong>Next check in:</strong> ~5 minutes</p>
474
- <p><strong>Streaming endpoints:</strong> Using /chat for Qwen models</p>
475
  </div>
476
  </body>
477
  </html>
@@ -483,10 +448,9 @@ async def health():
483
  """Health endpoint for other pingers"""
484
  return JSONResponse({
485
  "status": "healthy",
486
- "service": "enhanced-smart-model-warmer",
487
  "regular_servers": len(regular_servers),
488
  "ai_models": len(models_to_warm),
489
- "streaming_models": sum(1 for m in models_to_warm if m.get('type') == 'streaming_chat'),
490
  "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
491
  "timestamp": datetime.now().isoformat()
492
  })
@@ -513,20 +477,6 @@ async def ping_now():
513
  "timestamp": datetime.now().isoformat()
514
  })
515
 
516
- @app.get("/test-streaming/{model_index}")
517
- async def test_streaming(model_index: int = 0):
518
- """Test a specific streaming model"""
519
- if model_index >= len(models_to_warm):
520
- return JSONResponse({"error": "Model index out of range"}, status_code=400)
521
-
522
- model_config = models_to_warm[model_index]
523
- result = await warmup_single_model(model_config)
524
-
525
- return JSONResponse({
526
- "model": model_config['name'],
527
- "test_result": result
528
- })
529
-
530
  if __name__ == "__main__":
531
  import uvicorn
532
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
4
  import asyncio
5
  import time
6
  from datetime import datetime
7
+ from typing import Dict, List
8
  from contextlib import asynccontextmanager
 
9
 
10
  # Configuration
11
  PING_INTERVAL = 300 # 5 minutes
 
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
+ # Models to warm with correct payloads
27
  models_to_warm = [
28
  {
29
  "name": "Qwen 1",
30
  "url": "https://rajhuggingface4253-qwen.hf.space",
31
  "endpoint": "/chat",
32
  "type": "streaming_chat",
 
33
  "timeout": 30.0,
34
+ "payload": {
35
+ "prompt": "Hello, are you ready? Respond with just 'OK'.",
36
+ "max_new_tokens": 50,
37
+ "temperature": 0.1,
38
+ "enable_code_execution": False,
39
+ "enable_web_search": False
40
+ }
41
  },
42
  {
43
  "name": "Qwen 2",
44
  "url": "https://rajhuggingface4253-qwen2.hf.space",
45
  "endpoint": "/chat",
46
  "type": "streaming_chat",
 
47
  "timeout": 30.0,
48
+ "payload": {
49
+ "prompt": "Say 'ready' if you're working.",
50
+ "max_new_tokens": 50,
51
+ "temperature": 0.1,
52
+ "enable_code_execution": False,
53
+ "enable_web_search": False
54
+ }
55
  },
56
  {
57
  "name": "Qwen 3",
58
  "url": "https://rajhuggingface4253-qwen3.hf.space",
59
  "endpoint": "/chat",
60
  "type": "streaming_chat",
 
61
  "timeout": 30.0,
62
+ "payload": {
63
+ "prompt": "Ping! Respond with 'pong'.",
64
+ "max_new_tokens": 50,
65
+ "temperature": 0.1,
66
+ "enable_code_execution": False,
67
+ "enable_web_search": False
68
+ }
69
  },
70
  {
71
  "name": "Kokoro TTS",
72
  "url": "https://rajhuggingface4253-koko.hf.space",
73
+ "endpoint": "/health",
74
  "type": "health_check",
75
  "timeout": 15.0
76
  }
 
105
  }
106
 
107
  async def warmup_streaming_model(model_config: Dict) -> Dict:
108
+ """Warm up streaming models by calling their chat endpoints"""
 
 
 
109
  try:
110
  start_time = time.time()
111
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
112
 
113
+ payload = model_config['payload']
 
 
 
 
 
 
 
 
114
  api_url = f"{model_config['url']}{model_config['endpoint']}"
115
 
116
  # Make the streaming request
 
136
 
137
  # Check if we got a valid response
138
  if collected_response:
 
 
 
 
 
 
 
139
  return {
140
+ 'status': 'success',
141
  'response_time_ms': response_time,
142
  'status_code': response.status_code,
143
  'model_response': collected_response[:200] + "..." if len(collected_response) > 200 else collected_response,
 
173
  }
174
 
175
  async def warmup_model_health(model_config: Dict) -> Dict:
176
+ """Warm up models using health endpoints"""
177
  try:
178
  start_time = time.time()
179
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
 
274
  'health_check': results[i]
275
  }
276
 
277
+ # Log results
278
  result = results[i]
279
  model_name = model['name']
280
 
 
284
  if result.get('response_length'):
285
  response_info += f" ({result['response_length']} chars)"
286
  print(f"βœ… {model_name}: {response_info}")
 
 
 
 
 
287
  else:
288
+ print(f"βœ… {model_name}: {result['response_time_ms']}ms")
289
  else:
290
  error_msg = result.get('error', 'Unknown error')
291
  print(f"❌ {model_name}: {error_msg}")
 
332
  print(f"πŸ”— Health ping failed for {space_url}: {e}")
333
 
334
  async def continuous_pinging():
335
+ """Main pinging loop with model warming"""
336
+ print("πŸš€ Smart Model Warmer Started!")
337
  print(f"🌐 Regular servers: {len(regular_servers)}")
338
  print(f"πŸ€– Models to warm: {len(models_to_warm)}")
339
  print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
 
 
340
 
341
  last_health_check = 0
342
 
 
346
  if regular_servers:
347
  await ping_all_servers()
348
 
349
+ # Warm up models
350
  if models_to_warm:
351
  await warmup_all_models()
352
 
353
  # Ping health endpoints every 30 minutes
354
  current_time = time.time()
355
  if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
 
356
  await ping_health_endpoints()
357
  last_health_check = current_time
358
 
 
365
  @asynccontextmanager
366
  async def lifespan(app: FastAPI):
367
  # Startup
 
368
  asyncio.create_task(continuous_pinging())
369
  yield
370
  # Shutdown
371
  print("Shutting down...")
372
 
373
+ app = FastAPI(title="Smart Model Warmer", lifespan=lifespan)
374
 
375
  @app.get("/", response_class=HTMLResponse)
376
  async def home():
 
384
  for url, data in model_warmup_results.items():
385
  model_info = data['model_info']
386
  health = data['health_check']
387
+ model_statuses.append({
 
388
  'name': model_info['name'],
389
  'type': model_info.get('type', 'health_check'),
390
  'status': health['status'],
391
  'response_time': health.get('response_time_ms', 0),
392
  'error': health.get('error', '')
393
+ })
 
 
 
 
 
 
 
394
 
395
  model_status_html = "".join([
396
+ f"<li>{m['name']} ({m['type']}): <span class={'success' if m['status'] == 'success' else 'error'}>{m['status']}</span> ({m['response_time']}ms){f' - {m[\"error\"]}' if m['error'] else ''}</li>"
 
 
 
 
 
 
 
 
 
397
  for m in model_statuses
398
  ])
399
 
400
  html_content = f"""
401
  <html>
402
  <head>
403
+ <title>Smart Model Warmer</title>
404
  <style>
405
  body {{ font-family: Arial, sans-serif; margin: 40px; }}
406
  .success {{ color: green; font-weight: bold; }}
407
  .error {{ color: red; font-weight: bold; }}
408
+ .container {{ max-width: 1000px; margin: 0 auto; }}
409
  .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
 
 
 
410
  </style>
411
  </head>
412
  <body>
413
  <div class="container">
414
+ <h1>πŸ€– Smart Model Warmer</h1>
 
415
 
416
+ <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px;">
417
  <div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
418
  <h3>🌐 Regular Servers</h3>
419
  <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
 
429
  </div>
430
 
431
  <div class="model-list">
432
+ <h3>Model Status</h3>
433
  <ul>
434
  {model_status_html if model_statuses else "<li>No model data yet</li>"}
435
  </ul>
 
437
 
438
  <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
439
  <p><strong>Next check in:</strong> ~5 minutes</p>
 
440
  </div>
441
  </body>
442
  </html>
 
448
  """Health endpoint for other pingers"""
449
  return JSONResponse({
450
  "status": "healthy",
451
+ "service": "smart-model-warmer",
452
  "regular_servers": len(regular_servers),
453
  "ai_models": len(models_to_warm),
 
454
  "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
455
  "timestamp": datetime.now().isoformat()
456
  })
 
477
  "timestamp": datetime.now().isoformat()
478
  })
479
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
  if __name__ == "__main__":
481
  import uvicorn
482
  uvicorn.run(app, host="0.0.0.0", port=7860)