from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
import httpx
import asyncio
import time
from datetime import datetime
from typing import Dict, Optional
from contextlib import asynccontextmanager

# Configuration
PING_INTERVAL = 600  # 10 minutes between warmup cycles
HEALTH_CHECK_INTERVAL = 1800  # 30 minutes between pinger-network health checks
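
# Every cycle pings all regular servers and warms all models; the
# pinger-network health checks piggyback on a cycle only once
# HEALTH_CHECK_INTERVAL has elapsed since the previous check.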

# List of other pinger Spaces
pinger_spaces = [
    "https://rajhuggingface4253-ping2.hf.space",
]

# Regular servers to ping (HTTP GET)
regular_servers = [
    "https://rajhuggingface4253-backend-compressorpro.hf.space",
    "https://rajhuggingface4253-backend-compressorpro2.hf.space",
    "https://rajhuggingface4253-compressor3pro.hf.space",
    "https://rajhuggingface4253-bgr.hf.space",
    "https://rajhuggingface4253-real.hf.space",
    "https://rajhuggingface4253-cmy.hf.space",
    "https://rajhuggingface4253-waif.hf.space"
]

# Models to warm, each with its own endpoint, payload, and timeout
models_to_warm = [
    {
        "name": "gemma",
        "url": "https://rajhuggingface4253-gemma-checking.hf.space",
        "endpoint": "/fix",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "text": "warmup",
        }
    },
    {
        "name": "NLLB Translator",
        "url": "https://rajhuggingface4253-translate.hf.space",
        "endpoint": "/translate",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "text": "warmup",
            "src_lang": "eng_Latn",
            "tgt_lang": "hin_Deva"
        }
    },
    {
        "name": "Qwen 1",
        "url": "https://rajhuggingface4253-qwen.hf.space",
        "endpoint": "/chat",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
            "max_new_tokens": 50,
            "temperature": 0.1,
            "enable_code_execution": False,
            "enable_web_search": False,
            "enable_thinking": False
        }
    },
    {
        "name": "Qwen 2",
        "url": "https://rajhuggingface4253-qwe.hf.space",
        "endpoint": "/chat",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
            "max_new_tokens": 50,
            "temperature": 0.1,
            "enable_code_execution": False,
            "enable_web_search": False,
            "enable_thinking": False
        }
    },
    {
        "name": "Qwen 3",
        "url": "https://rajhuggingface4253-qwen3.hf.space",
        "endpoint": "/chat",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
            "max_new_tokens": 50,
            "temperature": 0.1,
            "enable_code_execution": False,
            "enable_web_search": False,
            "enable_thinking": False
        }
    },
    {
        "name": "Kokoro TTS",
        "url": "https://rajhuggingface4253-koko.hf.space",
        "endpoint": "/health",
        "type": "health_check",
        "timeout": 15.0
    },
    {
        "name": "Kitten TTS",
        "url": "https://rajhuggingface4253-kitten.hf.space",
        "endpoint": "/health",
        "type": "health_check",
        "timeout": 15.0
    }
]
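
# Two warmup styles: "streaming_chat" entries are POSTed their payload and the
# response body is read back (proving the model actually generates), while
# "health_check" entries only receive a GET to their health endpoint.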

# Global state
ping_results: Dict[str, Dict] = {}
model_warmup_results: Dict[str, Dict] = {}
health_results: Dict[str, Dict] = {}
last_ping_run: Optional[datetime] = None
last_model_warmup: Optional[datetime] = None
last_health_check_time: float = 0
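# These dicts are written by the background loop and read by the request
# handlers; with everything on a single asyncio event loop, no locking is
# needed.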


class ParallelWarmer:
    def __init__(self):
        self.max_retries = 3
        self.retry_delay = 1  # Base delay in seconds for exponential backoff
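
    # With max_retries=3 and retry_delay=1, a failed attempt is retried after
    # 1s and then 2s (retry_delay * 2 ** attempt); no sleep follows the final
    # attempt.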

    async def ping_server_with_retry(self, url: str) -> Dict:
        """Ping a server with automatic retry on failure."""
        last_error = None
        response_time = 0  # Keeps the error return valid even if the first attempt fails early
        for attempt in range(self.max_retries):
            start_time = time.time()  # Defined before the try block so except can use it
            try:
                async with httpx.AsyncClient(timeout=10.0) as client:
                    response = await client.get(url)
                    response_time = round((time.time() - start_time) * 1000, 1)
                    if response.status_code < 500:  # Only retry on server errors
                        return {
                            'status': 'success',
                            'response_time_ms': response_time,
                            'status_code': response.status_code,
                            'timestamp': datetime.now().isoformat(),
                            'attempts': attempt + 1
                        }
                    else:
                        last_error = f"HTTP {response.status_code}"
            except Exception as e:
                last_error = str(e)
                response_time = round((time.time() - start_time) * 1000, 1)
            # Exponential backoff between retries
            if attempt < self.max_retries - 1:
                await asyncio.sleep(self.retry_delay * (2 ** attempt))
        return {
            'status': 'error',
            'error': str(last_error) if last_error else 'Max retries exceeded',
            'timestamp': datetime.now().isoformat(),
            'attempts': self.max_retries,
            'response_time_ms': response_time
        }

    async def warmup_chat_model_with_retry(self, model_config: Dict) -> Dict:
        """Warm up chat models with retry logic."""
        last_error = None
        response_time = 0
        for attempt in range(self.max_retries):
            start_time = time.time()  # Defined before the try block
            try:
                async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
                    payload = model_config['payload']
                    api_url = f"{model_config['url']}{model_config['endpoint']}"
                    response = await client.post(api_url, json=payload)
                    response_time = round((time.time() - start_time) * 1000, 1)
                    if response.status_code == 200:
                        try:
                            # The response has already been fully read, so aiter_text()
                            # replays the buffered body rather than streaming live chunks
                            collected_response = ""
                            async for chunk in response.aiter_text():
                                if chunk.strip():
                                    collected_response += chunk
                            if collected_response and len(collected_response.strip()) > 5:
                                return {
                                    'status': 'success',
                                    'response_time_ms': response_time,
                                    'status_code': response.status_code,
                                    'ai_response': collected_response[:100].strip(),
                                    'got_ai_response': True,
                                    'timestamp': datetime.now().isoformat(),
                                    'attempts': attempt + 1
                                }
                            else:
                                return {
                                    'status': 'success',
                                    'response_time_ms': response_time,
                                    'status_code': response.status_code,
                                    'ai_response': 'Empty response',
                                    'got_ai_response': False,
                                    'timestamp': datetime.now().isoformat(),
                                    'attempts': attempt + 1
                                }
                        except Exception as e:
                            # Stream error, but the HTTP request itself returned 200
                            return {
                                'status': 'success',
                                'response_time_ms': response_time,
                                'status_code': response.status_code,
                                'ai_response': f'Stream error: {str(e)}',
                                'got_ai_response': False,
                                'timestamp': datetime.now().isoformat(),
                                'attempts': attempt + 1
                            }
                    else:
                        last_error = f"HTTP {response.status_code}"
            except httpx.TimeoutException:
                # httpx raises its own timeout type, not asyncio.TimeoutError
                last_error = 'Request timeout'
            except Exception as e:
                last_error = str(e)
            # Calculate response time even on error
            response_time = round((time.time() - start_time) * 1000, 1)
            # Exponential backoff between retries
            if attempt < self.max_retries - 1:
                await asyncio.sleep(self.retry_delay * (2 ** attempt))
        return {
            'status': 'error',
            'error': str(last_error) if last_error else 'Max retries exceeded',
            'response_time_ms': response_time,
            'timestamp': datetime.now().isoformat(),
            'attempts': self.max_retries
        }
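
    # Design note: an HTTP 200 whose body fails to stream is still reported as
    # a successful warmup above, since the Space demonstrably woke up; the
    # got_ai_response flag records whether real generated text came back.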

    async def warmup_health_model_with_retry(self, model_config: Dict) -> Dict:
        """Warm up health-endpoint models with retry."""
        last_error = None
        response_time = 0
        for attempt in range(self.max_retries):
            start_time = time.time()  # Defined before the try block
            try:
                async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
                    api_url = f"{model_config['url']}{model_config['endpoint']}"
                    response = await client.get(api_url)
                    response_time = round((time.time() - start_time) * 1000, 1)
                    if response.status_code == 200:
                        return {
                            'status': 'success',
                            'response_time_ms': response_time,
                            'status_code': response.status_code,
                            'timestamp': datetime.now().isoformat(),
                            'attempts': attempt + 1
                        }
                    else:
                        last_error = f"HTTP {response.status_code}"
            except Exception as e:
                last_error = str(e)
                # Calculate response time even on error
                response_time = round((time.time() - start_time) * 1000, 1)
            # Exponential backoff between retries
            if attempt < self.max_retries - 1:
                await asyncio.sleep(self.retry_delay * (2 ** attempt))
        return {
            'status': 'error',
            'error': str(last_error) if last_error else 'Max retries exceeded',
            'response_time_ms': response_time,
            'timestamp': datetime.now().isoformat(),
            'attempts': self.max_retries
        }

    async def warmup_single_model_with_retry(self, model_config: Dict) -> Dict:
        """Route to the appropriate warming method with retry."""
        if model_config.get('type') == 'streaming_chat':
            return await self.warmup_chat_model_with_retry(model_config)
        else:
            return await self.warmup_health_model_with_retry(model_config)


# Initialize the parallel warmer
warmer = ParallelWarmer()
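
# ping_all_in_parallel() builds one task per target and awaits them all with
# asyncio.gather(return_exceptions=True), so a crash in any single task is
# returned as a value instead of cancelling the rest of the batch.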


async def ping_all_in_parallel():
    """Ping ALL targets in parallel with isolated error handling."""
    global ping_results, model_warmup_results, health_results, last_ping_run, last_model_warmup, last_health_check_time
    all_tasks = []
    task_mapping = {}

    # 1. Create tasks for regular servers
    for server in regular_servers:
        task = asyncio.create_task(warmer.ping_server_with_retry(server))
        all_tasks.append(task)
        task_mapping[task] = ('server', server)

    # 2. Create tasks for model warmups
    for model in models_to_warm:
        task = asyncio.create_task(warmer.warmup_single_model_with_retry(model))
        all_tasks.append(task)
        task_mapping[task] = ('model', model['url'])

    # 3. Create tasks for pinger-network health checks (only when due)
    current_time = time.time()
    if (current_time - last_health_check_time) >= HEALTH_CHECK_INTERVAL and pinger_spaces:
        for space_url in pinger_spaces:
            health_url = f"{space_url}/health"
            task = asyncio.create_task(warmer.ping_server_with_retry(health_url))
            all_tasks.append(task)
            task_mapping[task] = ('health', space_url)
        last_health_check_time = current_time

    # 4. Execute ALL tasks in parallel
    if all_tasks:
        results = await asyncio.gather(*all_tasks, return_exceptions=True)

        # 5. Process results (isolated: one failure doesn't affect the others)
        for task, result in zip(all_tasks, results):
            task_type, identifier = task_mapping[task]
            if isinstance(result, Exception):
                # The task itself crashed; record the failure and move on
                print(f"⚠️ Task crashed with exception: {type(result).__name__}: {result}")
                result = {
                    'status': 'error',
                    'error': f"{type(result).__name__}: {str(result)}",
                    'timestamp': datetime.now().isoformat()
                }
            if task_type == 'server':
                ping_results[identifier] = result
            elif task_type == 'model':
                # Look up the model name for this URL
                model_name = next((m['name'] for m in models_to_warm if m['url'] == identifier), identifier)
                model_warmup_results[identifier] = {
                    'model_info': {'name': model_name, 'url': identifier},
                    'health_check': result
                }
            elif task_type == 'health':
                health_results[identifier] = result

    # Update timestamps
    last_ping_run = datetime.now()
    last_model_warmup = datetime.now()

    # Log a summary
    server_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
    model_success = sum(1 for r in model_warmup_results.values()
                        if r['health_check'].get('status') == 'success')
    model_ai_response = sum(1 for r in model_warmup_results.values()
                            if r['health_check'].get('got_ai_response'))
    print(f"✅ {datetime.now().strftime('%H:%M:%S')} - Parallel ping complete: "
          f"{server_success}/{len(regular_servers)} servers OK, "
          f"{model_success}/{len(models_to_warm)} models healthy "
          f"({model_ai_response} AI responding)")


async def continuous_parallel_pinging():
    """Main pinging loop with fully parallel execution."""
    print("🚀 Smart Model Warmer Started (Fully Parallel)!")
    print(f"🌐 Regular servers: {len(regular_servers)}")
    print(f"🤖 Models to warm: {len(models_to_warm)}")
    print(f"🔗 Pinger network: {len(pinger_spaces)}")
    while True:
        try:
            start_cycle = time.time()
            await ping_all_in_parallel()
            # Sleep for whatever is left of the interval so cycle starts stay
            # roughly PING_INTERVAL apart
            cycle_duration = time.time() - start_cycle
            sleep_time = max(0, PING_INTERVAL - cycle_duration)
            if sleep_time > 0:
                await asyncio.sleep(sleep_time)
            else:
                print(f"⚠️ Warning: ping cycle took {cycle_duration:.1f}s (longer than the {PING_INTERVAL}s interval)!")
                await asyncio.sleep(1)  # Minimum delay
        except Exception as e:
            print(f"❌ Error in main loop: {e}")
            await asyncio.sleep(60)  # Back off before retrying


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: launch the background warming loop, keeping a reference so the
    # task is not garbage-collected mid-flight
    warming_task = asyncio.create_task(continuous_parallel_pinging())
    yield
    # Shutdown
    warming_task.cancel()
    print("Shutting down...")


app = FastAPI(title="Smart Model Warmer", lifespan=lifespan)
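
# HTTP endpoints: "/" renders the dashboard, "/health" answers the other
# pinger Spaces (it matches the health_url built in ping_all_in_parallel),
# "/status" dumps the raw result dicts, and "/ping-now" triggers a cycle on
# demand ("/status" and "/ping-now" are assumed paths).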

@app.get("/")
async def home():
    """Dashboard showing warming status."""
    regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
    model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
    health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
    ai_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('got_ai_response'))

    # Collect model details for display
    model_statuses = []
    for url, data in model_warmup_results.items():
        model_info = data['model_info']
        health = data['health_check']
        status_display = "success" if health['status'] == 'success' else "error"
        ai_indicator = " ✓AI" if health.get('got_ai_response') else ""
        error_display = f" - {health['error']}" if health.get('error') else ""
        preview = f" - '{health.get('ai_response', '')[:50]}...'" if health.get('ai_response') else ""
        model_statuses.append({
            'name': model_info['name'],
            'type': next((m.get('type', 'health_check') for m in models_to_warm if m['url'] == url), 'health_check'),
            'status_display': status_display,
            'response_time': health.get('response_time_ms', 0),
            'ai_indicator': ai_indicator,
            'error_display': error_display,
            'preview': preview
        })
    model_status_html = "".join([
        f"<li>{m['name']} ({m['type']}): <span class='{m['status_display']}'>{m['status_display']}</span> ({m['response_time']}ms){m['ai_indicator']}{m['error_display']}{m['preview']}</li>"
        for m in model_statuses
    ])

    # Collect server status for display
    server_statuses = []
    for server, result in ping_results.items():
        status_display = "success" if result.get('status') == 'success' else "error"
        server_name = server.split('//')[-1].split('.')[0]  # Extract the Space name from the URL
        error_display = f" - {result['error']}" if result.get('error') else ""
        server_statuses.append({
            'name': server_name,
            'status_display': status_display,
            'response_time': result.get('response_time_ms', 0),
            'error_display': error_display
        })
    server_status_html = "".join([
        f"<li>{s['name']}: <span class='{s['status_display']}'>{s['status_display']}</span> ({s['response_time']}ms){s['error_display']}</li>"
        for s in server_statuses
    ])
    html_content = f"""
    <html>
    <head>
        <title>Smart Model Warmer</title>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 40px; }}
            .success {{ color: green; font-weight: bold; }}
            .error {{ color: red; font-weight: bold; }}
            .container {{ max-width: 1200px; margin: 0 auto; }}
            .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; margin-bottom: 20px; }}
            .server-list {{ background: #e8f5e8; padding: 15px; border-radius: 5px; margin-bottom: 20px; }}
            .stats-grid {{ display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px; }}
            .stat-box {{ padding: 15px; border-radius: 8px; }}
            .servers-stat {{ background: #e8f5e8; }}
            .models-stat {{ background: #e3f2fd; }}
            .network-stat {{ background: #fff3e0; }}
        </style>
    </head>
    <body>
        <div class="container">
            <h1>🤖 Smart Model Warmer (Parallel)</h1>
            <div class="stats-grid">
                <div class="stat-box servers-stat">
                    <h3>🌐 Regular Servers</h3>
                    <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
                </div>
                <div class="stat-box models-stat">
                    <h3>🤖 AI Models</h3>
                    <p><strong>{model_success}/{len(models_to_warm)} Healthy</strong></p>
                    <p><strong>{ai_success}/{len(models_to_warm)} AI Responding</strong></p>
                </div>
                <div class="stat-box network-stat">
                    <h3>🔗 Pinger Network</h3>
                    <p><strong>{health_success}/{len(pinger_spaces)} OK</strong></p>
                </div>
            </div>
            <div class="server-list">
                <h3>Server Status</h3>
                <ul>
                    {server_status_html if server_statuses else "<li>No server data yet</li>"}
                </ul>
            </div>
            <div class="model-list">
                <h3>Model Status</h3>
                <ul>
                    {model_status_html if model_statuses else "<li>No model data yet</li>"}
                </ul>
            </div>
            <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
            <p><strong>Last Server Check:</strong> {last_ping_run.strftime('%Y-%m-%d %H:%M:%S') if last_ping_run else 'Never'}</p>
            <p><strong>Next check in:</strong> ~{PING_INTERVAL // 60} minutes</p>
            <p><em>All checks run in parallel with automatic retries.</em></p>
        </div>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)


@app.get("/health")
async def health():
    """Health endpoint polled by the other pinger Spaces."""
    return JSONResponse({
        "status": "healthy",
        "service": "smart-model-warmer",
        "regular_servers": len(regular_servers),
        "ai_models": len(models_to_warm),
        "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
        "last_ping_run": last_ping_run.isoformat() if last_ping_run else None,
        "parallel_execution": True,
        "retry_enabled": True,
        "timestamp": datetime.now().isoformat()
    })


@app.get("/status")
async def status():
    """Detailed status endpoint."""
    return JSONResponse({
        "regular_servers": ping_results,
        "ai_models": model_warmup_results,
        "pinger_network": health_results,
        "timestamp": datetime.now().isoformat()
    })


@app.get("/ping-now")
async def ping_now():
    """Manually trigger an immediate warming cycle."""
    await ping_all_in_parallel()
    regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
    model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
    ai_response_count = sum(1 for r in model_warmup_results.values() if r['health_check'].get('got_ai_response'))
    return JSONResponse({
        "message": "Manual warming completed",
        "execution_mode": "parallel",
        "servers_ok": f"{regular_success}/{len(regular_servers)}",
        "models_healthy": f"{model_success}/{len(models_to_warm)}",
        "ai_responding": f"{ai_response_count}/{len(models_to_warm)}",
        "timestamp": datetime.now().isoformat()
    })


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
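
# Quick local smoke test (a sketch; assumes this file is saved as app.py):
#
#   uvicorn app:app --host 0.0.0.0 --port 7860
#   curl http://localhost:7860/health     # JSON liveness summary
#   curl http://localhost:7860/ping-now   # run a warmup cycle right away
#
# On Hugging Face Spaces the __main__ block above serves on port 7860, the
# port Docker Spaces expose by default.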