Rajhuggingface4253 committed
Commit 098eb43 · verified · 1 Parent(s): 8dbef7a

Update app.py

Files changed (1)
  1. app.py +169 -195
app.py CHANGED
@@ -16,50 +16,50 @@ pinger_spaces = [
     "https://rajhuggingface4253-ping.hf.space",
 ]
 
-# Regular servers to ping
+# Regular servers to ping (HTTP GET)
 regular_servers = [
     "https://rajhuggingface4253-backend-compressorpro.hf.space",
     "https://rajhuggingface4253-backend-compressorpro2.hf.space",
     "https://rajhuggingface4253-compressor3pro.hf.space"
 ]
 
-# Chat models that need warmup messages
-chat_models = [
+# Models to warm using health endpoints
+models_to_warm = [
     {
+        "name": "Qwen 1",
         "url": "https://rajhuggingface4253-qwen.hf.space",
-        "api_endpoint": "/chat",
-        "warmup_message": "Say 'active' in one word?",
-        "type": "qwen"
+        "endpoint": "/health",
+        "type": "health_check"
     },
     {
-        "url": "https://rajhuggingface4253-qwen2.hf.space",
-        "api_endpoint": "/chat",
-        "warmup_message": "Say 'active' in one word",
-        "type": "qwen2"
+        "name": "Qwen 2",
+        "url": "https://rajhuggingface4253-qwen2.hf.space",
+        "endpoint": "/health",
+        "type": "health_check"
     },
     {
+        "name": "Qwen 3",
         "url": "https://rajhuggingface4253-qwen3.hf.space",
-        "api_endpoint": "/chat",
-        "warmup_message": "Just say OK",
-        "type": "qwen3"
+        "endpoint": "/health",
+        "type": "health_check"
     },
     {
-        "url": "https://rajhuggingface4253-koko.hf.space",
-        "api_endpoint": "/tts",
-        "warmup_message": "Active",
-        "type": "tts"
+        "name": "Kokoro TTS",
+        "url": "https://rajhuggingface4253-koko.hf.space",
+        "endpoint": "/health",  # Assuming it has a health endpoint
+        "type": "health_check"
     }
 ]
 
 # Global state
 ping_results: Dict[str, Dict] = {}
-chat_warmup_results: Dict[str, Dict] = {}
+model_warmup_results: Dict[str, Dict] = {}
 health_results: Dict[str, Dict] = {}
 last_ping_run: datetime = None
-last_chat_warmup: datetime = None
+last_model_warmup: datetime = None
 
 async def ping_server(url: str) -> Dict:
-    """Ping a regular server"""
+    """Ping a regular server with HTTP GET"""
     try:
         start_time = time.time()
         async with httpx.AsyncClient(timeout=10.0) as client:
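If another Space ever needs warming, a new entry follows the same shape as the config above. A minimal sketch; the name and URL below are hypothetical:

# Hypothetical extra entry; the keys match what warmup_model_health reads.
models_to_warm.append({
    "name": "Example Model",
    "url": "https://example-space.hf.space",  # hypothetical Space URL
    "endpoint": "/health",
    "type": "health_check"
})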
@@ -79,110 +79,54 @@ async def ping_server(url: str) -> Dict:
             'timestamp': datetime.now().isoformat()
         }
 
-async def warmup_chat_model(model_config: Dict) -> Dict:
-    """Send a warmup message to a chat model with streaming support"""
+async def warmup_model_health(model_config: Dict) -> Dict:
+    """Warm up a model by calling its health endpoint"""
     try:
         start_time = time.time()
-
-        # Try both streaming and non-streaming approaches
-        async with httpx.AsyncClient(timeout=25.0) as client:
-
-            # First try: Non-streaming request (preferred for warming)
-            payload = {
-                "messages": [{"role": "user", "content": model_config["warmup_message"]}],
-                "max_tokens": 15,
-                "stream": False  # Explicitly disable streaming for warming
-            }
-
-            # Adjust payload for specific model types
-            if model_config["type"].startswith("qwen"):
-                payload = {
-                    "message": model_config["warmup_message"],
-                    "max_tokens": 15,
-                    "stream": False
-                }
+        async with httpx.AsyncClient(timeout=15.0) as client:
 
-            api_url = f"{model_config['url']}{model_config['api_endpoint']}"
+            api_url = f"{model_config['url']}{model_config['endpoint']}"
+            response = await client.get(api_url)
+            response_time = round((time.time() - start_time) * 1000, 1)
 
-            try:
-                response = await client.post(
-                    api_url,
-                    json=payload,
-                    headers={"Content-Type": "application/json"}
-                )
-
-                response_time = round((time.time() - start_time) * 1000, 1)
-                response_text = await response.atext() if response.status_code == 200 else ""
-
-                # Check if we actually got a meaningful response
-                got_valid_response = len(response_text.strip()) > 0 and response.status_code == 200
-
-                result = {
-                    'status': 'success' if got_valid_response else 'error',
-                    'response_time_ms': response_time,
-                    'status_code': response.status_code,
-                    'response_preview': response_text[:100] if got_valid_response else "No response",
-                    'got_response': got_valid_response,
-                    'method': 'non-streaming',
-                    'timestamp': datetime.now().isoformat()
-                }
-
-                if got_valid_response:
-                    return result
+            # Check if health endpoint returned successful status
+            if response.status_code == 200:
+                try:
+                    # Try to parse JSON response
+                    health_data = response.json()
+                    model_status = health_data.get('status', 'unknown')
+                    model_name = health_data.get('model', 'Unknown')
 
-            except (httpx.ReadTimeout, asyncio.TimeoutError):
-                # Non-streaming failed, try streaming approach
-                pass
-
-            # Second try: Streaming approach (if non-streaming fails)
-            print(f"🔄 Trying streaming approach for {model_config['url']}")
-            try:
-                streaming_start = time.time()
-                streaming_payload = payload.copy()
-                streaming_payload["stream"] = True  # Enable streaming
-
-                async with client.stream(
-                    'POST',
-                    api_url,
-                    json=streaming_payload,
-                    timeout=20.0
-                ) as response:
-
-                    first_chunk_received = False
-                    full_response = ""
-
-                    async for line in response.aiter_lines():
-                        if line.strip() and 'data:' in line:
-                            first_chunk_received = True
-                            # Try to extract actual content from streaming format
-                            if '"content":"' in line:
-                                try:
-                                    content_start = line.find('"content":"') + 10
-                                    content_end = line.find('"', content_start)
-                                    if content_end > content_start:
-                                        content = line[content_start:content_end]
-                                        full_response += content
-                                except:
-                                    pass
-                            break  # Got at least one chunk
-
-                    streaming_time = round((time.time() - streaming_start) * 1000, 1)
+                    is_healthy = model_status in ['ok', 'healthy', 'ready']
 
                     return {
-                        'status': 'success' if first_chunk_received else 'error',
-                        'response_time_ms': streaming_time,
+                        'status': 'success' if is_healthy else 'error',
+                        'response_time_ms': response_time,
                         'status_code': response.status_code,
-                        'response_preview': full_response[:100] if full_response else "Streaming chunk received",
-                        'got_response': first_chunk_received,
-                        'method': 'streaming',
+                        'model_status': model_status,
+                        'model_name': model_name,
+                        'got_response': True,
                         'timestamp': datetime.now().isoformat()
                     }
-
-            except Exception as stream_error:
+                except:
+                    # If JSON parsing fails but status is 200, consider it successful
+                    return {
+                        'status': 'success',
+                        'response_time_ms': response_time,
+                        'status_code': response.status_code,
+                        'model_status': 'ok',
+                        'model_name': 'Unknown',
+                        'got_response': True,
+                        'timestamp': datetime.now().isoformat()
+                    }
+            else:
                 return {
                     'status': 'error',
-                    'error': f"Both methods failed: {str(stream_error)}",
-                    'response_time_ms': round((time.time() - start_time) * 1000, 1),
+                    'response_time_ms': response_time,
+                    'status_code': response.status_code,
+                    'model_status': 'unhealthy',
+                    'model_name': 'Unknown',
+                    'got_response': False,
                     'timestamp': datetime.now().isoformat()
                 }
 
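For context, the new warmup_model_health treats a model as healthy when its /health JSON reports a status of 'ok', 'healthy', or 'ready'. A minimal sketch of a payload that parser would accept; the commit does not show the Spaces' actual responses, so the values are hypothetical:

# Hypothetical /health payload; 'status' and 'model' are the keys the parser reads.
example_health_payload = {
    "status": "ok",      # counted as healthy ('ok', 'healthy', or 'ready')
    "model": "Qwen-7B"   # surfaced as 'model_name' in the warmup result
}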
@@ -190,55 +134,13 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
         return {
             'status': 'error',
             'error': str(e),
+            'response_time_ms': round((time.time() - start_time) * 1000, 1),
+            'model_status': 'connection_failed',
+            'model_name': 'Unknown',
+            'got_response': False,
             'timestamp': datetime.now().isoformat()
         }
 
-async def warmup_tts_model(model_config: Dict) -> Dict:
-    """Warm up TTS model with proper response handling"""
-    try:
-        start_time = time.time()
-        async with httpx.AsyncClient(timeout=15.0) as client:
-
-            payload = {
-                "text": model_config["warmup_message"],
-                "voice": "default"
-            }
-
-            api_url = f"{model_config['url']}{model_config['api_endpoint']}"
-            response = await client.post(
-                api_url,
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            )
-
-            response_time = round((time.time() - start_time) * 1000, 1)
-
-            # For TTS, success means we got any response (audio data or confirmation)
-            success = response.status_code == 200 and len(response.content) > 0
-
-            return {
-                'status': 'success' if success else 'error',
-                'response_time_ms': response_time,
-                'status_code': response.status_code,
-                'response_preview': f"TTS response: {len(response.content)} bytes" if success else "No audio data",
-                'got_response': success,
-                'timestamp': datetime.now().isoformat()
-            }
-
-    except Exception as e:
-        return {
-            'status': 'error',
-            'error': str(e),
-            'timestamp': datetime.now().isoformat()
-        }
-
-async def warmup_single_model(model_config: Dict) -> Dict:
-    """Route to appropriate warming function based on model type"""
-    if model_config["type"] == "tts":
-        return await warmup_tts_model(model_config)
-    else:
-        return await warmup_chat_model(model_config)
-
 async def ping_all_servers():
     """Ping all regular servers"""
     global ping_results, last_ping_run
@@ -259,30 +161,34 @@ async def ping_all_servers():
 
     return results
 
-async def warmup_all_chat_models():
-    """Warm up all chat models with actual messages and response verification"""
-    global chat_warmup_results, last_chat_warmup
+async def warmup_all_models():
+    """Warm up all models using health endpoints"""
+    global model_warmup_results, last_model_warmup
 
-    if not chat_models:
+    if not models_to_warm:
         return []
 
-    tasks = [warmup_single_model(model) for model in chat_models]
+    tasks = [warmup_model_health(model) for model in models_to_warm]
     results = await asyncio.gather(*tasks)
 
-    for i, model in enumerate(chat_models):
-        chat_warmup_results[model['url']] = results[i]
+    for i, model in enumerate(models_to_warm):
+        model_warmup_results[model['url']] = {
+            'model_info': model,
+            'health_check': results[i]
+        }
 
-        # Log detailed results for debugging
-        if results[i]['status'] == 'success' and results[i].get('got_response'):
-            print(f"✅ {model['url']}: {results[i]['response_time_ms']}ms - {results[i]['response_preview']}")
+        # Log detailed results
+        result = results[i]
+        if result['status'] == 'success' and result.get('got_response'):
+            print(f"✅ {model['name']}: {result['response_time_ms']}ms - {result['model_name']} ({result['model_status']})")
         else:
-            print(f"❌ {model['url']}: {results[i].get('error', 'No response')}")
+            print(f"❌ {model['name']}: {result.get('error', 'Health check failed')}")
 
-    last_chat_warmup = datetime.now()
+    last_model_warmup = datetime.now()
 
-    # Only count as success if we actually got a response
-    success_count = sum(1 for result in results if result.get('got_response', False))
-    print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Chat models: {success_count}/{len(chat_models)} Actually responded")
+    # Count as success only if health check passed
+    success_count = sum(1 for result in results if result['status'] == 'success')
+    print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Models: {success_count}/{len(models_to_warm)} Healthy")
 
     return results
 
@@ -311,20 +217,22 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
             'status_code': response.status_code,
             'last_ping': datetime.now().isoformat()
         }
+        print(f"🔗 Health ping to {space_url}: {response_time}ms")
     except Exception as e:
         health_results[space_url] = {
             'status': 'error',
             'error': str(e),
             'last_ping': datetime.now().isoformat()
         }
+        print(f"🔗 Health ping failed for {space_url}: {e}")
 
 async def continuous_pinging():
-    """Main pinging loop with chat model warming"""
-    print("🚀 Enhanced Chat Model Warmer Started!")
+    """Main pinging loop with model health checking"""
+    print("🚀 Smart Model Warmer Started!")
     print(f"🌐 Regular servers: {len(regular_servers)}")
-    print(f"🤖 Chat models: {len(chat_models)}")
+    print(f"🤖 Models to warm: {len(models_to_warm)}")
     print(f"🔗 Pinger network: {len(pinger_spaces)}")
-    print("⏰ Warming with response verification every 5 minutes")
+    print("⏰ Health checks every 5 minutes")
 
     last_health_check = 0
 
@@ -334,13 +242,14 @@ async def continuous_pinging():
         if regular_servers:
             await ping_all_servers()
 
-        # Warm up chat models with response verification
-        if chat_models:
-            await warmup_all_chat_models()
+        # Warm up models using health endpoints
+        if models_to_warm:
+            await warmup_all_models()
 
         # Ping health endpoints every 30 minutes
         current_time = time.time()
         if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
+            print("🔄 Pinging other pinger spaces...")
            await ping_health_endpoints()
            last_health_check = current_time
 
@@ -353,31 +262,80 @@
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
-    print("Starting up Enhanced Chat Model Warmer...")
+    print("Starting up Smart Model Warmer...")
     asyncio.create_task(continuous_pinging())
     yield
     # Shutdown
     print("Shutting down...")
 
-app = FastAPI(title="Enhanced Chat Model Warmer", lifespan=lifespan)
+app = FastAPI(title="Smart Model Warmer", lifespan=lifespan)
 
 @app.get("/", response_class=HTMLResponse)
 async def home():
-    """Minimal dashboard"""
+    """Dashboard showing warming status"""
     regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
-    # Only count chat models that actually responded
-    chat_success = sum(1 for r in chat_warmup_results.values() if r.get('got_response', False))
+    model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
     health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
 
+    # Get model details for display
+    model_statuses = []
+    for url, data in model_warmup_results.items():
+        model_info = data['model_info']
+        health = data['health_check']
+        model_statuses.append({
+            'name': model_info['name'],
+            'status': health['status'],
+            'response_time': health.get('response_time_ms', 0),
+            'model_name': health.get('model_name', 'Unknown')
+        })
+
+    model_status_html = "".join([
+        f"<li>{m['name']}: <span class={'success' if m['status'] == 'success' else 'error'}>{m['status']}</span> "
+        f"({m['response_time']}ms) - {m['model_name']}</li>"
+        for m in model_statuses
+    ])
+
     html_content = f"""
     <html>
-    <head><title>Enhanced Chat Model Warmer</title></head>
+    <head>
+        <title>Smart Model Warmer</title>
+        <style>
+            body {{ font-family: Arial, sans-serif; margin: 40px; }}
+            .success {{ color: green; font-weight: bold; }}
+            .error {{ color: red; font-weight: bold; }}
+            .container {{ max-width: 1000px; margin: 0 auto; }}
+            .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
+        </style>
+    </head>
     <body>
-        <h1>🤖 Enhanced Chat Model Warmer</h1>
-        <p><strong>Regular Servers:</strong> {regular_success}/{len(regular_servers)} OK</p>
-        <p><strong>Chat Models (Responded):</strong> {chat_success}/{len(chat_models)} Actually Warm</p>
-        <p><strong>Last Chat Warmup:</strong> {last_chat_warmup.strftime('%H:%M:%S') if last_chat_warmup else 'Never'}</p>
-        <p><strong>Network:</strong> {health_success}/{len(pinger_spaces)} OK</p>
+        <div class="container">
+            <h1>🤖 Smart Model Warmer</h1>
+
+            <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px;">
+                <div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
+                    <h3>🌐 Regular Servers</h3>
+                    <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
+                </div>
+                <div style="background: #e3f2fd; padding: 15px; border-radius: 8px;">
+                    <h3>🤖 AI Models</h3>
+                    <p><strong>{model_success}/{len(models_to_warm)} Healthy</strong></p>
+                </div>
+                <div style="background: #fff3e0; padding: 15px; border-radius: 8px;">
+                    <h3>🔗 Pinger Network</h3>
+                    <p><strong>{health_success}/{len(pinger_spaces)} OK</strong></p>
+                </div>
+            </div>
+
+            <div class="model-list">
+                <h3>Model Health Status</h3>
+                <ul>
+                    {model_status_html if model_statuses else "<li>No model data yet</li>"}
+                </ul>
+            </div>
+
+            <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
+            <p><strong>Next check in:</strong> ~5 minutes</p>
+        </div>
     </body>
     </html>
     """
@@ -385,19 +343,35 @@ async def home():
 
 @app.get("/health")
 async def health():
+    """Health endpoint for other pingers"""
     return JSONResponse({
         "status": "healthy",
-        "service": "enhanced-chat-model-warmer",
+        "service": "smart-model-warmer",
         "regular_servers": len(regular_servers),
-        "chat_models": len(chat_models),
-        "last_chat_warmup": last_chat_warmup.isoformat() if last_chat_warmup else None
+        "ai_models": len(models_to_warm),
+        "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
+        "timestamp": datetime.now().isoformat()
     })
 
 @app.get("/status")
 async def status():
+    """Detailed status endpoint"""
     return JSONResponse({
         "regular_servers": ping_results,
-        "chat_models": chat_warmup_results,
+        "ai_models": model_warmup_results,
+        "pinger_network": health_results,
+        "timestamp": datetime.now().isoformat()
+    })
+
+@app.get("/ping-now")
+async def ping_now():
+    """Manually trigger immediate warming"""
+    results = await warmup_all_models()
+    success_count = sum(1 for result in results if result['status'] == 'success')
+
+    return JSONResponse({
+        "message": "Manual warming completed",
+        "models_healthy": f"{success_count}/{len(models_to_warm)}",
         "timestamp": datetime.now().isoformat()
     })
 
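For reference, a minimal client sketch for the endpoints this version exposes (/health, /status, and the new /ping-now). The base URL is an assumption for whichever host serves the app; the JSON keys come from the handlers in the diff above:

import asyncio
import httpx

BASE_URL = "http://localhost:7860"  # assumption: default local port for a HF Space

async def poke_warmer():
    async with httpx.AsyncClient(timeout=30.0) as client:
        # Liveness summary used by the pinger network
        health = (await client.get(f"{BASE_URL}/health")).json()
        print(health["status"], health.get("last_model_warmup"))

        # Trigger an immediate warming pass instead of waiting ~5 minutes
        ping_now = (await client.get(f"{BASE_URL}/ping-now")).json()
        print(ping_now["models_healthy"])

        # Full per-server and per-model detail
        status = (await client.get(f"{BASE_URL}/status")).json()
        print(list(status.keys()))

asyncio.run(poke_warmer())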