Rajhuggingface4253 committed on
Commit
a3f3af9
Β·
verified Β·
1 Parent(s): 1eda88b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -93
app.py CHANGED
@@ -4,9 +4,8 @@ import httpx
4
  import asyncio
5
  import time
6
  from datetime import datetime
7
- from typing import Dict, List, Optional
8
  from contextlib import asynccontextmanager
9
- import json
10
 
11
  # Configuration
12
  PING_INTERVAL = 300 # 5 minutes
@@ -24,39 +23,54 @@ regular_servers = [
24
  "https://rajhuggingface4253-compressor3pro.hf.space"
25
  ]
26
 
27
- # Enhanced models configuration with streaming support
28
  models_to_warm = [
29
  {
30
  "name": "Qwen 1",
31
  "url": "https://rajhuggingface4253-qwen.hf.space",
32
  "endpoint": "/chat",
33
  "type": "streaming_chat",
34
- "warmup_prompt": "Hello, are you ready? Respond with just 'OK'.",
35
  "timeout": 30.0,
36
- "expected_response": "OK"
 
 
 
 
 
 
37
  },
38
  {
39
  "name": "Qwen 2",
40
  "url": "https://rajhuggingface4253-qwen2.hf.space",
41
  "endpoint": "/chat",
42
  "type": "streaming_chat",
43
- "warmup_prompt": "Say 'ready' if you're working.",
44
  "timeout": 30.0,
45
- "expected_response": "ready"
 
 
 
 
 
 
46
  },
47
  {
48
  "name": "Qwen 3",
49
  "url": "https://rajhuggingface4253-qwen3.hf.space",
50
  "endpoint": "/chat",
51
  "type": "streaming_chat",
52
- "warmup_prompt": "Ping! Respond with 'pong'.",
53
  "timeout": 30.0,
54
- "expected_response": "pong"
 
 
 
 
 
 
55
  },
56
  {
57
  "name": "Kokoro TTS",
58
  "url": "https://rajhuggingface4253-koko.hf.space",
59
- "endpoint": "/health", # TTS might still use health endpoint
60
  "type": "health_check",
61
  "timeout": 15.0
62
  }
@@ -91,23 +105,12 @@ async def ping_server(url: str) -> Dict:
91
  }
92
 
93
  async def warmup_streaming_model(model_config: Dict) -> Dict:
94
- """
95
- Warm up streaming models by calling their chat endpoints
96
- and reading the streaming response
97
- """
98
  try:
99
  start_time = time.time()
100
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
101
 
102
- # Prepare the request payload for chat endpoint
103
- payload = {
104
- "prompt": model_config['warmup_prompt'],
105
- "max_new_tokens": 50,
106
- "temperature": 0.1, # Low temperature for consistent responses
107
- "enable_code_execution": False,
108
- "enable_web_search": False
109
- }
110
-
111
  api_url = f"{model_config['url']}{model_config['endpoint']}"
112
 
113
  # Make the streaming request
@@ -133,15 +136,8 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
133
 
134
  # Check if we got a valid response
135
  if collected_response:
136
- # Check for expected response if specified
137
- expected_response = model_config.get('expected_response', '').lower()
138
- if expected_response and expected_response in collected_response.lower():
139
- status = 'success'
140
- else:
141
- status = 'success' # Still success if we got any response
142
-
143
  return {
144
- 'status': status,
145
  'response_time_ms': response_time,
146
  'status_code': response.status_code,
147
  'model_response': collected_response[:200] + "..." if len(collected_response) > 200 else collected_response,
@@ -177,7 +173,7 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
177
  }
178
 
179
  async def warmup_model_health(model_config: Dict) -> Dict:
180
- """Warm up models using health endpoints (for non-streaming models)"""
181
  try:
182
  start_time = time.time()
183
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
@@ -278,7 +274,7 @@ async def warmup_all_models():
278
  'health_check': results[i]
279
  }
280
 
281
- # Log detailed results
282
  result = results[i]
283
  model_name = model['name']
284
 
@@ -288,13 +284,8 @@ async def warmup_all_models():
288
  if result.get('response_length'):
289
  response_info += f" ({result['response_length']} chars)"
290
  print(f"βœ… {model_name}: {response_info}")
291
-
292
- # Log snippet of model response for streaming models
293
- if result.get('model_response'):
294
- snippet = result['model_response'].replace('\n', ' ')[:50]
295
- print(f" πŸ“ Response: {snippet}...")
296
  else:
297
- print(f"βœ… {model_name}: {result['response_time_ms']}ms (health check)")
298
  else:
299
  error_msg = result.get('error', 'Unknown error')
300
  print(f"❌ {model_name}: {error_msg}")
@@ -341,13 +332,11 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
341
  print(f"πŸ”— Health ping failed for {space_url}: {e}")
342
 
343
  async def continuous_pinging():
344
- """Main pinging loop with enhanced model warming"""
345
- print("πŸš€ Enhanced Smart Model Warmer Started!")
346
  print(f"🌐 Regular servers: {len(regular_servers)}")
347
  print(f"πŸ€– Models to warm: {len(models_to_warm)}")
348
  print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
349
- print("⏰ Health checks every 5 minutes")
350
- print("🎯 Using streaming endpoints for model warming")
351
 
352
  last_health_check = 0
353
 
@@ -357,14 +346,13 @@ async def continuous_pinging():
357
  if regular_servers:
358
  await ping_all_servers()
359
 
360
- # Warm up models using appropriate endpoints
361
  if models_to_warm:
362
  await warmup_all_models()
363
 
364
  # Ping health endpoints every 30 minutes
365
  current_time = time.time()
366
  if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
367
- print("πŸ”„ Pinging other pinger spaces...")
368
  await ping_health_endpoints()
369
  last_health_check = current_time
370
 
@@ -377,13 +365,12 @@ async def continuous_pinging():
377
  @asynccontextmanager
378
  async def lifespan(app: FastAPI):
379
  # Startup
380
- print("Starting up Enhanced Smart Model Warmer...")
381
  asyncio.create_task(continuous_pinging())
382
  yield
383
  # Shutdown
384
  print("Shutting down...")
385
 
386
- app = FastAPI(title="Enhanced Smart Model Warmer", lifespan=lifespan)
387
 
388
  @app.get("/", response_class=HTMLResponse)
389
  async def home():
@@ -397,57 +384,36 @@ async def home():
397
  for url, data in model_warmup_results.items():
398
  model_info = data['model_info']
399
  health = data['health_check']
400
-
401
- status_info = {
402
  'name': model_info['name'],
403
  'type': model_info.get('type', 'health_check'),
404
  'status': health['status'],
405
  'response_time': health.get('response_time_ms', 0),
406
  'error': health.get('error', '')
407
- }
408
-
409
- # Add response info for streaming models
410
- if health.get('got_response') and health.get('model_response'):
411
- status_info['response_snippet'] = health['model_response']
412
- status_info['response_length'] = health.get('response_length', 0)
413
-
414
- model_statuses.append(status_info)
415
 
416
  model_status_html = "".join([
417
- f"""
418
- <li>
419
- <strong>{m['name']}</strong> ({m['type']}):
420
- <span class="{'success' if m['status'] == 'success' else 'error'}">{m['status']}</span>
421
- ({m['response_time']}ms)
422
- {f" - {m['response_length']} chars" if m.get('response_length') else ''}
423
- {f"<br><small>Error: {m['error']}</small>" if m['error'] else ''}
424
- {f"<br><small>Snippet: {m.get('response_snippet', '')}</small>" if m.get('response_snippet') else ''}
425
- </li>
426
- """
427
  for m in model_statuses
428
  ])
429
 
430
  html_content = f"""
431
  <html>
432
  <head>
433
- <title>Enhanced Smart Model Warmer</title>
434
  <style>
435
  body {{ font-family: Arial, sans-serif; margin: 40px; }}
436
  .success {{ color: green; font-weight: bold; }}
437
  .error {{ color: red; font-weight: bold; }}
438
- .container {{ max-width: 1200px; margin: 0 auto; }}
439
  .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
440
- .status-grid {{ display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px; }}
441
- .model-item {{ margin: 10px 0; padding: 10px; background: white; border-radius: 5px; }}
442
- small {{ color: #666; }}
443
  </style>
444
  </head>
445
  <body>
446
  <div class="container">
447
- <h1>πŸš€ Enhanced Smart Model Warmer</h1>
448
- <p><em>Now with streaming endpoint support for AI models</em></p>
449
 
450
- <div class="status-grid">
451
  <div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
452
  <h3>🌐 Regular Servers</h3>
453
  <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
@@ -463,7 +429,7 @@ async def home():
463
  </div>
464
 
465
  <div class="model-list">
466
- <h3>Model Warming Status</h3>
467
  <ul>
468
  {model_status_html if model_statuses else "<li>No model data yet</li>"}
469
  </ul>
@@ -471,7 +437,6 @@ async def home():
471
 
472
  <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
473
  <p><strong>Next check in:</strong> ~5 minutes</p>
474
- <p><strong>Streaming endpoints:</strong> Using /chat for Qwen models</p>
475
  </div>
476
  </body>
477
  </html>
@@ -483,10 +448,9 @@ async def health():
483
  """Health endpoint for other pingers"""
484
  return JSONResponse({
485
  "status": "healthy",
486
- "service": "enhanced-smart-model-warmer",
487
  "regular_servers": len(regular_servers),
488
  "ai_models": len(models_to_warm),
489
- "streaming_models": sum(1 for m in models_to_warm if m.get('type') == 'streaming_chat'),
490
  "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
491
  "timestamp": datetime.now().isoformat()
492
  })
@@ -513,20 +477,6 @@ async def ping_now():
513
  "timestamp": datetime.now().isoformat()
514
  })
515
 
516
- @app.get("/test-streaming/{model_index}")
517
- async def test_streaming(model_index: int = 0):
518
- """Test a specific streaming model"""
519
- if model_index >= len(models_to_warm):
520
- return JSONResponse({"error": "Model index out of range"}, status_code=400)
521
-
522
- model_config = models_to_warm[model_index]
523
- result = await warmup_single_model(model_config)
524
-
525
- return JSONResponse({
526
- "model": model_config['name'],
527
- "test_result": result
528
- })
529
-
530
  if __name__ == "__main__":
531
  import uvicorn
532
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
4
  import asyncio
5
  import time
6
  from datetime import datetime
7
+ from typing import Dict, List
8
  from contextlib import asynccontextmanager
 
9
 
10
  # Configuration
11
  PING_INTERVAL = 300 # 5 minutes
 
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
+ # Models to warm with correct payloads
27
  models_to_warm = [
28
  {
29
  "name": "Qwen 1",
30
  "url": "https://rajhuggingface4253-qwen.hf.space",
31
  "endpoint": "/chat",
32
  "type": "streaming_chat",
 
33
  "timeout": 30.0,
34
+ "payload": {
35
+ "prompt": "Hello, are you ready? Respond with just 'OK'.",
36
+ "max_new_tokens": 50,
37
+ "temperature": 0.1,
38
+ "enable_code_execution": False,
39
+ "enable_web_search": False
40
+ }
41
  },
42
  {
43
  "name": "Qwen 2",
44
  "url": "https://rajhuggingface4253-qwen2.hf.space",
45
  "endpoint": "/chat",
46
  "type": "streaming_chat",
 
47
  "timeout": 30.0,
48
+ "payload": {
49
+ "prompt": "Say 'ready' if you're working.",
50
+ "max_new_tokens": 50,
51
+ "temperature": 0.1,
52
+ "enable_code_execution": False,
53
+ "enable_web_search": False
54
+ }
55
  },
56
  {
57
  "name": "Qwen 3",
58
  "url": "https://rajhuggingface4253-qwen3.hf.space",
59
  "endpoint": "/chat",
60
  "type": "streaming_chat",
 
61
  "timeout": 30.0,
62
+ "payload": {
63
+ "prompt": "Ping! Respond with 'pong'.",
64
+ "max_new_tokens": 50,
65
+ "temperature": 0.1,
66
+ "enable_code_execution": False,
67
+ "enable_web_search": False
68
+ }
69
  },
70
  {
71
  "name": "Kokoro TTS",
72
  "url": "https://rajhuggingface4253-koko.hf.space",
73
+ "endpoint": "/health",
74
  "type": "health_check",
75
  "timeout": 15.0
76
  }
 
105
  }
106
 
107
  async def warmup_streaming_model(model_config: Dict) -> Dict:
108
+ """Warm up streaming models by calling their chat endpoints"""
 
 
 
109
  try:
110
  start_time = time.time()
111
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
112
 
113
+ payload = model_config['payload']
 
 
 
 
 
 
 
 
114
  api_url = f"{model_config['url']}{model_config['endpoint']}"
115
 
116
  # Make the streaming request
 
136
 
137
  # Check if we got a valid response
138
  if collected_response:
 
 
 
 
 
 
 
139
  return {
140
+ 'status': 'success',
141
  'response_time_ms': response_time,
142
  'status_code': response.status_code,
143
  'model_response': collected_response[:200] + "..." if len(collected_response) > 200 else collected_response,
 
173
  }
174
 
175
  async def warmup_model_health(model_config: Dict) -> Dict:
176
+ """Warm up models using health endpoints"""
177
  try:
178
  start_time = time.time()
179
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
 
274
  'health_check': results[i]
275
  }
276
 
277
+ # Log results
278
  result = results[i]
279
  model_name = model['name']
280
 
 
284
  if result.get('response_length'):
285
  response_info += f" ({result['response_length']} chars)"
286
  print(f"βœ… {model_name}: {response_info}")
 
 
 
 
 
287
  else:
288
+ print(f"βœ… {model_name}: {result['response_time_ms']}ms")
289
  else:
290
  error_msg = result.get('error', 'Unknown error')
291
  print(f"❌ {model_name}: {error_msg}")
 
332
  print(f"πŸ”— Health ping failed for {space_url}: {e}")
333
 
334
  async def continuous_pinging():
335
+ """Main pinging loop with model warming"""
336
+ print("πŸš€ Smart Model Warmer Started!")
337
  print(f"🌐 Regular servers: {len(regular_servers)}")
338
  print(f"πŸ€– Models to warm: {len(models_to_warm)}")
339
  print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
 
 
340
 
341
  last_health_check = 0
342
 
 
346
  if regular_servers:
347
  await ping_all_servers()
348
 
349
+ # Warm up models
350
  if models_to_warm:
351
  await warmup_all_models()
352
 
353
  # Ping health endpoints every 30 minutes
354
  current_time = time.time()
355
  if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
 
356
  await ping_health_endpoints()
357
  last_health_check = current_time
358
 
 
365
  @asynccontextmanager
366
  async def lifespan(app: FastAPI):
367
  # Startup
 
368
  asyncio.create_task(continuous_pinging())
369
  yield
370
  # Shutdown
371
  print("Shutting down...")
372
 
373
+ app = FastAPI(title="Smart Model Warmer", lifespan=lifespan)
374
 
375
  @app.get("/", response_class=HTMLResponse)
376
  async def home():
 
384
  for url, data in model_warmup_results.items():
385
  model_info = data['model_info']
386
  health = data['health_check']
387
+ model_statuses.append({
 
388
  'name': model_info['name'],
389
  'type': model_info.get('type', 'health_check'),
390
  'status': health['status'],
391
  'response_time': health.get('response_time_ms', 0),
392
  'error': health.get('error', '')
393
+ })
 
 
 
 
 
 
 
394
 
395
  model_status_html = "".join([
396
+ f"<li>{m['name']} ({m['type']}): <span class={'success' if m['status'] == 'success' else 'error'}>{m['status']}</span> ({m['response_time']}ms){f' - {m[\"error\"]}' if m['error'] else ''}</li>"
 
 
 
 
 
 
 
 
 
397
  for m in model_statuses
398
  ])
399
 
400
  html_content = f"""
401
  <html>
402
  <head>
403
+ <title>Smart Model Warmer</title>
404
  <style>
405
  body {{ font-family: Arial, sans-serif; margin: 40px; }}
406
  .success {{ color: green; font-weight: bold; }}
407
  .error {{ color: red; font-weight: bold; }}
408
+ .container {{ max-width: 1000px; margin: 0 auto; }}
409
  .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
 
 
 
410
  </style>
411
  </head>
412
  <body>
413
  <div class="container">
414
+ <h1>πŸ€– Smart Model Warmer</h1>
 
415
 
416
+ <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px;">
417
  <div style="background: #e8f5e8; padding: 15px; border-radius: 8px;">
418
  <h3>🌐 Regular Servers</h3>
419
  <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
 
429
  </div>
430
 
431
  <div class="model-list">
432
+ <h3>Model Status</h3>
433
  <ul>
434
  {model_status_html if model_statuses else "<li>No model data yet</li>"}
435
  </ul>
 
437
 
438
  <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
439
  <p><strong>Next check in:</strong> ~5 minutes</p>
 
440
  </div>
441
  </body>
442
  </html>
 
448
  """Health endpoint for other pingers"""
449
  return JSONResponse({
450
  "status": "healthy",
451
+ "service": "smart-model-warmer",
452
  "regular_servers": len(regular_servers),
453
  "ai_models": len(models_to_warm),
 
454
  "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
455
  "timestamp": datetime.now().isoformat()
456
  })
 
477
  "timestamp": datetime.now().isoformat()
478
  })
479
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
  if __name__ == "__main__":
481
  import uvicorn
482
  uvicorn.run(app, host="0.0.0.0", port=7860)