Spaces:

Rajhuggingface4253
/

ping2

Running

App Files Community

Rajhuggingface4253 committed on Sep 25, 2025

Commit

8dbef7a

verified ·

1 Parent(s): f5cc238

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -26

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ regular_servers = [
 chat_models = [
     {
         "url": "https://rajhuggingface4253-qwen.hf.space",
-        "api_endpoint": "/chat",  # Adjust based on your API
         "warmup_message": "Say 'active' in one word?",
         "type": "qwen"
     },
@@ -80,20 +80,129 @@ async def ping_server(url: str) -> Dict:
         }
 async def warmup_chat_model(model_config: Dict) -> Dict:
-    """Send a warmup message to a chat model"""
     try:
         start_time = time.time()
-        async with httpx.AsyncClient(timeout=30.0) as client:
             if model_config["type"].startswith("qwen"):
                 payload = {
                     "message": model_config["warmup_message"],
-                    "max_tokens": 10
                 }
-            else:
-                payload = {
-                    "messages": [{"role": "user", "content": model_config["warmup_message"]}],
-                    "max_tokens": 10
                 }
             api_url = f"{model_config['url']}{model_config['api_endpoint']}"
             response = await client.post(
@@ -104,11 +213,15 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
             response_time = round((time.time() - start_time) * 1000, 1)
             return {
-                'status': 'success',
                 'response_time_ms': response_time,
                 'status_code': response.status_code,
-                'response_preview': str(response.text)[:100],
                 'timestamp': datetime.now().isoformat()
             }
@@ -119,6 +232,13 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
             'timestamp': datetime.now().isoformat()
         }
 async def ping_all_servers():
     """Ping all regular servers"""
     global ping_results, last_ping_run
@@ -140,22 +260,29 @@ async def ping_all_servers():
     return results
 async def warmup_all_chat_models():
-    """Warm up all chat models with actual messages"""
     global chat_warmup_results, last_chat_warmup
     if not chat_models:
         return []
-    tasks = [warmup_chat_model(model) for model in chat_models]
     results = await asyncio.gather(*tasks)
     for i, model in enumerate(chat_models):
         chat_warmup_results[model['url']] = results[i]
     last_chat_warmup = datetime.now()
-    success_count = sum(1 for result in results if result['status'] == 'success')
-    print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Chat models: {success_count}/{len(chat_models)} Warmed up")
     return results
@@ -193,11 +320,11 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
 async def continuous_pinging():
     """Main pinging loop with chat model warming"""
-    print("🚀 Chat Model Warmer Started!")
     print(f"🌐 Regular servers: {len(regular_servers)}")
     print(f"🤖 Chat models: {len(chat_models)}")
     print(f"🔗 Pinger network: {len(pinger_spaces)}")
-    print("⏰ Chat warmup every 5 minutes")
     last_health_check = 0
@@ -207,11 +334,11 @@ async def continuous_pinging():
             if regular_servers:
                 await ping_all_servers()
-            # Warm up chat models (most important!)
             if chat_models:
                 await warmup_all_chat_models()
-            # Ping health endpoints every 30 minutes (FIXED: use the defined constant)
             current_time = time.time()
             if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
                 await ping_health_endpoints()
@@ -220,34 +347,35 @@ async def continuous_pinging():
             await asyncio.sleep(PING_INTERVAL)
         except Exception as e:
-            print(f"❌ Error: {e}")
             await asyncio.sleep(60)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
-    print("Starting up Chat Model Warmer...")
     asyncio.create_task(continuous_pinging())
     yield
     # Shutdown
     print("Shutting down...")
-app = FastAPI(title="Chat Model Warmer", lifespan=lifespan)
 @app.get("/", response_class=HTMLResponse)
 async def home():
     """Minimal dashboard"""
     regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
-    chat_success = sum(1 for r in chat_warmup_results.values() if r.get('status') == 'success')
     health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
     html_content = f"""
     <html>
-        <head><title>Chat Model Warmer</title></head>
         <body>
-            <h1>🤖 Chat Model Warmer</h1>
             <p><strong>Regular Servers:</strong> {regular_success}/{len(regular_servers)} OK</p>
-            <p><strong>Chat Models:</strong> {chat_success}/{len(chat_models)} Warmed up</p>
             <p><strong>Last Chat Warmup:</strong> {last_chat_warmup.strftime('%H:%M:%S') if last_chat_warmup else 'Never'}</p>
             <p><strong>Network:</strong> {health_success}/{len(pinger_spaces)} OK</p>
         </body>
@@ -259,7 +387,7 @@ async def home():
 async def health():
     return JSONResponse({
         "status": "healthy",
-        "service": "chat-model-warmer",
         "regular_servers": len(regular_servers),
         "chat_models": len(chat_models),
         "last_chat_warmup": last_chat_warmup.isoformat() if last_chat_warmup else None

 chat_models = [
     {
         "url": "https://rajhuggingface4253-qwen.hf.space",
+        "api_endpoint": "/chat",
         "warmup_message": "Say 'active' in one word?",
         "type": "qwen"
     },
         }
 async def warmup_chat_model(model_config: Dict) -> Dict:
+    """Send a warmup message to a chat model with streaming support"""
     try:
         start_time = time.time()
+        # Try both streaming and non-streaming approaches
+        async with httpx.AsyncClient(timeout=25.0) as client:
+            # First try: Non-streaming request (preferred for warming)
+            payload = {
+                "messages": [{"role": "user", "content": model_config["warmup_message"]}],
+                "max_tokens": 15,
+                "stream": False  # Explicitly disable streaming for warming
+            }
+            # Adjust payload for specific model types
             if model_config["type"].startswith("qwen"):
                 payload = {
                     "message": model_config["warmup_message"],
+                    "max_tokens": 15,
+                    "stream": False
                 }
+            api_url = f"{model_config['url']}{model_config['api_endpoint']}"
+            try:
+                response = await client.post(
+                    api_url,
+                    json=payload,
+                    headers={"Content-Type": "application/json"}
+                )
+                response_time = round((time.time() - start_time) * 1000, 1)
+                response_text = await response.atext() if response.status_code == 200 else ""
+                # Check if we actually got a meaningful response
+                got_valid_response = len(response_text.strip()) > 0 and response.status_code == 200
+                result = {
+                    'status': 'success' if got_valid_response else 'error',
+                    'response_time_ms': response_time,
+                    'status_code': response.status_code,
+                    'response_preview': response_text[:100] if got_valid_response else "No response",
+                    'got_response': got_valid_response,
+                    'method': 'non-streaming',
+                    'timestamp': datetime.now().isoformat()
                 }
+                if got_valid_response:
+                    return result
+            except (httpx.ReadTimeout, asyncio.TimeoutError):
+                # Non-streaming failed, try streaming approach
+                pass
+            # Second try: Streaming approach (if non-streaming fails)
+            print(f"🔄 Trying streaming approach for {model_config['url']}")
+            try:
+                streaming_start = time.time()
+                streaming_payload = payload.copy()
+                streaming_payload["stream"] = True  # Enable streaming
+                async with client.stream(
+                    'POST',
+                    api_url,
+                    json=streaming_payload,
+                    timeout=20.0
+                ) as response:
+                    first_chunk_received = False
+                    full_response = ""
+                    async for line in response.aiter_lines():
+                        if line.strip() and 'data:' in line:
+                            first_chunk_received = True
+                            # Try to extract actual content from streaming format
+                            if '"content":"' in line:
+                                try:
+                                    content_start = line.find('"content":"') + 10
+                                    content_end = line.find('"', content_start)
+                                    if content_end > content_start:
+                                        content = line[content_start:content_end]
+                                        full_response += content
+                                except:
+                                    pass
+                            break  # Got at least one chunk
+                    streaming_time = round((time.time() - streaming_start) * 1000, 1)
+                    return {
+                        'status': 'success' if first_chunk_received else 'error',
+                        'response_time_ms': streaming_time,
+                        'status_code': response.status_code,
+                        'response_preview': full_response[:100] if full_response else "Streaming chunk received",
+                        'got_response': first_chunk_received,
+                        'method': 'streaming',
+                        'timestamp': datetime.now().isoformat()
+                    }
+            except Exception as stream_error:
+                return {
+                    'status': 'error',
+                    'error': f"Both methods failed: {str(stream_error)}",
+                    'response_time_ms': round((time.time() - start_time) * 1000, 1),
+                    'timestamp': datetime.now().isoformat()
+                }
+    except Exception as e:
+        return {
+            'status': 'error',
+            'error': str(e),
+            'timestamp': datetime.now().isoformat()
+        }
+async def warmup_tts_model(model_config: Dict) -> Dict:
+    """Warm up TTS model with proper response handling"""
+    try:
+        start_time = time.time()
+        async with httpx.AsyncClient(timeout=15.0) as client:
+            payload = {
+                "text": model_config["warmup_message"],
+                "voice": "default"
+            }
             api_url = f"{model_config['url']}{model_config['api_endpoint']}"
             response = await client.post(
             response_time = round((time.time() - start_time) * 1000, 1)
+            # For TTS, success means we got any response (audio data or confirmation)
+            success = response.status_code == 200 and len(response.content) > 0
             return {
+                'status': 'success' if success else 'error',
                 'response_time_ms': response_time,
                 'status_code': response.status_code,
+                'response_preview': f"TTS response: {len(response.content)} bytes" if success else "No audio data",
+                'got_response': success,
                 'timestamp': datetime.now().isoformat()
             }
             'timestamp': datetime.now().isoformat()
         }
+async def warmup_single_model(model_config: Dict) -> Dict:
+    """Route to appropriate warming function based on model type"""
+    if model_config["type"] == "tts":
+        return await warmup_tts_model(model_config)
+    else:
+        return await warmup_chat_model(model_config)
 async def ping_all_servers():
     """Ping all regular servers"""
     global ping_results, last_ping_run
     return results
 async def warmup_all_chat_models():
+    """Warm up all chat models with actual messages and response verification"""
     global chat_warmup_results, last_chat_warmup
     if not chat_models:
         return []
+    tasks = [warmup_single_model(model) for model in chat_models]
     results = await asyncio.gather(*tasks)
     for i, model in enumerate(chat_models):
         chat_warmup_results[model['url']] = results[i]
+        # Log detailed results for debugging
+        if results[i]['status'] == 'success' and results[i].get('got_response'):
+            print(f"✅ {model['url']}: {results[i]['response_time_ms']}ms - {results[i]['response_preview']}")
+        else:
+            print(f"❌ {model['url']}: {results[i].get('error', 'No response')}")
     last_chat_warmup = datetime.now()
+    # Only count as success if we actually got a response
+    success_count = sum(1 for result in results if result.get('got_response', False))
+    print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Chat models: {success_count}/{len(chat_models)} Actually responded")
     return results
 async def continuous_pinging():
     """Main pinging loop with chat model warming"""
+    print("🚀 Enhanced Chat Model Warmer Started!")
     print(f"🌐 Regular servers: {len(regular_servers)}")
     print(f"🤖 Chat models: {len(chat_models)}")
     print(f"🔗 Pinger network: {len(pinger_spaces)}")
+    print("⏰ Warming with response verification every 5 minutes")
     last_health_check = 0
             if regular_servers:
                 await ping_all_servers()
+            # Warm up chat models with response verification
             if chat_models:
                 await warmup_all_chat_models()
+            # Ping health endpoints every 30 minutes
             current_time = time.time()
             if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
                 await ping_health_endpoints()
             await asyncio.sleep(PING_INTERVAL)
         except Exception as e:
+            print(f"❌ Error in main loop: {e}")
             await asyncio.sleep(60)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
+    print("Starting up Enhanced Chat Model Warmer...")
     asyncio.create_task(continuous_pinging())
     yield
     # Shutdown
     print("Shutting down...")
+app = FastAPI(title="Enhanced Chat Model Warmer", lifespan=lifespan)
 @app.get("/", response_class=HTMLResponse)
 async def home():
     """Minimal dashboard"""
     regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
+    # Only count chat models that actually responded
+    chat_success = sum(1 for r in chat_warmup_results.values() if r.get('got_response', False))
     health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
     html_content = f"""
     <html>
+        <head><title>Enhanced Chat Model Warmer</title></head>
         <body>
+            <h1>🤖 Enhanced Chat Model Warmer</h1>
             <p><strong>Regular Servers:</strong> {regular_success}/{len(regular_servers)} OK</p>
+            <p><strong>Chat Models (Responded):</strong> {chat_success}/{len(chat_models)} Actually Warm</p>
             <p><strong>Last Chat Warmup:</strong> {last_chat_warmup.strftime('%H:%M:%S') if last_chat_warmup else 'Never'}</p>
             <p><strong>Network:</strong> {health_success}/{len(pinger_spaces)} OK</p>
         </body>
 async def health():
     return JSONResponse({
         "status": "healthy",
+        "service": "enhanced-chat-model-warmer",
         "regular_servers": len(regular_servers),
         "chat_models": len(chat_models),
         "last_chat_warmup": last_chat_warmup.isoformat() if last_chat_warmup else None