Update app.py
app.py CHANGED
@@ -23,16 +23,17 @@ regular_servers = [
     "https://rajhuggingface4253-compressor3pro.hf.space"
 ]
 
-#
+# OPTIMIZED: Simpler prompts for faster responses
 models_to_warm = [
     {
         "name": "Qwen 1",
         "url": "https://rajhuggingface4253-qwen.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,  # Increased timeout
         "payload": {
-            "prompt": "
-            "max_new_tokens":
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,  # Shorter response
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -43,9 +44,10 @@ models_to_warm = [
         "url": "https://rajhuggingface4253-qwen2.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,
         "payload": {
-            "prompt": "
-            "max_new_tokens":
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -56,9 +58,10 @@ models_to_warm = [
         "url": "https://rajhuggingface4253-qwen3.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,
         "payload": {
-            "prompt": "
-            "max_new_tokens":
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -68,7 +71,8 @@ models_to_warm = [
         "name": "Kokoro TTS",
         "url": "https://rajhuggingface4253-koko.hf.space",
         "endpoint": "/health",
-        "type": "health_check"
+        "type": "health_check",
+        "timeout": 15.0
     }
 ]
 
@@ -101,10 +105,10 @@ async def ping_server(url: str) -> Dict:
     }
 
 async def warmup_chat_model(model_config: Dict) -> Dict:
-    """Warm up chat models with
+    """Warm up chat models with optimized approach"""
     try:
         start_time = time.time()
-        async with httpx.AsyncClient(timeout=
+        async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
 
             payload = model_config['payload']
             api_url = f"{model_config['url']}{model_config['endpoint']}"
@@ -113,7 +117,7 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
             response_time = round((time.time() - start_time) * 1000, 1)
 
             if response.status_code == 200:
-                # Try to read streaming response
+                # Try to read streaming response
                 try:
                     collected_response = ""
                     async for chunk in response.aiter_text():
@@ -121,12 +125,12 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
                         collected_response += chunk
 
                     # Check if we got a meaningful AI response
-                    if collected_response and len(collected_response) >
+                    if collected_response and len(collected_response.strip()) > 5:
                         return {
                             'status': 'success',
                             'response_time_ms': response_time,
                             'status_code': response.status_code,
-                            'ai_response': collected_response[:100],
+                            'ai_response': collected_response[:100].strip(),
                             'got_ai_response': True,
                             'timestamp': datetime.now().isoformat()
                         }
@@ -135,29 +139,43 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
                         'status': 'success',
                         'response_time_ms': response_time,
                         'status_code': response.status_code,
-                        'ai_response': '
+                        'ai_response': 'Empty response',
                         'got_ai_response': False,
                         'timestamp': datetime.now().isoformat()
                     }
-                except:
-                    # If streaming read fails, but status is 200, still consider it success
+                except Exception as e:
                     return {
                         'status': 'success',
                         'response_time_ms': response_time,
                         'status_code': response.status_code,
-                        'ai_response': 'Stream
+                        'ai_response': f'Stream error: {str(e)}',
                         'got_ai_response': False,
                         'timestamp': datetime.now().isoformat()
                     }
             else:
+                # Try to get error details
+                error_detail = ""
+                try:
+                    error_text = (await response.aread()).decode(errors="replace")  # httpx: response.text is a property, not awaitable
+                    error_detail = f" - {error_text[:100]}"
+                except Exception:
+                    pass
+
                 return {
                     'status': 'error',
                     'response_time_ms': response_time,
                     'status_code': response.status_code,
-                    'error': f"HTTP {response.status_code}",
+                    'error': f"HTTP {response.status_code}{error_detail}",
                     'timestamp': datetime.now().isoformat()
                 }
 
+    except httpx.TimeoutException:  # httpx raises its own timeout type, not asyncio.TimeoutError
+        return {
+            'status': 'error',
+            'error': 'Request timeout',
+            'response_time_ms': round((time.time() - start_time) * 1000, 1),
+            'timestamp': datetime.now().isoformat()
+        }
     except Exception as e:
         return {
             'status': 'error',
@@ -170,7 +188,7 @@ async def warmup_health_model(model_config: Dict) -> Dict:
     """Warm up models using health endpoints"""
     try:
         start_time = time.time()
-        async with httpx.AsyncClient(timeout=15.0) as client:
+        async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
 
             api_url = f"{model_config['url']}{model_config['endpoint']}"
             response = await client.get(api_url)
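
Note that the hunks above elide the line that actually issues the chat request and binds response before response_time is computed. With httpx, a streaming POST that feeds aiter_text() is typically opened via client.stream(); the sketch below is illustrative only, assuming the /chat endpoint accepts the JSON payload shown above (stream_chat is a hypothetical name, not part of app.py):

import httpx

# Hypothetical sketch of the elided request: open a streaming POST so that
# response.aiter_text() yields chunks as they arrive. The timeout and payload
# come from the model_config entries defined in models_to_warm.
async def stream_chat(model_config: dict) -> str:
    api_url = f"{model_config['url']}{model_config['endpoint']}"
    collected = ""
    async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
        async with client.stream("POST", api_url, json=model_config['payload']) as response:
            response.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
            async for chunk in response.aiter_text():
                collected += chunk
    return collected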
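The diff also does not show how the two warmup coroutines are driven. A minimal sketch of a concurrent dispatcher, assuming the rest of app.py routes on each entry's type field and runs all warmups in parallel (warm_all is a hypothetical helper, not from this commit):

import asyncio
from typing import Dict, List

# Hypothetical dispatcher: pick the matching warmup coroutine for each config
# and run them all concurrently, so one slow Space does not serialize the rest.
async def warm_all(configs: List[Dict]) -> Dict[str, Dict]:
    async def dispatch(cfg: Dict) -> Dict:
        if cfg.get('type') == 'streaming_chat':
            return await warmup_chat_model(cfg)
        return await warmup_health_model(cfg)  # 'health_check' entries

    results = await asyncio.gather(*(dispatch(c) for c in configs))
    return {cfg['name']: result for cfg, result in zip(configs, results)}

# Usage: results = asyncio.run(warm_all(models_to_warm))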
|