Update app.py
app.py CHANGED
@@ -23,16 +23,17 @@ regular_servers = [
     "https://rajhuggingface4253-compressor3pro.hf.space"
 ]
 
-#
+# OPTIMIZED: Simpler prompts for faster responses
 models_to_warm = [
     {
         "name": "Qwen 1",
         "url": "https://rajhuggingface4253-qwen.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,  # Increased timeout
         "payload": {
-            "prompt": "
-            "max_new_tokens":
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,  # Shorter response
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -43,9 +44,10 @@ models_to_warm = [
         "url": "https://rajhuggingface4253-qwen2.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,
         "payload": {
-            "prompt": "
-            "max_new_tokens":
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -56,9 +58,10 @@ models_to_warm = [
         "url": "https://rajhuggingface4253-qwen3.hf.space",
         "endpoint": "/chat",
         "type": "streaming_chat",
+        "timeout": 45.0,
         "payload": {
-            "prompt": "
-            "max_new_tokens":
+            "prompt": "Hello, respond with OK",
+            "max_new_tokens": 10,
             "temperature": 0.1,
             "enable_code_execution": False,
             "enable_web_search": False
@@ -68,7 +71,8 @@ models_to_warm = [
         "name": "Kokoro TTS",
         "url": "https://rajhuggingface4253-koko.hf.space",
         "endpoint": "/health",
-        "type": "health_check"
+        "type": "health_check",
+        "timeout": 15.0
     }
 ]
 
@@ -101,10 +105,10 @@ async def ping_server(url: str) -> Dict:
     }
 
 async def warmup_chat_model(model_config: Dict) -> Dict:
-    """Warm up chat models with
+    """Warm up chat models with optimized approach"""
     try:
         start_time = time.time()
-        async with httpx.AsyncClient(timeout=
+        async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
 
             payload = model_config['payload']
             api_url = f"{model_config['url']}{model_config['endpoint']}"
@@ -113,7 +117,7 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
             response_time = round((time.time() - start_time) * 1000, 1)
 
             if response.status_code == 200:
-                # Try to read streaming response
+                # Try to read streaming response
                 try:
                     collected_response = ""
                     async for chunk in response.aiter_text():
@@ -121,12 +125,12 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
                         collected_response += chunk
 
                     # Check if we got a meaningful AI response
-                    if collected_response and len(collected_response) >
+                    if collected_response and len(collected_response.strip()) > 5:
                         return {
                             'status': 'success',
                             'response_time_ms': response_time,
                             'status_code': response.status_code,
-                            'ai_response': collected_response[:100],
+                            'ai_response': collected_response[:100].strip(),
                             'got_ai_response': True,
                             'timestamp': datetime.now().isoformat()
                         }
@@ -135,29 +139,43 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
                         'status': 'success',
                         'response_time_ms': response_time,
                         'status_code': response.status_code,
-                        'ai_response': '
+                        'ai_response': 'Empty response',
                         'got_ai_response': False,
                         'timestamp': datetime.now().isoformat()
                     }
-                except:
-                    # If streaming read fails, but status is 200, still consider it success
+                except Exception as e:
                     return {
                         'status': 'success',
                         'response_time_ms': response_time,
                         'status_code': response.status_code,
-                        'ai_response': 'Stream
+                        'ai_response': f'Stream error: {str(e)}',
                         'got_ai_response': False,
                         'timestamp': datetime.now().isoformat()
                     }
             else:
+                # Try to get error details
+                error_detail = ""
+                try:
+                    error_text = (await response.aread()).decode(errors="replace")  # httpx: response.text is a property, not awaitable
+                    error_detail = f" - {error_text[:100]}"
+                except Exception:
+                    pass
+
                 return {
                     'status': 'error',
                     'response_time_ms': response_time,
                     'status_code': response.status_code,
-                    'error': f"HTTP {response.status_code}",
+                    'error': f"HTTP {response.status_code}{error_detail}",
                     'timestamp': datetime.now().isoformat()
                 }
 
+    except httpx.TimeoutException:  # httpx raises its own timeout type, not asyncio.TimeoutError
+        return {
+            'status': 'error',
+            'error': 'Request timeout',
+            'response_time_ms': round((time.time() - start_time) * 1000, 1),
+            'timestamp': datetime.now().isoformat()
+        }
     except Exception as e:
         return {
             'status': 'error',
@@ -170,7 +188,7 @@ async def warmup_health_model(model_config: Dict) -> Dict:
     """Warm up models using health endpoints"""
     try:
         start_time = time.time()
-        async with httpx.AsyncClient(timeout=15.0) as client:
+        async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
 
             api_url = f"{model_config['url']}{model_config['endpoint']}"
             response = await client.get(api_url)
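
Note that the hunks above elide the line that actually issues the chat request and binds response before response_time is computed. With httpx, a streaming POST that feeds aiter_text() is typically opened via client.stream(); the sketch below is illustrative only, assuming the /chat endpoint accepts the JSON payload shown above (stream_chat is a hypothetical name, not part of app.py):

import httpx

# Hypothetical sketch of the elided request: open a streaming POST so that
# response.aiter_text() yields chunks as they arrive. The timeout and payload
# come from the model_config entries defined in models_to_warm.
async def stream_chat(model_config: dict) -> str:
    api_url = f"{model_config['url']}{model_config['endpoint']}"
    collected = ""
    async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
        async with client.stream("POST", api_url, json=model_config['payload']) as response:
            response.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
            async for chunk in response.aiter_text():
                collected += chunk
    return collected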
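The diff also does not show how the two warmup coroutines are driven. A minimal sketch of a concurrent dispatcher, assuming the rest of app.py routes on each entry's type field and runs all warmups in parallel (warm_all is a hypothetical helper, not from this commit):

import asyncio
from typing import Dict, List

# Hypothetical dispatcher: pick the matching warmup coroutine for each config
# and run them all concurrently, so one slow Space does not serialize the rest.
async def warm_all(configs: List[Dict]) -> Dict[str, Dict]:
    async def dispatch(cfg: Dict) -> Dict:
        if cfg.get('type') == 'streaming_chat':
            return await warmup_chat_model(cfg)
        return await warmup_health_model(cfg)  # 'health_check' entries

    results = await asyncio.gather(*(dispatch(c) for c in configs))
    return {cfg['name']: result for cfg, result in zip(configs, results)}

# Usage: results = asyncio.run(warm_all(models_to_warm))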
|