Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,17 +23,17 @@ regular_servers = [
|
|
| 23 |
"https://rajhuggingface4253-compressor3pro.hf.space"
|
| 24 |
]
|
| 25 |
|
| 26 |
-
#
|
| 27 |
models_to_warm = [
|
| 28 |
{
|
| 29 |
"name": "Qwen 1",
|
| 30 |
"url": "https://rajhuggingface4253-qwen.hf.space",
|
| 31 |
"endpoint": "/chat",
|
| 32 |
"type": "streaming_chat",
|
| 33 |
-
"timeout": 45.0,
|
| 34 |
"payload": {
|
| 35 |
-
"prompt": "
|
| 36 |
-
"max_new_tokens":
|
| 37 |
"temperature": 0.1,
|
| 38 |
"enable_code_execution": False,
|
| 39 |
"enable_web_search": False
|
|
@@ -46,8 +46,8 @@ models_to_warm = [
|
|
| 46 |
"type": "streaming_chat",
|
| 47 |
"timeout": 45.0,
|
| 48 |
"payload": {
|
| 49 |
-
"prompt": "
|
| 50 |
-
"max_new_tokens":
|
| 51 |
"temperature": 0.1,
|
| 52 |
"enable_code_execution": False,
|
| 53 |
"enable_web_search": False
|
|
@@ -60,8 +60,8 @@ models_to_warm = [
|
|
| 60 |
"type": "streaming_chat",
|
| 61 |
"timeout": 45.0,
|
| 62 |
"payload": {
|
| 63 |
-
"prompt": "
|
| 64 |
-
"max_new_tokens":
|
| 65 |
"temperature": 0.1,
|
| 66 |
"enable_code_execution": False,
|
| 67 |
"enable_web_search": False
|
|
@@ -105,7 +105,7 @@ async def ping_server(url: str) -> Dict:
|
|
| 105 |
}
|
| 106 |
|
| 107 |
async def warmup_chat_model(model_config: Dict) -> Dict:
|
| 108 |
-
"""Warm up chat models with
|
| 109 |
try:
|
| 110 |
start_time = time.time()
|
| 111 |
async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
|
|
@@ -153,11 +153,11 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
|
|
| 153 |
'timestamp': datetime.now().isoformat()
|
| 154 |
}
|
| 155 |
else:
|
| 156 |
-
# Try to get error details
|
| 157 |
error_detail = ""
|
| 158 |
try:
|
| 159 |
error_text = await response.text()
|
| 160 |
-
error_detail = f" - {error_text[:
|
| 161 |
except:
|
| 162 |
pass
|
| 163 |
|
|
|
|
| 23 |
"https://rajhuggingface4253-compressor3pro.hf.space"
|
| 24 |
]
|
| 25 |
|
| 26 |
+
# REVERTED: Use the EXACT format that worked for Qwen 2
|
| 27 |
models_to_warm = [
|
| 28 |
{
|
| 29 |
"name": "Qwen 1",
|
| 30 |
"url": "https://rajhuggingface4253-qwen.hf.space",
|
| 31 |
"endpoint": "/chat",
|
| 32 |
"type": "streaming_chat",
|
| 33 |
+
"timeout": 45.0,
|
| 34 |
"payload": {
|
| 35 |
+
"prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
|
| 36 |
+
"max_new_tokens": 50,
|
| 37 |
"temperature": 0.1,
|
| 38 |
"enable_code_execution": False,
|
| 39 |
"enable_web_search": False
|
|
|
|
| 46 |
"type": "streaming_chat",
|
| 47 |
"timeout": 45.0,
|
| 48 |
"payload": {
|
| 49 |
+
"prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
|
| 50 |
+
"max_new_tokens": 50,
|
| 51 |
"temperature": 0.1,
|
| 52 |
"enable_code_execution": False,
|
| 53 |
"enable_web_search": False
|
|
|
|
| 60 |
"type": "streaming_chat",
|
| 61 |
"timeout": 45.0,
|
| 62 |
"payload": {
|
| 63 |
+
"prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
|
| 64 |
+
"max_new_tokens": 50,
|
| 65 |
"temperature": 0.1,
|
| 66 |
"enable_code_execution": False,
|
| 67 |
"enable_web_search": False
|
|
|
|
| 105 |
}
|
| 106 |
|
| 107 |
async def warmup_chat_model(model_config: Dict) -> Dict:
|
| 108 |
+
"""Warm up chat models with PROPER error handling"""
|
| 109 |
try:
|
| 110 |
start_time = time.time()
|
| 111 |
async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
|
|
|
|
| 153 |
'timestamp': datetime.now().isoformat()
|
| 154 |
}
|
| 155 |
else:
|
| 156 |
+
# Try to get error details for debugging
|
| 157 |
error_detail = ""
|
| 158 |
try:
|
| 159 |
error_text = await response.text()
|
| 160 |
+
error_detail = f" - {error_text[:200]}"
|
| 161 |
except:
|
| 162 |
pass
|
| 163 |
|