Rajhuggingface4253 committed on
Commit
241a7f9
·
verified ·
1 Parent(s): e51d0e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -23,17 +23,17 @@ regular_servers = [
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
- # OPTIMIZED: Simpler prompts for faster responses
27
  models_to_warm = [
28
  {
29
  "name": "Qwen 1",
30
  "url": "https://rajhuggingface4253-qwen.hf.space",
31
  "endpoint": "/chat",
32
  "type": "streaming_chat",
33
- "timeout": 45.0, # Increased timeout
34
  "payload": {
35
- "prompt": "Hello, respond with OK",
36
- "max_new_tokens": 10, # Shorter response
37
  "temperature": 0.1,
38
  "enable_code_execution": False,
39
  "enable_web_search": False
@@ -46,8 +46,8 @@ models_to_warm = [
46
  "type": "streaming_chat",
47
  "timeout": 45.0,
48
  "payload": {
49
- "prompt": "Hello, respond with OK",
50
- "max_new_tokens": 10,
51
  "temperature": 0.1,
52
  "enable_code_execution": False,
53
  "enable_web_search": False
@@ -60,8 +60,8 @@ models_to_warm = [
60
  "type": "streaming_chat",
61
  "timeout": 45.0,
62
  "payload": {
63
- "prompt": "Hello, respond with OK",
64
- "max_new_tokens": 10,
65
  "temperature": 0.1,
66
  "enable_code_execution": False,
67
  "enable_web_search": False
@@ -105,7 +105,7 @@ async def ping_server(url: str) -> Dict:
105
  }
106
 
107
  async def warmup_chat_model(model_config: Dict) -> Dict:
108
- """Warm up chat models with optimized approach"""
109
  try:
110
  start_time = time.time()
111
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
@@ -153,11 +153,11 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
153
  'timestamp': datetime.now().isoformat()
154
  }
155
  else:
156
- # Try to get error details
157
  error_detail = ""
158
  try:
159
  error_text = await response.text()
160
- error_detail = f" - {error_text[:100]}"
161
  except:
162
  pass
163
 
 
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
+ # REVERTED: Use the EXACT format that worked for Qwen 2
27
  models_to_warm = [
28
  {
29
  "name": "Qwen 1",
30
  "url": "https://rajhuggingface4253-qwen.hf.space",
31
  "endpoint": "/chat",
32
  "type": "streaming_chat",
33
+ "timeout": 45.0,
34
  "payload": {
35
+ "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
36
+ "max_new_tokens": 50,
37
  "temperature": 0.1,
38
  "enable_code_execution": False,
39
  "enable_web_search": False
 
46
  "type": "streaming_chat",
47
  "timeout": 45.0,
48
  "payload": {
49
+ "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
50
+ "max_new_tokens": 50,
51
  "temperature": 0.1,
52
  "enable_code_execution": False,
53
  "enable_web_search": False
 
60
  "type": "streaming_chat",
61
  "timeout": 45.0,
62
  "payload": {
63
+ "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
64
+ "max_new_tokens": 50,
65
  "temperature": 0.1,
66
  "enable_code_execution": False,
67
  "enable_web_search": False
 
105
  }
106
 
107
  async def warmup_chat_model(model_config: Dict) -> Dict:
108
+ """Warm up chat models with PROPER error handling"""
109
  try:
110
  start_time = time.time()
111
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
 
153
  'timestamp': datetime.now().isoformat()
154
  }
155
  else:
156
+ # Try to get error details for debugging
157
  error_detail = ""
158
  try:
159
  error_text = await response.text()
160
+ error_detail = f" - {error_text[:200]}"
161
  except:
162
  pass
163