Rajhuggingface4253 committed on
Commit
d649fab
·
verified ·
1 Parent(s): d0b0d1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -57
app.py CHANGED
@@ -23,7 +23,7 @@ regular_servers = [
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
- # Models to warm with SIMPLE payloads - let's test what works
27
  models_to_warm = [
28
  {
29
  "name": "Qwen 1",
@@ -32,7 +32,9 @@ models_to_warm = [
32
  "type": "streaming_chat",
33
  "timeout": 30.0,
34
  "payload": {
35
- "prompt": "Hello, are you ready?"
 
 
36
  }
37
  },
38
  {
@@ -42,7 +44,9 @@ models_to_warm = [
42
  "type": "streaming_chat",
43
  "timeout": 30.0,
44
  "payload": {
45
- "prompt": "Say 'ready' if you're working."
 
 
46
  }
47
  },
48
  {
@@ -52,7 +56,9 @@ models_to_warm = [
52
  "type": "streaming_chat",
53
  "timeout": 30.0,
54
  "payload": {
55
- "prompt": "Ping! Respond with 'pong'."
 
 
56
  }
57
  },
58
  {
@@ -93,7 +99,7 @@ async def ping_server(url: str) -> Dict:
93
  }
94
 
95
  async def warmup_streaming_model(model_config: Dict) -> Dict:
96
- """Warm up streaming models by calling their chat endpoints"""
97
  try:
98
  start_time = time.time()
99
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
@@ -101,21 +107,13 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
101
  payload = model_config['payload']
102
  api_url = f"{model_config['url']}{model_config['endpoint']}"
103
 
104
- # Make the streaming request
105
  response = await client.post(api_url, json=payload)
106
 
107
  if response.status_code != 200:
108
- # Try to get error details for debugging
109
- error_detail = ""
110
- try:
111
- error_response = await response.text()
112
- error_detail = f" - {error_response[:100]}"
113
- except:
114
- pass
115
-
116
  return {
117
  'status': 'error',
118
- 'error': f"HTTP {response.status_code}{error_detail}",
119
  'status_code': response.status_code,
120
  'response_time_ms': round((time.time() - start_time) * 1000, 1),
121
  'got_response': False,
@@ -130,13 +128,13 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
130
 
131
  response_time = round((time.time() - start_time) * 1000, 1)
132
 
133
- # Check if we got a valid response
134
  if collected_response:
135
  return {
136
  'status': 'success',
137
  'response_time_ms': response_time,
138
  'status_code': response.status_code,
139
- 'model_response': collected_response[:200] + "..." if len(collected_response) > 200 else collected_response,
140
  'response_length': len(collected_response),
141
  'got_response': True,
142
  'timestamp': datetime.now().isoformat()
@@ -144,7 +142,7 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
144
  else:
145
  return {
146
  'status': 'error',
147
- 'error': 'Empty streaming response',
148
  'response_time_ms': response_time,
149
  'status_code': response.status_code,
150
  'got_response': False,
@@ -179,39 +177,18 @@ async def warmup_model_health(model_config: Dict) -> Dict:
179
  response_time = round((time.time() - start_time) * 1000, 1)
180
 
181
  if response.status_code == 200:
182
- try:
183
- health_data = response.json()
184
- model_status = health_data.get('status', 'unknown')
185
- model_name = health_data.get('model', 'Unknown')
186
-
187
- is_healthy = model_status in ['ok', 'healthy', 'ready']
188
-
189
- return {
190
- 'status': 'success' if is_healthy else 'error',
191
- 'response_time_ms': response_time,
192
- 'status_code': response.status_code,
193
- 'model_status': model_status,
194
- 'model_name': model_name,
195
- 'got_response': True,
196
- 'timestamp': datetime.now().isoformat()
197
- }
198
- except:
199
- return {
200
- 'status': 'success',
201
- 'response_time_ms': response_time,
202
- 'status_code': response.status_code,
203
- 'model_status': 'ok',
204
- 'model_name': 'Unknown',
205
- 'got_response': True,
206
- 'timestamp': datetime.now().isoformat()
207
- }
208
  else:
209
  return {
210
  'status': 'error',
211
  'response_time_ms': response_time,
212
  'status_code': response.status_code,
213
- 'model_status': 'unhealthy',
214
- 'model_name': 'Unknown',
215
  'got_response': False,
216
  'timestamp': datetime.now().isoformat()
217
  }
@@ -221,14 +198,12 @@ async def warmup_model_health(model_config: Dict) -> Dict:
221
  'status': 'error',
222
  'error': str(e),
223
  'response_time_ms': round((time.time() - start_time) * 1000, 1),
224
- 'model_status': 'connection_failed',
225
- 'model_name': 'Unknown',
226
  'got_response': False,
227
  'timestamp': datetime.now().isoformat()
228
  }
229
 
230
  async def warmup_single_model(model_config: Dict) -> Dict:
231
- """Route to appropriate warming method based on model type"""
232
  if model_config.get('type') == 'streaming_chat':
233
  return await warmup_streaming_model(model_config)
234
  else:
@@ -255,7 +230,7 @@ async def ping_all_servers():
255
  return results
256
 
257
  async def warmup_all_models():
258
- """Warm up all models using appropriate endpoints"""
259
  global model_warmup_results, last_model_warmup
260
 
261
  if not models_to_warm:
@@ -270,18 +245,15 @@ async def warmup_all_models():
270
  'health_check': results[i]
271
  }
272
 
273
- # Log results
274
  result = results[i]
275
  model_name = model['name']
276
 
277
  if result['status'] == 'success':
 
278
  if result.get('got_response'):
279
- response_info = f"{result['response_time_ms']}ms"
280
- if result.get('response_length'):
281
- response_info += f" ({result['response_length']} chars)"
282
- print(f"✅ {model_name}: {response_info}")
283
  else:
284
- print(f"✅ {model_name}: {result['response_time_ms']}ms")
285
  else:
286
  error_msg = result.get('error', 'Unknown error')
287
  print(f"❌ {model_name}: {error_msg}")
@@ -328,7 +300,7 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
328
  print(f"🔗 Health ping failed for {space_url}: {e}")
329
 
330
  async def continuous_pinging():
331
- """Main pinging loop with model warming"""
332
  print("🚀 Smart Model Warmer Started!")
333
  print(f"🌐 Regular servers: {len(regular_servers)}")
334
  print(f"🤖 Models to warm: {len(models_to_warm)}")
 
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
+ # Models to warm - SIMPLE and CLEAN
27
  models_to_warm = [
28
  {
29
  "name": "Qwen 1",
 
32
  "type": "streaming_chat",
33
  "timeout": 30.0,
34
  "payload": {
35
+ "prompt": "Hello, respond with OK if ready",
36
+ "max_new_tokens": 10,
37
+ "temperature": 0.1
38
  }
39
  },
40
  {
 
44
  "type": "streaming_chat",
45
  "timeout": 30.0,
46
  "payload": {
47
+ "prompt": "Hello, respond with OK if ready",
48
+ "max_new_tokens": 10,
49
+ "temperature": 0.1
50
  }
51
  },
52
  {
 
56
  "type": "streaming_chat",
57
  "timeout": 30.0,
58
  "payload": {
59
+ "prompt": "Hello, respond with OK if ready",
60
+ "max_new_tokens": 10,
61
+ "temperature": 0.1
62
  }
63
  },
64
  {
 
99
  }
100
 
101
  async def warmup_streaming_model(model_config: Dict) -> Dict:
102
+ """Warm up streaming models - SIMPLE and EFFECTIVE"""
103
  try:
104
  start_time = time.time()
105
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
 
107
  payload = model_config['payload']
108
  api_url = f"{model_config['url']}{model_config['endpoint']}"
109
 
110
+ # Simple POST request to chat endpoint
111
  response = await client.post(api_url, json=payload)
112
 
113
  if response.status_code != 200:
 
 
 
 
 
 
 
 
114
  return {
115
  'status': 'error',
116
+ 'error': f"HTTP {response.status_code}",
117
  'status_code': response.status_code,
118
  'response_time_ms': round((time.time() - start_time) * 1000, 1),
119
  'got_response': False,
 
128
 
129
  response_time = round((time.time() - start_time) * 1000, 1)
130
 
131
+ # Success if we got any response
132
  if collected_response:
133
  return {
134
  'status': 'success',
135
  'response_time_ms': response_time,
136
  'status_code': response.status_code,
137
+ 'model_response': collected_response[:100],
138
  'response_length': len(collected_response),
139
  'got_response': True,
140
  'timestamp': datetime.now().isoformat()
 
142
  else:
143
  return {
144
  'status': 'error',
145
+ 'error': 'No response received',
146
  'response_time_ms': response_time,
147
  'status_code': response.status_code,
148
  'got_response': False,
 
177
  response_time = round((time.time() - start_time) * 1000, 1)
178
 
179
  if response.status_code == 200:
180
+ return {
181
+ 'status': 'success',
182
+ 'response_time_ms': response_time,
183
+ 'status_code': response.status_code,
184
+ 'got_response': True,
185
+ 'timestamp': datetime.now().isoformat()
186
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  else:
188
  return {
189
  'status': 'error',
190
  'response_time_ms': response_time,
191
  'status_code': response.status_code,
 
 
192
  'got_response': False,
193
  'timestamp': datetime.now().isoformat()
194
  }
 
198
  'status': 'error',
199
  'error': str(e),
200
  'response_time_ms': round((time.time() - start_time) * 1000, 1),
 
 
201
  'got_response': False,
202
  'timestamp': datetime.now().isoformat()
203
  }
204
 
205
  async def warmup_single_model(model_config: Dict) -> Dict:
206
+ """Route to appropriate warming method"""
207
  if model_config.get('type') == 'streaming_chat':
208
  return await warmup_streaming_model(model_config)
209
  else:
 
230
  return results
231
 
232
  async def warmup_all_models():
233
+ """Warm up all models"""
234
  global model_warmup_results, last_model_warmup
235
 
236
  if not models_to_warm:
 
245
  'health_check': results[i]
246
  }
247
 
 
248
  result = results[i]
249
  model_name = model['name']
250
 
251
  if result['status'] == 'success':
252
+ response_time = result['response_time_ms']
253
  if result.get('got_response'):
254
+ print(f"✅ {model_name}: {response_time}ms")
 
 
 
255
  else:
256
+ print(f"✅ {model_name}: {response_time}ms")
257
  else:
258
  error_msg = result.get('error', 'Unknown error')
259
  print(f"❌ {model_name}: {error_msg}")
 
300
  print(f"🔗 Health ping failed for {space_url}: {e}")
301
 
302
  async def continuous_pinging():
303
+ """Main pinging loop"""
304
  print("🚀 Smart Model Warmer Started!")
305
  print(f"🌐 Regular servers: {len(regular_servers)}")
306
  print(f"🤖 Models to warm: {len(models_to_warm)}")