Rajhuggingface4253 committed on
Commit
8dbef7a
Β·
verified Β·
1 Parent(s): f5cc238

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -26
app.py CHANGED
@@ -27,7 +27,7 @@ regular_servers = [
27
  chat_models = [
28
  {
29
  "url": "https://rajhuggingface4253-qwen.hf.space",
30
- "api_endpoint": "/chat", # Adjust based on your API
31
  "warmup_message": "Say 'active' in one word?",
32
  "type": "qwen"
33
  },
@@ -80,20 +80,129 @@ async def ping_server(url: str) -> Dict:
80
  }
81
 
82
  async def warmup_chat_model(model_config: Dict) -> Dict:
83
- """Send a warmup message to a chat model"""
84
  try:
85
  start_time = time.time()
86
- async with httpx.AsyncClient(timeout=30.0) as client:
 
 
 
 
 
 
 
 
 
 
 
87
  if model_config["type"].startswith("qwen"):
88
  payload = {
89
  "message": model_config["warmup_message"],
90
- "max_tokens": 10
 
91
  }
92
- else:
93
- payload = {
94
- "messages": [{"role": "user", "content": model_config["warmup_message"]}],
95
- "max_tokens": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  api_url = f"{model_config['url']}{model_config['api_endpoint']}"
99
  response = await client.post(
@@ -104,11 +213,15 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
104
 
105
  response_time = round((time.time() - start_time) * 1000, 1)
106
 
 
 
 
107
  return {
108
- 'status': 'success',
109
  'response_time_ms': response_time,
110
  'status_code': response.status_code,
111
- 'response_preview': str(response.text)[:100],
 
112
  'timestamp': datetime.now().isoformat()
113
  }
114
 
@@ -119,6 +232,13 @@ async def warmup_chat_model(model_config: Dict) -> Dict:
119
  'timestamp': datetime.now().isoformat()
120
  }
121
 
 
 
 
 
 
 
 
122
  async def ping_all_servers():
123
  """Ping all regular servers"""
124
  global ping_results, last_ping_run
@@ -140,22 +260,29 @@ async def ping_all_servers():
140
  return results
141
 
142
  async def warmup_all_chat_models():
143
- """Warm up all chat models with actual messages"""
144
  global chat_warmup_results, last_chat_warmup
145
 
146
  if not chat_models:
147
  return []
148
 
149
- tasks = [warmup_chat_model(model) for model in chat_models]
150
  results = await asyncio.gather(*tasks)
151
 
152
  for i, model in enumerate(chat_models):
153
  chat_warmup_results[model['url']] = results[i]
 
 
 
 
 
 
154
 
155
  last_chat_warmup = datetime.now()
156
 
157
- success_count = sum(1 for result in results if result['status'] == 'success')
158
- print(f"πŸ€– {datetime.now().strftime('%H:%M:%S')} - Chat models: {success_count}/{len(chat_models)} Warmed up")
 
159
 
160
  return results
161
 
@@ -193,11 +320,11 @@ async def ping_single_health(client: httpx.AsyncClient, space_url: str, health_u
193
 
194
  async def continuous_pinging():
195
  """Main pinging loop with chat model warming"""
196
- print("πŸš€ Chat Model Warmer Started!")
197
  print(f"🌐 Regular servers: {len(regular_servers)}")
198
  print(f"πŸ€– Chat models: {len(chat_models)}")
199
  print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
200
- print("⏰ Chat warmup every 5 minutes")
201
 
202
  last_health_check = 0
203
 
@@ -207,11 +334,11 @@ async def continuous_pinging():
207
  if regular_servers:
208
  await ping_all_servers()
209
 
210
- # Warm up chat models (most important!)
211
  if chat_models:
212
  await warmup_all_chat_models()
213
 
214
- # Ping health endpoints every 30 minutes (FIXED: use the defined constant)
215
  current_time = time.time()
216
  if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
217
  await ping_health_endpoints()
@@ -220,34 +347,35 @@ async def continuous_pinging():
220
  await asyncio.sleep(PING_INTERVAL)
221
 
222
  except Exception as e:
223
- print(f"❌ Error: {e}")
224
  await asyncio.sleep(60)
225
 
226
  @asynccontextmanager
227
  async def lifespan(app: FastAPI):
228
  # Startup
229
- print("Starting up Chat Model Warmer...")
230
  asyncio.create_task(continuous_pinging())
231
  yield
232
  # Shutdown
233
  print("Shutting down...")
234
 
235
- app = FastAPI(title="Chat Model Warmer", lifespan=lifespan)
236
 
237
  @app.get("/", response_class=HTMLResponse)
238
  async def home():
239
  """Minimal dashboard"""
240
  regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
241
- chat_success = sum(1 for r in chat_warmup_results.values() if r.get('status') == 'success')
 
242
  health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
243
 
244
  html_content = f"""
245
  <html>
246
- <head><title>Chat Model Warmer</title></head>
247
  <body>
248
- <h1>πŸ€– Chat Model Warmer</h1>
249
  <p><strong>Regular Servers:</strong> {regular_success}/{len(regular_servers)} OK</p>
250
- <p><strong>Chat Models:</strong> {chat_success}/{len(chat_models)} Warmed up</p>
251
  <p><strong>Last Chat Warmup:</strong> {last_chat_warmup.strftime('%H:%M:%S') if last_chat_warmup else 'Never'}</p>
252
  <p><strong>Network:</strong> {health_success}/{len(pinger_spaces)} OK</p>
253
  </body>
@@ -259,7 +387,7 @@ async def home():
259
  async def health():
260
  return JSONResponse({
261
  "status": "healthy",
262
- "service": "chat-model-warmer",
263
  "regular_servers": len(regular_servers),
264
  "chat_models": len(chat_models),
265
  "last_chat_warmup": last_chat_warmup.isoformat() if last_chat_warmup else None
 
27
  chat_models = [
28
  {
29
  "url": "https://rajhuggingface4253-qwen.hf.space",
30
+ "api_endpoint": "/chat",
31
  "warmup_message": "Say 'active' in one word?",
32
  "type": "qwen"
33
  },
 
80
  }
81
 
82
async def warmup_chat_model(model_config: Dict) -> Dict:
    """Send a warmup message to a chat model, falling back to streaming.

    First issues a plain (non-streaming) POST to the model's API endpoint;
    if that times out or yields no usable body, retries with a streaming
    request and counts the model as warm as soon as the first chunk arrives.

    Args:
        model_config: dict with 'url', 'api_endpoint', 'warmup_message' and
            'type' keys. Types starting with "qwen" use a flat
            {"message": ...} payload instead of OpenAI-style "messages".

    Returns:
        A result dict: on an attempt that completed, 'status',
        'response_time_ms', 'status_code', 'response_preview',
        'got_response', 'method' and 'timestamp'; on failure, 'status',
        'error' and 'timestamp' (plus 'response_time_ms' when both
        methods were tried).
    """
    try:
        start_time = time.time()

        async with httpx.AsyncClient(timeout=25.0) as client:

            # First try: non-streaming request (preferred for warming).
            payload = {
                "messages": [{"role": "user", "content": model_config["warmup_message"]}],
                "max_tokens": 15,
                "stream": False  # explicitly disable streaming for warming
            }

            # Qwen-style endpoints take a flat "message" field instead.
            if model_config["type"].startswith("qwen"):
                payload = {
                    "message": model_config["warmup_message"],
                    "max_tokens": 15,
                    "stream": False
                }

            api_url = f"{model_config['url']}{model_config['api_endpoint']}"

            try:
                response = await client.post(
                    api_url,
                    json=payload,
                    headers={"Content-Type": "application/json"}
                )

                response_time = round((time.time() - start_time) * 1000, 1)
                # FIX: httpx.Response has no awaitable atext(); .text is a
                # plain property on an already-read response.
                response_text = response.text if response.status_code == 200 else ""

                # Only count the model as warm on a non-empty 200 body.
                got_valid_response = len(response_text.strip()) > 0 and response.status_code == 200

                result = {
                    'status': 'success' if got_valid_response else 'error',
                    'response_time_ms': response_time,
                    'status_code': response.status_code,
                    'response_preview': response_text[:100] if got_valid_response else "No response",
                    'got_response': got_valid_response,
                    'method': 'non-streaming',
                    'timestamp': datetime.now().isoformat()
                }

                if got_valid_response:
                    return result

            # FIX: catch the whole httpx timeout family (ConnectTimeout,
            # WriteTimeout, PoolTimeout, ...), not only ReadTimeout, so
            # every timeout falls through to the streaming attempt.
            except (httpx.TimeoutException, asyncio.TimeoutError):
                pass

            # Second try: streaming (some endpoints only answer SSE requests).
            print(f"πŸ”„ Trying streaming approach for {model_config['url']}")
            try:
                streaming_start = time.time()
                streaming_payload = payload.copy()
                streaming_payload["stream"] = True  # enable streaming

                async with client.stream(
                    'POST',
                    api_url,
                    json=streaming_payload,
                    timeout=20.0
                ) as response:

                    first_chunk_received = False
                    full_response = ""

                    async for line in response.aiter_lines():
                        if line.strip() and 'data:' in line:
                            first_chunk_received = True
                            # Best-effort extraction of the token text from an
                            # SSE JSON chunk shaped like {"content":"..."}.
                            marker = '"content":"'
                            idx = line.find(marker)
                            if idx != -1:
                                # FIX: the original skipped only 10 chars but
                                # the marker is 11 long, so content_end always
                                # equaled content_start and nothing was kept.
                                content_start = idx + len(marker)
                                content_end = line.find('"', content_start)
                                if content_end > content_start:
                                    full_response += line[content_start:content_end]
                            break  # one chunk is proof enough the model is warm

                    streaming_time = round((time.time() - streaming_start) * 1000, 1)

                    return {
                        'status': 'success' if first_chunk_received else 'error',
                        'response_time_ms': streaming_time,
                        'status_code': response.status_code,
                        'response_preview': full_response[:100] if full_response else "Streaming chunk received",
                        'got_response': first_chunk_received,
                        'method': 'streaming',
                        'timestamp': datetime.now().isoformat()
                    }

            except Exception as stream_error:
                return {
                    'status': 'error',
                    'error': f"Both methods failed: {str(stream_error)}",
                    'response_time_ms': round((time.time() - start_time) * 1000, 1),
                    'timestamp': datetime.now().isoformat()
                }

    except Exception as e:
        return {
            'status': 'error',
            'error': str(e),
            'timestamp': datetime.now().isoformat()
        }
195
+
196
+ async def warmup_tts_model(model_config: Dict) -> Dict:
197
+ """Warm up TTS model with proper response handling"""
198
+ try:
199
+ start_time = time.time()
200
+ async with httpx.AsyncClient(timeout=15.0) as client:
201
+
202
+ payload = {
203
+ "text": model_config["warmup_message"],
204
+ "voice": "default"
205
+ }
206
 
207
  api_url = f"{model_config['url']}{model_config['api_endpoint']}"
208
  response = await client.post(
 
213
 
214
  response_time = round((time.time() - start_time) * 1000, 1)
215
 
216
+ # For TTS, success means we got any response (audio data or confirmation)
217
+ success = response.status_code == 200 and len(response.content) > 0
218
+
219
  return {
220
+ 'status': 'success' if success else 'error',
221
  'response_time_ms': response_time,
222
  'status_code': response.status_code,
223
+ 'response_preview': f"TTS response: {len(response.content)} bytes" if success else "No audio data",
224
+ 'got_response': success,
225
  'timestamp': datetime.now().isoformat()
226
  }
227
 
 
232
  'timestamp': datetime.now().isoformat()
233
  }
234
 
235
async def warmup_single_model(model_config: Dict) -> Dict:
    """Dispatch a model to the warmer that matches its declared type.

    TTS models get the audio-oriented warmer; everything else is treated
    as a chat model.
    """
    warmer = warmup_tts_model if model_config["type"] == "tts" else warmup_chat_model
    return await warmer(model_config)
241
+
242
  async def ping_all_servers():
243
  """Ping all regular servers"""
244
  global ping_results, last_ping_run
 
260
  return results
261
 
262
async def warmup_all_chat_models():
    """Warm every configured chat model concurrently and record outcomes.

    Stores each model's result in the chat_warmup_results global keyed by
    URL, logs a per-model line, updates last_chat_warmup, and returns the
    list of result dicts (empty list when no models are configured).
    """
    global chat_warmup_results, last_chat_warmup

    if not chat_models:
        return []

    results = await asyncio.gather(*(warmup_single_model(m) for m in chat_models))

    for model, outcome in zip(chat_models, results):
        chat_warmup_results[model['url']] = outcome

        # Log detailed results for debugging
        if outcome['status'] == 'success' and outcome.get('got_response'):
            print(f"βœ… {model['url']}: {outcome['response_time_ms']}ms - {outcome['response_preview']}")
        else:
            print(f"❌ {model['url']}: {outcome.get('error', 'No response')}")

    last_chat_warmup = datetime.now()

    # A model only counts as warm if it actually produced a response.
    success_count = sum(1 for outcome in results if outcome.get('got_response', False))
    print(f"πŸ€– {datetime.now().strftime('%H:%M:%S')} - Chat models: {success_count}/{len(chat_models)} Actually responded")

    return results
288
 
 
320
 
321
  async def continuous_pinging():
322
  """Main pinging loop with chat model warming"""
323
+ print("πŸš€ Enhanced Chat Model Warmer Started!")
324
  print(f"🌐 Regular servers: {len(regular_servers)}")
325
  print(f"πŸ€– Chat models: {len(chat_models)}")
326
  print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
327
+ print("⏰ Warming with response verification every 5 minutes")
328
 
329
  last_health_check = 0
330
 
 
334
  if regular_servers:
335
  await ping_all_servers()
336
 
337
+ # Warm up chat models with response verification
338
  if chat_models:
339
  await warmup_all_chat_models()
340
 
341
+ # Ping health endpoints every 30 minutes
342
  current_time = time.time()
343
  if current_time - last_health_check >= HEALTH_CHECK_INTERVAL and pinger_spaces:
344
  await ping_health_endpoints()
 
347
  await asyncio.sleep(PING_INTERVAL)
348
 
349
  except Exception as e:
350
+ print(f"❌ Error in main loop: {e}")
351
  await asyncio.sleep(60)
352
 
353
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: run the background warmer while the app lives."""
    # Startup: launch the continuous ping/warmup loop as a background task.
    print("Starting up Enhanced Chat Model Warmer...")
    warmer_task = asyncio.create_task(continuous_pinging())
    yield
    # Shutdown
    print("Shutting down...")
361
 
362
+ app = FastAPI(title="Enhanced Chat Model Warmer", lifespan=lifespan)
363
 
364
  @app.get("/", response_class=HTMLResponse)
365
  async def home():
366
  """Minimal dashboard"""
367
  regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
368
+ # Only count chat models that actually responded
369
+ chat_success = sum(1 for r in chat_warmup_results.values() if r.get('got_response', False))
370
  health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
371
 
372
  html_content = f"""
373
  <html>
374
+ <head><title>Enhanced Chat Model Warmer</title></head>
375
  <body>
376
+ <h1>πŸ€– Enhanced Chat Model Warmer</h1>
377
  <p><strong>Regular Servers:</strong> {regular_success}/{len(regular_servers)} OK</p>
378
+ <p><strong>Chat Models (Responded):</strong> {chat_success}/{len(chat_models)} Actually Warm</p>
379
  <p><strong>Last Chat Warmup:</strong> {last_chat_warmup.strftime('%H:%M:%S') if last_chat_warmup else 'Never'}</p>
380
  <p><strong>Network:</strong> {health_success}/{len(pinger_spaces)} OK</p>
381
  </body>
 
387
  async def health():
388
  return JSONResponse({
389
  "status": "healthy",
390
+ "service": "enhanced-chat-model-warmer",
391
  "regular_servers": len(regular_servers),
392
  "chat_models": len(chat_models),
393
  "last_chat_warmup": last_chat_warmup.isoformat() if last_chat_warmup else None