Rajhuggingface4253 committed on
Commit
2d87d50
·
verified ·
1 Parent(s): d649fab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -31
app.py CHANGED
@@ -23,42 +23,36 @@ regular_servers = [
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
- # Models to warm - SIMPLE and CLEAN
27
  models_to_warm = [
28
  {
29
  "name": "Qwen 1",
30
  "url": "https://rajhuggingface4253-qwen.hf.space",
31
  "endpoint": "/chat",
32
- "type": "streaming_chat",
33
  "timeout": 30.0,
34
  "payload": {
35
- "prompt": "Hello, respond with OK if ready",
36
- "max_new_tokens": 10,
37
- "temperature": 0.1
38
  }
39
  },
40
  {
41
  "name": "Qwen 2",
42
  "url": "https://rajhuggingface4253-qwen2.hf.space",
43
  "endpoint": "/chat",
44
- "type": "streaming_chat",
45
  "timeout": 30.0,
46
  "payload": {
47
- "prompt": "Hello, respond with OK if ready",
48
- "max_new_tokens": 10,
49
- "temperature": 0.1
50
  }
51
  },
52
  {
53
  "name": "Qwen 3",
54
  "url": "https://rajhuggingface4253-qwen3.hf.space",
55
  "endpoint": "/chat",
56
- "type": "streaming_chat",
57
  "timeout": 30.0,
58
  "payload": {
59
- "prompt": "Hello, respond with OK if ready",
60
- "max_new_tokens": 10,
61
- "temperature": 0.1
62
  }
63
  },
64
  {
@@ -98,8 +92,8 @@ async def ping_server(url: str) -> Dict:
98
  'timestamp': datetime.now().isoformat()
99
  }
100
 
101
- async def warmup_streaming_model(model_config: Dict) -> Dict:
102
- """Warm up streaming models - SIMPLE and EFFECTIVE"""
103
  try:
104
  start_time = time.time()
105
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
@@ -107,7 +101,7 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
107
  payload = model_config['payload']
108
  api_url = f"{model_config['url']}{model_config['endpoint']}"
109
 
110
- # Simple POST request to chat endpoint
111
  response = await client.post(api_url, json=payload)
112
 
113
  if response.status_code != 200:
@@ -120,32 +114,45 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
120
  'timestamp': datetime.now().isoformat()
121
  }
122
 
123
- # Read streaming response
124
  collected_response = ""
125
- async for chunk in response.aiter_text():
126
- if chunk.strip():
127
- collected_response += chunk
 
 
 
 
 
 
 
128
 
129
  response_time = round((time.time() - start_time) * 1000, 1)
130
 
131
- # Success if we got any response
132
  if collected_response:
 
 
 
 
133
  return {
134
- 'status': 'success',
135
  'response_time_ms': response_time,
136
  'status_code': response.status_code,
137
- 'model_response': collected_response[:100],
138
  'response_length': len(collected_response),
139
  'got_response': True,
 
140
  'timestamp': datetime.now().isoformat()
141
  }
142
  else:
143
  return {
144
  'status': 'error',
145
- 'error': 'No response received',
146
  'response_time_ms': response_time,
147
  'status_code': response.status_code,
148
  'got_response': False,
 
149
  'timestamp': datetime.now().isoformat()
150
  }
151
 
@@ -155,6 +162,7 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
155
  'error': 'Request timeout',
156
  'response_time_ms': round((time.time() - start_time) * 1000, 1),
157
  'got_response': False,
 
158
  'timestamp': datetime.now().isoformat()
159
  }
160
  except Exception as e:
@@ -163,6 +171,7 @@ async def warmup_streaming_model(model_config: Dict) -> Dict:
163
  'error': str(e),
164
  'response_time_ms': round((time.time() - start_time) * 1000, 1),
165
  'got_response': False,
 
166
  'timestamp': datetime.now().isoformat()
167
  }
168
 
@@ -204,8 +213,8 @@ async def warmup_model_health(model_config: Dict) -> Dict:
204
 
205
  async def warmup_single_model(model_config: Dict) -> Dict:
206
  """Route to appropriate warming method"""
207
- if model_config.get('type') == 'streaming_chat':
208
- return await warmup_streaming_model(model_config)
209
  else:
210
  return await warmup_model_health(model_config)
211
 
@@ -251,7 +260,9 @@ async def warmup_all_models():
251
  if result['status'] == 'success':
252
  response_time = result['response_time_ms']
253
  if result.get('got_response'):
254
- print(f" {model_name}: {response_time}ms")
 
 
255
  else:
256
  print(f"✅ {model_name}: {response_time}ms")
257
  else:
@@ -261,7 +272,8 @@ async def warmup_all_models():
261
  last_model_warmup = datetime.now()
262
 
263
  success_count = sum(1 for result in results if result['status'] == 'success')
264
- print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Models: {success_count}/{len(models_to_warm)} Healthy")
 
265
 
266
  return results
267
 
@@ -346,6 +358,7 @@ async def home():
346
  regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
347
  model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
348
  health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
 
349
 
350
  # Get model details for display
351
  model_statuses = []
@@ -354,18 +367,22 @@ async def home():
354
  health = data['health_check']
355
 
356
  status_display = "success" if health['status'] == 'success' else "error"
 
357
  error_display = f" - {health['error']}" if health.get('error') else ""
 
358
 
359
  model_statuses.append({
360
  'name': model_info['name'],
361
  'type': model_info.get('type', 'health_check'),
362
  'status_display': status_display,
363
  'response_time': health.get('response_time_ms', 0),
364
- 'error_display': error_display
 
 
365
  })
366
 
367
  model_status_html = "".join([
368
- f"<li>{m['name']} ({m['type']}): <span class='{m['status_display']}'>{m['status_display']}</span> ({m['response_time']}ms){m['error_display']}</li>"
369
  for m in model_statuses
370
  ])
371
 
@@ -377,7 +394,7 @@ async def home():
377
  body {{ font-family: Arial, sans-serif; margin: 40px; }}
378
  .success {{ color: green; font-weight: bold; }}
379
  .error {{ color: red; font-weight: bold; }}
380
- .container {{ max-width: 1000px; margin: 0 auto; }}
381
  .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
382
  </style>
383
  </head>
@@ -393,6 +410,7 @@ async def home():
393
  <div style="background: #e3f2fd; padding: 15px; border-radius: 8px;">
394
  <h3>🤖 AI Models</h3>
395
  <p><strong>{model_success}/{len(models_to_warm)} Healthy</strong></p>
 
396
  </div>
397
  <div style="background: #fff3e0; padding: 15px; border-radius: 8px;">
398
  <h3>🔗 Pinger Network</h3>
@@ -442,10 +460,12 @@ async def ping_now():
442
  """Manually trigger immediate warming"""
443
  results = await warmup_all_models()
444
  success_count = sum(1 for result in results if result['status'] == 'success')
 
445
 
446
  return JSONResponse({
447
  "message": "Manual warming completed",
448
  "models_healthy": f"{success_count}/{len(models_to_warm)}",
 
449
  "timestamp": datetime.now().isoformat()
450
  })
451
 
 
23
  "https://rajhuggingface4253-compressor3pro.hf.space"
24
  ]
25
 
26
+ # Models to warm - SIMPLE prompts like frontend
27
  models_to_warm = [
28
  {
29
  "name": "Qwen 1",
30
  "url": "https://rajhuggingface4253-qwen.hf.space",
31
  "endpoint": "/chat",
32
+ "type": "sse_streaming",
33
  "timeout": 30.0,
34
  "payload": {
35
+ "prompt": "Hello, respond with OK if ready"
 
 
36
  }
37
  },
38
  {
39
  "name": "Qwen 2",
40
  "url": "https://rajhuggingface4253-qwen2.hf.space",
41
  "endpoint": "/chat",
42
+ "type": "sse_streaming",
43
  "timeout": 30.0,
44
  "payload": {
45
+ "prompt": "Hello, respond with OK if ready"
 
 
46
  }
47
  },
48
  {
49
  "name": "Qwen 3",
50
  "url": "https://rajhuggingface4253-qwen3.hf.space",
51
  "endpoint": "/chat",
52
+ "type": "sse_streaming",
53
  "timeout": 30.0,
54
  "payload": {
55
+ "prompt": "Hello, respond with OK if ready"
 
 
56
  }
57
  },
58
  {
 
92
  'timestamp': datetime.now().isoformat()
93
  }
94
 
95
+ async def warmup_sse_streaming_model(model_config: Dict) -> Dict:
96
+ """Warm up models using PROPER SSE streaming (same as frontend)"""
97
  try:
98
  start_time = time.time()
99
  async with httpx.AsyncClient(timeout=model_config.get('timeout', 30.0)) as client:
 
101
  payload = model_config['payload']
102
  api_url = f"{model_config['url']}{model_config['endpoint']}"
103
 
104
+ # Make the SSE streaming request (same as frontend)
105
  response = await client.post(api_url, json=payload)
106
 
107
  if response.status_code != 200:
 
114
  'timestamp': datetime.now().isoformat()
115
  }
116
 
117
+ # PROPER SSE STREAMING READING (same as frontend logic)
118
  collected_response = ""
119
+
120
+ # Read the stream exactly like frontend does
121
+ async for chunk in response.aiter_bytes():
122
+ if chunk:
123
+ try:
124
+ text_chunk = chunk.decode('utf-8')
125
+ collected_response += text_chunk
126
+ except UnicodeDecodeError:
127
+ # Skip binary chunks, focus on text
128
+ continue
129
 
130
  response_time = round((time.time() - start_time) * 1000, 1)
131
 
132
+ # Check if we got a valid AI response
133
  if collected_response:
134
+ # Simple validation - check if we got actual text content
135
+ is_ai_response = any(keyword in collected_response.lower() for keyword in
136
+ ['hello', 'hi', 'ok', 'ready', 'yes', 'sure', 'pong'])
137
+
138
  return {
139
+ 'status': 'success' if is_ai_response else 'error',
140
  'response_time_ms': response_time,
141
  'status_code': response.status_code,
142
+ 'model_response': collected_response[:150],
143
  'response_length': len(collected_response),
144
  'got_response': True,
145
+ 'is_ai_response': is_ai_response,
146
  'timestamp': datetime.now().isoformat()
147
  }
148
  else:
149
  return {
150
  'status': 'error',
151
+ 'error': 'Empty streaming response',
152
  'response_time_ms': response_time,
153
  'status_code': response.status_code,
154
  'got_response': False,
155
+ 'is_ai_response': False,
156
  'timestamp': datetime.now().isoformat()
157
  }
158
 
 
162
  'error': 'Request timeout',
163
  'response_time_ms': round((time.time() - start_time) * 1000, 1),
164
  'got_response': False,
165
+ 'is_ai_response': False,
166
  'timestamp': datetime.now().isoformat()
167
  }
168
  except Exception as e:
 
171
  'error': str(e),
172
  'response_time_ms': round((time.time() - start_time) * 1000, 1),
173
  'got_response': False,
174
+ 'is_ai_response': False,
175
  'timestamp': datetime.now().isoformat()
176
  }
177
 
 
213
 
214
  async def warmup_single_model(model_config: Dict) -> Dict:
215
  """Route to appropriate warming method"""
216
+ if model_config.get('type') == 'sse_streaming':
217
+ return await warmup_sse_streaming_model(model_config)
218
  else:
219
  return await warmup_model_health(model_config)
220
 
 
260
  if result['status'] == 'success':
261
  response_time = result['response_time_ms']
262
  if result.get('got_response'):
263
+ ai_indicator = "✓AI" if result.get('is_ai_response') else "✗AI"
264
+ preview = result.get('model_response', '')[:50].replace('\n', ' ')
265
+ print(f"✅ {model_name}: {response_time}ms {ai_indicator} - '{preview}...'")
266
  else:
267
  print(f"✅ {model_name}: {response_time}ms")
268
  else:
 
272
  last_model_warmup = datetime.now()
273
 
274
  success_count = sum(1 for result in results if result['status'] == 'success')
275
+ ai_success_count = sum(1 for result in results if result.get('is_ai_response'))
276
+ print(f"🤖 {datetime.now().strftime('%H:%M:%S')} - Models: {success_count}/{len(models_to_warm)} Healthy ({ai_success_count} AI responding)")
277
 
278
  return results
279
 
 
358
  regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
359
  model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
360
  health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
361
+ ai_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('is_ai_response'))
362
 
363
  # Get model details for display
364
  model_statuses = []
 
367
  health = data['health_check']
368
 
369
  status_display = "success" if health['status'] == 'success' else "error"
370
+ ai_indicator = " ✓AI" if health.get('is_ai_response') else ""
371
  error_display = f" - {health['error']}" if health.get('error') else ""
372
+ preview = f" - '{health.get('model_response', '')[:50]}...'" if health.get('model_response') else ""
373
 
374
  model_statuses.append({
375
  'name': model_info['name'],
376
  'type': model_info.get('type', 'health_check'),
377
  'status_display': status_display,
378
  'response_time': health.get('response_time_ms', 0),
379
+ 'ai_indicator': ai_indicator,
380
+ 'error_display': error_display,
381
+ 'preview': preview
382
  })
383
 
384
  model_status_html = "".join([
385
+ f"<li>{m['name']} ({m['type']}): <span class='{m['status_display']}'>{m['status_display']}</span> ({m['response_time']}ms){m['ai_indicator']}{m['error_display']}{m['preview']}</li>"
386
  for m in model_statuses
387
  ])
388
 
 
394
  body {{ font-family: Arial, sans-serif; margin: 40px; }}
395
  .success {{ color: green; font-weight: bold; }}
396
  .error {{ color: red; font-weight: bold; }}
397
+ .container {{ max-width: 1200px; margin: 0 auto; }}
398
  .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; }}
399
  </style>
400
  </head>
 
410
  <div style="background: #e3f2fd; padding: 15px; border-radius: 8px;">
411
  <h3>🤖 AI Models</h3>
412
  <p><strong>{model_success}/{len(models_to_warm)} Healthy</strong></p>
413
+ <p><strong>{ai_success}/{len(models_to_warm)} AI Responding</strong></p>
414
  </div>
415
  <div style="background: #fff3e0; padding: 15px; border-radius: 8px;">
416
  <h3>🔗 Pinger Network</h3>
 
460
  """Manually trigger immediate warming"""
461
  results = await warmup_all_models()
462
  success_count = sum(1 for result in results if result['status'] == 'success')
463
+ ai_success_count = sum(1 for result in results if result.get('is_ai_response'))
464
 
465
  return JSONResponse({
466
  "message": "Manual warming completed",
467
  "models_healthy": f"{success_count}/{len(models_to_warm)}",
468
+ "ai_responding": f"{ai_success_count}/{len(models_to_warm)}",
469
  "timestamp": datetime.now().isoformat()
470
  })
471