from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
import httpx
import asyncio
import time
from datetime import datetime
from typing import Dict, Optional
from contextlib import asynccontextmanager

# Configuration
PING_INTERVAL = 600  # 10 minutes
HEALTH_CHECK_INTERVAL = 1800  # 30 minutes

# List of other pinger Spaces
pinger_spaces = [
    "https://rajhuggingface4253-ping2.hf.space",
]

# Regular servers to ping (HTTP GET)
regular_servers = [
    "https://rajhuggingface4253-backend-compressorpro.hf.space",
    "https://rajhuggingface4253-backend-compressorpro2.hf.space",
    "https://rajhuggingface4253-compressor3pro.hf.space",
    "https://rajhuggingface4253-bgr.hf.space",
    "https://rajhuggingface4253-real.hf.space",
    "https://rajhuggingface4253-cmy.hf.space",
    "https://rajhuggingface4253-waif.hf.space"
]

# Models to warm; "streaming_chat" entries POST their payload, "health_check" entries GET their endpoint
models_to_warm = [
    {
        "name": "gemma",
        "url": "https://rajhuggingface4253-gemma-checking.hf.space",
        "endpoint": "/fix",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "text": "warmup",
        }
    },
    {
        "name": "NLLB Translator",
        "url": "https://rajhuggingface4253-translate.hf.space",
        "endpoint": "/translate",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "text": "warmup",
            "src_lang": "eng_Latn",
            "tgt_lang": "hin_Deva"
        }
    },
    {
        "name": "Qwen 1",
        "url": "https://rajhuggingface4253-qwen.hf.space",
        "endpoint": "/chat",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
            "max_new_tokens": 50,
            "temperature": 0.1,
            "enable_code_execution": False,
            "enable_web_search": False,
            "enable_thinking": False
        }
    },
    {
        "name": "Qwen 2", 
        "url": "https://rajhuggingface4253-qwe.hf.space",
        "endpoint": "/chat",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
            "max_new_tokens": 50,
            "temperature": 0.1,
            "enable_code_execution": False,
            "enable_web_search": False,
            "enable_thinking": False
        }
    },
    {
        "name": "Qwen 3",
        "url": "https://rajhuggingface4253-qwen3.hf.space",
        "endpoint": "/chat",
        "type": "streaming_chat",
        "timeout": 45.0,
        "payload": {
            "prompt": "--- HISTORY START ---\nUser: Say 'ready' if you're working\n--- HISTORY END ---\n\nUser's latest message: \"Say 'ready' if you're working\"",
            "max_new_tokens": 50,
            "temperature": 0.1,
            "enable_code_execution": False,
            "enable_web_search": False,
            "enable_thinking": False
        }
    },
    {
        "name": "Kokoro TTS",
        "url": "https://rajhuggingface4253-koko.hf.space", 
        "endpoint": "/health",
        "type": "health_check",
        "timeout": 15.0
    },
    {
        "name": "Kitten TTS",
        "url": "https://rajhuggingface4253-kitten.hf.space", 
        "endpoint": "/health",
        "type": "health_check",
        "timeout": 15.0
    }
]

# Global state
ping_results: Dict[str, Dict] = {}
model_warmup_results: Dict[str, Dict] = {}
health_results: Dict[str, Dict] = {}
last_ping_run: Optional[datetime] = None
last_model_warmup: Optional[datetime] = None
last_health_check_time: float = 0.0

class ParallelWarmer:
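    """Pings servers, warms models, and checks peer pingers concurrently,
    retrying each target with exponential backoff."""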
    def __init__(self):
        self.max_retries = 3
        self.retry_delay = 1  # Base delay in seconds for exponential backoff
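        # With max_retries=3 and retry_delay=1, failed attempts wait 1s, then 2s
        # (retry_delay * 2**attempt); there is no sleep after the final attempt.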
    
    async def ping_server_with_retry(self, url: str) -> Dict:
        """Ping a server with automatic retry on failure"""
        last_error = None
        response_time = 0.0  # reported in the error result even if the first attempt fails early
        
        for attempt in range(self.max_retries):
            start_time = time.time()  # Defined BEFORE try block
            try:
                async with httpx.AsyncClient(timeout=10.0) as client:
                    response = await client.get(url)
                    response_time = round((time.time() - start_time) * 1000, 1)
                    
                    if response.status_code < 500:  # Only retry on server errors
                        return {
                            'status': 'success',
                            'response_time_ms': response_time,
                            'status_code': response.status_code,
                            'timestamp': datetime.now().isoformat(),
                            'attempts': attempt + 1
                        }
                    else:
                        last_error = f"HTTP {response.status_code}"
            except Exception as e:
                last_error = str(e)
                response_time = round((time.time() - start_time) * 1000, 1)
            
            # Exponential backoff for retries
            if attempt < self.max_retries - 1:
                await asyncio.sleep(self.retry_delay * (2 ** attempt))
        
        return {
            'status': 'error',
            'error': str(last_error) if last_error else 'Max retries exceeded',
            'timestamp': datetime.now().isoformat(),
            'attempts': self.max_retries,
            'response_time_ms': response_time
        }
    
    async def warmup_chat_model_with_retry(self, model_config: Dict) -> Dict:
        """Warm up chat models with retry logic"""
        last_error = None
        
        for attempt in range(self.max_retries):
            start_time = time.time()  # ✅ Defined BEFORE try block
            try:
                async with httpx.AsyncClient(timeout=model_config.get('timeout', 45.0)) as client:
                    payload = model_config['payload']
                    api_url = f"{model_config['url']}{model_config['endpoint']}"
                    
                    response = await client.post(api_url, json=payload)
                    response_time = round((time.time() - start_time) * 1000, 1)
                    
                    if response.status_code == 200:
                        try:
                            collected_response = ""
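                            # Note: client.post() buffers the full body, so aiter_text()
                            # below replays the buffered content in chunks rather than
                            # streaming live; that's sufficient for a warmup request.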
                            async for chunk in response.aiter_text():
                                if chunk.strip():
                                    collected_response += chunk
                            
                            if collected_response and len(collected_response.strip()) > 5:
                                return {
                                    'status': 'success',
                                    'response_time_ms': response_time,
                                    'status_code': response.status_code,
                                    'ai_response': collected_response[:100].strip(),
                                    'got_ai_response': True,
                                    'timestamp': datetime.now().isoformat(),
                                    'attempts': attempt + 1
                                }
                            else:
                                return {
                                    'status': 'success',
                                    'response_time_ms': response_time,
                                    'status_code': response.status_code,
                                    'ai_response': 'Empty response',
                                    'got_ai_response': False,
                                    'timestamp': datetime.now().isoformat(),
                                    'attempts': attempt + 1
                                }
                        except Exception as e:
                            # Stream error but HTTP 200 OK
                            return {
                                'status': 'success',
                                'response_time_ms': response_time,
                                'status_code': response.status_code,
                                'ai_response': f'Stream error: {str(e)}',
                                'got_ai_response': False,
                                'timestamp': datetime.now().isoformat(),
                                'attempts': attempt + 1
                            }
                    else:
                        last_error = f"HTTP {response.status_code}"
            except httpx.TimeoutException:  # httpx raises its own timeout type, not asyncio.TimeoutError
                last_error = 'Request timeout'
            except Exception as e:
                last_error = str(e)
            
            # Calculate response time even on error
            response_time = round((time.time() - start_time) * 1000, 1)
            
            # Exponential backoff for retries
            if attempt < self.max_retries - 1:
                await asyncio.sleep(self.retry_delay * (2 ** attempt))
        
        return {
            'status': 'error',
            'error': str(last_error) if last_error else 'Max retries exceeded',
            'response_time_ms': response_time,
            'timestamp': datetime.now().isoformat(),
            'attempts': self.max_retries
        }
    
    async def warmup_health_model_with_retry(self, model_config: Dict) -> Dict:
        """Warm up health endpoint models with retry"""
        last_error = None
        
        for attempt in range(self.max_retries):
            start_time = time.time()  # ✅ Defined BEFORE try block
            try:
                async with httpx.AsyncClient(timeout=model_config.get('timeout', 15.0)) as client:
                    api_url = f"{model_config['url']}{model_config['endpoint']}"
                    response = await client.get(api_url)
                    response_time = round((time.time() - start_time) * 1000, 1)
                    
                    if response.status_code == 200:
                        return {
                            'status': 'success',
                            'response_time_ms': response_time,
                            'status_code': response.status_code,
                            'timestamp': datetime.now().isoformat(),
                            'attempts': attempt + 1
                        }
                    else:
                        last_error = f"HTTP {response.status_code}"
            except Exception as e:
                last_error = str(e)
            
            # Calculate response time even on error
            response_time = round((time.time() - start_time) * 1000, 1)
            
            # Exponential backoff for retries
            if attempt < self.max_retries - 1:
                await asyncio.sleep(self.retry_delay * (2 ** attempt))
        
        return {
            'status': 'error',
            'error': str(last_error) if last_error else 'Max retries exceeded',
            'response_time_ms': response_time,
            'timestamp': datetime.now().isoformat(),
            'attempts': self.max_retries
        }
    
    async def warmup_single_model_with_retry(self, model_config: Dict) -> Dict:
        """Route to appropriate warming method with retry"""
        if model_config.get('type') == 'streaming_chat':
            return await self.warmup_chat_model_with_retry(model_config)
        else:
            return await self.warmup_health_model_with_retry(model_config)

# Initialize the parallel warmer
warmer = ParallelWarmer()

async def ping_all_in_parallel():
    """Ping ALL targets in parallel with isolated error handling"""
    global ping_results, model_warmup_results, health_results, last_ping_run, last_model_warmup, last_health_check_time
    
    all_tasks = []
    task_mapping = {}  # task -> ('server' | 'model' | 'health', identifier) for routing results
    
    # 1. Create tasks for regular servers
    for server in regular_servers:
        task = asyncio.create_task(warmer.ping_server_with_retry(server))
        all_tasks.append(task)
        task_mapping[task] = ('server', server)
    
    # 2. Create tasks for model warmups
    for model in models_to_warm:
        task = asyncio.create_task(warmer.warmup_single_model_with_retry(model))
        all_tasks.append(task)
        task_mapping[task] = ('model', model['url'])
    
    # 3. Create tasks for health checks (if needed)
    current_time = time.time()
    if (current_time - last_health_check_time) >= HEALTH_CHECK_INTERVAL and pinger_spaces:
        for space_url in pinger_spaces:
            health_url = f"{space_url}/health"
            task = asyncio.create_task(warmer.ping_server_with_retry(health_url))
            all_tasks.append(task)
            task_mapping[task] = ('health', space_url)
        last_health_check_time = current_time
    
    # 4. Execute ALL tasks in parallel; return_exceptions=True keeps one failure from cancelling the rest
    if all_tasks:
        results = await asyncio.gather(*all_tasks, return_exceptions=True)
        
        # 5. Process results (isolated - failures don't affect others)
        for task, result in zip(all_tasks, results):
            task_type, identifier = task_mapping[task]
            
            if isinstance(result, Exception):
                # Task crashed but we isolate the failure
                print(f"⚠️  Task crashed with exception: {type(result).__name__}: {result}")
                error_result = {
                    'status': 'error',
                    'error': f"{type(result).__name__}: {str(result)}",
                    'timestamp': datetime.now().isoformat()
                }
                result = error_result
            
            if task_type == 'server':
                ping_results[identifier] = result
            elif task_type == 'model':
                # Find model name for the URL
                model_name = next((m['name'] for m in models_to_warm if m['url'] == identifier), identifier)
                model_warmup_results[identifier] = {
                    'model_info': {'name': model_name, 'url': identifier},
                    'health_check': result
                }
            elif task_type == 'health':
                health_results[identifier] = result
    
    # Update timestamps
    last_ping_run = datetime.now()
    last_model_warmup = datetime.now()
    
    # Log summary
    server_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
    model_success = sum(1 for r in model_warmup_results.values() 
                      if r['health_check'].get('status') == 'success')
    model_ai_response = sum(1 for r in model_warmup_results.values() 
                          if r['health_check'].get('got_ai_response'))
    
    print(f"βœ… {datetime.now().strftime('%H:%M:%S')} - Parallel ping complete: "
          f"{server_success}/{len(regular_servers)} servers OK, "
          f"{model_success}/{len(models_to_warm)} models healthy "
          f"({model_ai_response} AI responding)")

async def continuous_parallel_pinging():
    """Main pinging loop with full parallel execution"""
    print("πŸš€ Smart Model Warmer Started (Fully Parallel)!")
    print(f"🌐 Regular servers: {len(regular_servers)}")
    print(f"πŸ€– Models to warm: {len(models_to_warm)}")
    print(f"πŸ”— Pinger network: {len(pinger_spaces)}")
    
    while True:
        try:
            start_cycle = time.time()
            await ping_all_in_parallel()
            
            # Sleep for the remainder so cycle starts stay roughly PING_INTERVAL apart
            cycle_duration = time.time() - start_cycle
            sleep_time = max(0, PING_INTERVAL - cycle_duration)
            
            if sleep_time > 0:
                await asyncio.sleep(sleep_time)
            else:
                print(f"⚠️  Warning: Ping cycle took {cycle_duration:.1f}s (longer than {PING_INTERVAL}s interval)!")
                await asyncio.sleep(1)  # Minimum delay
                
        except Exception as e:
            print(f"❌ Error in main loop: {e}")
            await asyncio.sleep(60)  # Recover after error

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: keep a reference so the background task isn't garbage-collected mid-run
    app.state.pinger_task = asyncio.create_task(continuous_parallel_pinging())
    yield
    # Shutdown
    app.state.pinger_task.cancel()
    print("Shutting down...")

app = FastAPI(title="Smart Model Warmer", lifespan=lifespan)

@app.get("/", response_class=HTMLResponse)
async def home():
    """Dashboard showing warming status"""
    regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
    model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
    health_success = sum(1 for r in health_results.values() if r.get('status') == 'success')
    ai_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('got_ai_response'))
    
    # Get model details for display
    model_statuses = []
    for url, data in model_warmup_results.items():
        model_info = data['model_info']
        health = data['health_check']
        
        status_display = "success" if health['status'] == 'success' else "error"
        ai_indicator = " ✓AI" if health.get('got_ai_response') else ""
        error_display = f" - {health['error']}" if health.get('error') else ""
        preview = f" - '{health.get('ai_response', '')[:50]}...'" if health.get('ai_response') else ""
        
        model_statuses.append({
            'name': model_info['name'],
            'type': next((m.get('type', 'health_check') for m in models_to_warm if m['url'] == url), 'health_check'),
            'status_display': status_display,
            'response_time': health.get('response_time_ms', 0),
            'ai_indicator': ai_indicator,
            'error_display': error_display,
            'preview': preview
        })
    
    model_status_html = "".join([
        f"<li>{m['name']} ({m['type']}): <span class='{m['status_display']}'>{m['status_display']}</span> ({m['response_time']}ms){m['ai_indicator']}{m['error_display']}{m['preview']}</li>" 
        for m in model_statuses
    ])
    
    # Get server status for display
    server_statuses = []
    for server, result in ping_results.items():
        status_display = "success" if result.get('status') == 'success' else "error"
        server_name = server.split('//')[-1].split('.')[0]  # Extract name from URL
        error_display = f" - {result['error']}" if result.get('error') else ""
        
        server_statuses.append({
            'name': server_name,
            'status_display': status_display,
            'response_time': result.get('response_time_ms', 0),
            'error_display': error_display
        })
    
    server_status_html = "".join([
        f"<li>{s['name']}: <span class='{s['status_display']}'>{s['status_display']}</span> ({s['response_time']}ms){s['error_display']}</li>" 
        for s in server_statuses
    ])
    
    html_content = f"""
    <html>
        <head>
            <title>Smart Model Warmer</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 40px; }}
                .success {{ color: green; font-weight: bold; }}
                .error {{ color: red; font-weight: bold; }}
                .container {{ max-width: 1200px; margin: 0 auto; }}
                .model-list {{ background: #f5f5f5; padding: 15px; border-radius: 5px; margin-bottom: 20px; }}
                .server-list {{ background: #e8f5e8; padding: 15px; border-radius: 5px; margin-bottom: 20px; }}
                .stats-grid {{ display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-bottom: 30px; }}
                .stat-box {{ padding: 15px; border-radius: 8px; }}
                .servers-stat {{ background: #e8f5e8; }}
                .models-stat {{ background: #e3f2fd; }}
                .network-stat {{ background: #fff3e0; }}
            </style>
        </head>
        <body>
            <div class="container">
                <h1>🤖 Smart Model Warmer (Parallel)</h1>
                
                <div class="stats-grid">
                    <div class="stat-box servers-stat">
                        <h3>🌐 Regular Servers</h3>
                        <p><strong>{regular_success}/{len(regular_servers)} OK</strong></p>
                    </div>
                    <div class="stat-box models-stat">
                        <h3>🤖 AI Models</h3>
                        <p><strong>{model_success}/{len(models_to_warm)} Healthy</strong></p>
                        <p><strong>{ai_success}/{len(models_to_warm)} AI Responding</strong></p>
                    </div>
                    <div class="stat-box network-stat">
                        <h3>🔗 Pinger Network</h3>
                        <p><strong>{health_success}/{len(pinger_spaces)} OK</strong></p>
                    </div>
                </div>
                
                <div class="server-list">
                    <h3>Server Status</h3>
                    <ul>
                        {server_status_html if server_statuses else "<li>No server data yet</li>"}
                    </ul>
                </div>
                
                <div class="model-list">
                    <h3>Model Status</h3>
                    <ul>
                        {model_status_html if model_statuses else "<li>No model data yet</li>"}
                    </ul>
                </div>
                
                <p><strong>Last Model Check:</strong> {last_model_warmup.strftime('%Y-%m-%d %H:%M:%S') if last_model_warmup else 'Never'}</p>
                <p><strong>Last Server Check:</strong> {last_ping_run.strftime('%Y-%m-%d %H:%M:%S') if last_ping_run else 'Never'}</p>
                <p><strong>Next check in:</strong> ~{PING_INTERVAL // 60} minutes</p>
                <p><em>All checks run in parallel with automatic retries</em></p>
            </div>
        </body>
    </html>
    """
    return HTMLResponse(content=html_content)

@app.get("/health")
async def health():
    """Health endpoint for other pingers"""
    return JSONResponse({
        "status": "healthy",
        "service": "smart-model-warmer",
        "regular_servers": len(regular_servers),
        "ai_models": len(models_to_warm),
        "last_model_warmup": last_model_warmup.isoformat() if last_model_warmup else None,
        "last_ping_run": last_ping_run.isoformat() if last_ping_run else None,
        "parallel_execution": True,
        "retry_enabled": True,
        "timestamp": datetime.now().isoformat()
    })

@app.get("/status")
async def status():
    """Detailed status endpoint"""
    return JSONResponse({
        "regular_servers": ping_results,
        "ai_models": model_warmup_results,
        "pinger_network": health_results,
        "timestamp": datetime.now().isoformat()
    })

@app.get("/ping-now")
async def ping_now():
    """Manually trigger immediate warming"""
    await ping_all_in_parallel()
    
    regular_success = sum(1 for r in ping_results.values() if r.get('status') == 'success')
    model_success = sum(1 for r in model_warmup_results.values() if r['health_check'].get('status') == 'success')
    ai_response_count = sum(1 for r in model_warmup_results.values() if r['health_check'].get('got_ai_response'))
    
    return JSONResponse({
        "message": "Manual warming completed",
        "execution_mode": "parallel",
        "servers_ok": f"{regular_success}/{len(regular_servers)}",
        "models_healthy": f"{model_success}/{len(models_to_warm)}",
        "ai_responding": f"{ai_response_count}/{len(models_to_warm)}",
        "timestamp": datetime.now().isoformat()
    })
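
# Example usage (assuming this file is saved as app.py):
#   uvicorn app:app --host 0.0.0.0 --port 7860
#   GET /          -> HTML dashboard
#   GET /health    -> liveness JSON consumed by peer pinger Spaces
#   GET /ping-now  -> trigger an immediate parallel warm cycle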

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)