Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -489,6 +489,87 @@ def calculate_token_cost(username: str, duration_ms: float) -> str:
|
|
| 489 |
"timestamp": datetime.datetime.now(pytz.UTC).isoformat()
|
| 490 |
}, indent=2)
|
| 491 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
def get_backend_health() -> str:
|
| 493 |
"""SPEED-OPTIMIZED backend health status with hard-coded RAM"""
|
| 494 |
logger.info(f"[BACKEND-HEALTH] Checking backend health status...")
|
|
|
|
| 489 |
"timestamp": datetime.datetime.now(pytz.UTC).isoformat()
|
| 490 |
}, indent=2)
|
| 491 |
|
| 492 |
+
def get_cache_stats() -> str:
    """SPEED-OPTIMIZED cache statistics with performance tracking.

    Aggregates in-memory backend state from module-level globals
    (prompt_cache, response_cache, token_ledger, performance_stats) together
    with the hard-coded Hugging Face Space RAM budget (TOTAL_RAM_GB /
    USABLE_RAM_GB) and reports everything as one JSON document.

    Returns:
        str: pretty-printed JSON. On success: ``"success": True`` plus the
        statistics payload. On any failure: ``"success": False`` with the
        error message/type — this function never raises, so callers always
        receive a parseable payload.
    """
    # perf_counter() is monotonic: the elapsed measurement cannot be skewed
    # by wall-clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()

    try:
        # Approximate cache memory footprint via string length (bytes if the
        # content is mostly ASCII — an estimate, not an exact RSS figure).
        total_prompt_memory = sum(len(str(v)) for v in prompt_cache.values())
        total_response_memory = sum(len(v['response']) for v in response_cache.values())

        # Aggregate per-user ledger totals.
        total_requests = sum(u['requests'] for u in token_ledger.values())
        total_tokens = sum(u['total_cost'] for u in token_ledger.values())
        total_duration = sum(u['total_duration_ms'] for u in token_ledger.values())

        # User statistics. Hoist the wall-clock read and the ledger size out
        # of the comprehension/divisions so each is computed exactly once.
        now = time.time()
        active_users = sum(
            1 for u in token_ledger.values()
            if now - u.get('last_seen', u.get('first_seen', 0)) < 3600
        )
        user_count = len(token_ledger)
        avg_requests_per_user = total_requests / user_count if user_count else 0
        avg_tokens_per_user = total_tokens / user_count if user_count else 0

        # Performance metrics (guard the cold-start division by zero).
        cache_hit_rate = (
            performance_stats["cache_hits"] / performance_stats["total_requests"] * 100
            if performance_stats["total_requests"] > 0 else 0
        )
        memory_usage_mb = get_memory_usage()
        uptime_seconds = round(time.time() - backend_start_time, 2)

        # HARD-CODED: Hugging Face Space RAM limits — constants, not values
        # probed from the host (hence "hardcoded": True in the payload).
        total_ram_mb = TOTAL_RAM_GB * 1024    # 18GB * 1024 = 18432MB
        usable_ram_mb = USABLE_RAM_GB * 1024  # 16GB * 1024 = 16384MB
        used_ram_mb = memory_usage_mb
        # NOTE(review): if real usage exceeds the hard-coded budget these can
        # go negative / above 100%; reported as-is rather than clamped.
        available_ram_mb = usable_ram_mb - used_ram_mb
        ram_usage_pct = (used_ram_mb / usable_ram_mb) * 100

        processing_time = time.perf_counter() - start_time

        result = {
            "success": True,
            "prompt_cache_size": len(prompt_cache),
            "response_cache_size": len(response_cache),
            "users_tracked": user_count,
            "active_users_last_hour": active_users,
            "total_requests": total_requests,
            "total_tokens_spent": round(total_tokens, 4),
            "total_duration_ms": round(total_duration, 2),
            "avg_requests_per_user": round(avg_requests_per_user, 2),
            "avg_tokens_per_user": round(avg_tokens_per_user, 4),
            "prompt_cache_memory_bytes": total_prompt_memory,
            "response_cache_memory_bytes": total_response_memory,
            "total_cache_memory_bytes": total_prompt_memory + total_response_memory,
            # PERFORMANCE METRICS
            "performance_stats": performance_stats,
            "cache_hit_rate_pct": round(cache_hit_rate, 2),
            "memory_usage_mb": round(memory_usage_mb, 2),
            "uptime_seconds": uptime_seconds,
            "requests_per_second": round(total_requests / uptime_seconds, 2) if uptime_seconds > 0 else 0,
            # HARD-CODED RAM INFO
            "ram_info": {
                "total_ram_gb": TOTAL_RAM_GB,
                "usable_ram_gb": USABLE_RAM_GB,
                "used_ram_mb": round(used_ram_mb, 2),
                "available_ram_mb": round(available_ram_mb, 2),
                "total_ram_mb": total_ram_mb,
                "ram_usage_pct": round(ram_usage_pct, 2),
                "hardcoded": True
            },
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
            # MD5 here is a short correlation id for log matching — not a
            # security mechanism.
            "request_id": hashlib.md5(f"stats{time.time()}".encode()).hexdigest()[:8]
        }

        logger.info(f"[CACHE-STATS] ⚡ Retrieved in {processing_time*1000:.1f}ms - {cache_hit_rate:.1f}% hit rate | RAM: {used_ram_mb:.1f}/{usable_ram_mb:.1f}MB ({ram_usage_pct:.1f}%)")
        return json.dumps(result, indent=2)

    except Exception as e:
        # Best-effort stats endpoint: swallow the error and report it as a
        # JSON payload so the caller never sees a raised exception.
        processing_time = time.perf_counter() - start_time
        logger.error(f"[CACHE-STATS] ❌ Failed after {processing_time*1000:.1f}ms: {e}")

        return json.dumps({
            "success": False,
            "error": str(e),
            "error_type": type(e).__name__,
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
        }, indent=2)

| 573 |
def get_backend_health() -> str:
|
| 574 |
"""SPEED-OPTIMIZED backend health status with hard-coded RAM"""
|
| 575 |
logger.info(f"[BACKEND-HEALTH] Checking backend health status...")
|