turtle170 committed on
Commit
01e3269
·
verified ·
1 Parent(s): fe3a741

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py CHANGED
@@ -489,6 +489,87 @@ def calculate_token_cost(username: str, duration_ms: float) -> str:
489
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
490
  }, indent=2)
491
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  def get_backend_health() -> str:
493
  """SPEED-OPTIMIZED backend health status with hard-coded RAM"""
494
  logger.info(f"[BACKEND-HEALTH] Checking backend health status...")
 
489
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
490
  }, indent=2)
491
 
492
def get_cache_stats() -> str:
    """Return cache, ledger, performance and RAM statistics as a JSON string.

    Reads the module-level ``prompt_cache``, ``response_cache``,
    ``token_ledger`` and ``performance_stats`` structures, combines them with
    the value returned by ``get_memory_usage()`` and the hard-coded
    ``TOTAL_RAM_GB`` / ``USABLE_RAM_GB`` limits, and serializes everything
    with ``json.dumps(..., indent=2)``.

    Returns:
        A JSON document. On success it contains ``"success": true`` plus the
        statistics. If gathering the statistics raises, the exception is
        logged and the document instead contains ``"success": false`` with
        ``error``, ``error_type``, ``processing_time_ms`` and ``timestamp``.
    """
    start_time = time.time()

    try:
        # Snapshot the ledger once so every aggregate below is derived from
        # the same set of entries instead of re-walking the dict each time.
        ledger_entries = list(token_ledger.values())
        user_count = len(ledger_entries)

        # NOTE: these are sums of len(), i.e. element/character counts, and
        # are reported under "*_memory_bytes" keys. They approximate cache
        # size; they are not measured memory usage.
        total_prompt_memory = sum(len(str(v)) for v in prompt_cache.values())
        total_response_memory = sum(len(v['response']) for v in response_cache.values())

        total_requests = sum(u['requests'] for u in ledger_entries)
        total_tokens = sum(u['total_cost'] for u in ledger_entries)
        total_duration = sum(u['total_duration_ms'] for u in ledger_entries)

        # User statistics: an entry counts as active when its 'last_seen'
        # (falling back to 'first_seen', then 0) is under an hour old.
        # The current time is read once rather than once per entry.
        now = time.time()
        active_users = sum(
            1
            for u in ledger_entries
            if now - u.get('last_seen', u.get('first_seen', 0)) < 3600
        )
        avg_requests_per_user = total_requests / user_count if user_count else 0
        avg_tokens_per_user = total_tokens / user_count if user_count else 0

        # Performance metrics
        perf_total_requests = performance_stats["total_requests"]
        if perf_total_requests > 0:
            cache_hit_rate = performance_stats["cache_hits"] / perf_total_requests * 100
        else:
            cache_hit_rate = 0
        memory_usage_mb = get_memory_usage()
        uptime_seconds = round(time.time() - backend_start_time, 2)

        # HARD-CODED: convert the module-level GB limits to MB.
        total_ram_mb = TOTAL_RAM_GB * 1024
        usable_ram_mb = USABLE_RAM_GB * 1024
        used_ram_mb = memory_usage_mb
        available_ram_mb = usable_ram_mb - used_ram_mb
        # Guarded like every other ratio in this function so a zero limit
        # yields 0% instead of failing the whole stats call.
        ram_usage_pct = (used_ram_mb / usable_ram_mb) * 100 if usable_ram_mb > 0 else 0

        # Measured before the payload is built and serialized.
        processing_time = time.time() - start_time

        result = {
            "success": True,
            "prompt_cache_size": len(prompt_cache),
            "response_cache_size": len(response_cache),
            "users_tracked": user_count,
            "active_users_last_hour": active_users,
            "total_requests": total_requests,
            "total_tokens_spent": round(total_tokens, 4),
            "total_duration_ms": round(total_duration, 2),
            "avg_requests_per_user": round(avg_requests_per_user, 2),
            "avg_tokens_per_user": round(avg_tokens_per_user, 4),
            "prompt_cache_memory_bytes": total_prompt_memory,
            "response_cache_memory_bytes": total_response_memory,
            "total_cache_memory_bytes": total_prompt_memory + total_response_memory,
            # PERFORMANCE METRICS
            "performance_stats": performance_stats,
            "cache_hit_rate_pct": round(cache_hit_rate, 2),
            "memory_usage_mb": round(memory_usage_mb, 2),
            "uptime_seconds": uptime_seconds,
            "requests_per_second": round(total_requests / uptime_seconds, 2) if uptime_seconds > 0 else 0,
            # HARD-CODED RAM INFO
            "ram_info": {
                "total_ram_gb": TOTAL_RAM_GB,
                "usable_ram_gb": USABLE_RAM_GB,
                "used_ram_mb": round(used_ram_mb, 2),
                "available_ram_mb": round(available_ram_mb, 2),
                "total_ram_mb": total_ram_mb,
                "ram_usage_pct": round(ram_usage_pct, 2),
                "hardcoded": True
            },
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
            # Short identifier derived from the current time; not a security token.
            "request_id": hashlib.md5(f"stats{time.time()}".encode()).hexdigest()[:8]
        }

        logger.info(f"[CACHE-STATS] ⚡ Retrieved in {processing_time*1000:.1f}ms - {cache_hit_rate:.1f}% hit rate | RAM: {used_ram_mb:.1f}/{usable_ram_mb:.1f}MB ({ram_usage_pct:.1f}%)")
        return json.dumps(result, indent=2)

    except Exception as e:
        # Boundary handler: report the failure in the JSON payload rather
        # than propagating it to the caller.
        processing_time = time.time() - start_time
        logger.error(f"[CACHE-STATS] ❌ Failed after {processing_time*1000:.1f}ms: {e}")

        return json.dumps({
            "success": False,
            "error": str(e),
            "error_type": type(e).__name__,
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
        }, indent=2)
572
+
573
  def get_backend_health() -> str:
574
  """SPEED-OPTIMIZED backend health status with hard-coded RAM"""
575
  logger.info(f"[BACKEND-HEALTH] Checking backend health status...")