Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -489,6 +489,87 @@ def calculate_token_cost(username: str, duration_ms: float) -> str:
|
|
| 489 |
"timestamp": datetime.datetime.now(pytz.UTC).isoformat()
|
| 490 |
}, indent=2)
|
| 491 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
def get_backend_health() -> str:
|
| 493 |
"""SPEED-OPTIMIZED backend health status with hard-coded RAM"""
|
| 494 |
logger.info(f"[BACKEND-HEALTH] Checking backend health status...")
|
|
|
|
| 489 |
"timestamp": datetime.datetime.now(pytz.UTC).isoformat()
|
| 490 |
}, indent=2)
|
| 491 |
|
| 492 |
+
def get_cache_stats() -> str:
    """SPEED-OPTIMIZED cache statistics with performance tracking.

    Aggregates in-memory backend state from module-level globals
    (prompt_cache, response_cache, token_ledger, performance_stats) together
    with the hard-coded Hugging Face Space RAM budget (TOTAL_RAM_GB /
    USABLE_RAM_GB) and reports everything as one JSON document.

    Returns:
        str: pretty-printed JSON. On success: ``"success": True`` plus the
        statistics payload. On any failure: ``"success": False`` with the
        error message/type — this function never raises, so callers always
        receive a parseable payload.
    """
    # perf_counter() is monotonic: the elapsed measurement cannot be skewed
    # by wall-clock adjustments the way time.time() deltas can.
    start_time = time.perf_counter()

    try:
        # Approximate cache memory footprint via string length (bytes if the
        # content is mostly ASCII — an estimate, not an exact RSS figure).
        total_prompt_memory = sum(len(str(v)) for v in prompt_cache.values())
        total_response_memory = sum(len(v['response']) for v in response_cache.values())

        # Aggregate per-user ledger totals.
        total_requests = sum(u['requests'] for u in token_ledger.values())
        total_tokens = sum(u['total_cost'] for u in token_ledger.values())
        total_duration = sum(u['total_duration_ms'] for u in token_ledger.values())

        # User statistics. Hoist the wall-clock read and the ledger size out
        # of the comprehension/divisions so each is computed exactly once.
        now = time.time()
        active_users = sum(
            1 for u in token_ledger.values()
            if now - u.get('last_seen', u.get('first_seen', 0)) < 3600
        )
        user_count = len(token_ledger)
        avg_requests_per_user = total_requests / user_count if user_count else 0
        avg_tokens_per_user = total_tokens / user_count if user_count else 0

        # Performance metrics (guard the cold-start division by zero).
        cache_hit_rate = (
            performance_stats["cache_hits"] / performance_stats["total_requests"] * 100
            if performance_stats["total_requests"] > 0 else 0
        )
        memory_usage_mb = get_memory_usage()
        uptime_seconds = round(time.time() - backend_start_time, 2)

        # HARD-CODED: Hugging Face Space RAM limits — constants, not values
        # probed from the host (hence "hardcoded": True in the payload).
        total_ram_mb = TOTAL_RAM_GB * 1024    # 18GB * 1024 = 18432MB
        usable_ram_mb = USABLE_RAM_GB * 1024  # 16GB * 1024 = 16384MB
        used_ram_mb = memory_usage_mb
        # NOTE(review): if real usage exceeds the hard-coded budget these can
        # go negative / above 100%; reported as-is rather than clamped.
        available_ram_mb = usable_ram_mb - used_ram_mb
        ram_usage_pct = (used_ram_mb / usable_ram_mb) * 100

        processing_time = time.perf_counter() - start_time

        result = {
            "success": True,
            "prompt_cache_size": len(prompt_cache),
            "response_cache_size": len(response_cache),
            "users_tracked": user_count,
            "active_users_last_hour": active_users,
            "total_requests": total_requests,
            "total_tokens_spent": round(total_tokens, 4),
            "total_duration_ms": round(total_duration, 2),
            "avg_requests_per_user": round(avg_requests_per_user, 2),
            "avg_tokens_per_user": round(avg_tokens_per_user, 4),
            "prompt_cache_memory_bytes": total_prompt_memory,
            "response_cache_memory_bytes": total_response_memory,
            "total_cache_memory_bytes": total_prompt_memory + total_response_memory,
            # PERFORMANCE METRICS
            "performance_stats": performance_stats,
            "cache_hit_rate_pct": round(cache_hit_rate, 2),
            "memory_usage_mb": round(memory_usage_mb, 2),
            "uptime_seconds": uptime_seconds,
            "requests_per_second": round(total_requests / uptime_seconds, 2) if uptime_seconds > 0 else 0,
            # HARD-CODED RAM INFO
            "ram_info": {
                "total_ram_gb": TOTAL_RAM_GB,
                "usable_ram_gb": USABLE_RAM_GB,
                "used_ram_mb": round(used_ram_mb, 2),
                "available_ram_mb": round(available_ram_mb, 2),
                "total_ram_mb": total_ram_mb,
                "ram_usage_pct": round(ram_usage_pct, 2),
                "hardcoded": True
            },
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
            # MD5 here is a short correlation id for log matching — not a
            # security mechanism.
            "request_id": hashlib.md5(f"stats{time.time()}".encode()).hexdigest()[:8]
        }

        logger.info(f"[CACHE-STATS] ⚡ Retrieved in {processing_time*1000:.1f}ms - {cache_hit_rate:.1f}% hit rate | RAM: {used_ram_mb:.1f}/{usable_ram_mb:.1f}MB ({ram_usage_pct:.1f}%)")
        return json.dumps(result, indent=2)

    except Exception as e:
        # Best-effort stats endpoint: swallow the error and report it as a
        # JSON payload so the caller never sees a raised exception.
        processing_time = time.perf_counter() - start_time
        logger.error(f"[CACHE-STATS] ❌ Failed after {processing_time*1000:.1f}ms: {e}")

        return json.dumps({
            "success": False,
            "error": str(e),
            "error_type": type(e).__name__,
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
        }, indent=2)

| 573 |
def get_backend_health() -> str:
|
| 574 |
"""SPEED-OPTIMIZED backend health status with hard-coded RAM"""
|
| 575 |
logger.info(f"[BACKEND-HEALTH] Checking backend health status...")
|