Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,8 +2,17 @@ import gradio as gr
|
|
| 2 |
import json
|
| 3 |
import time
|
| 4 |
import hashlib
|
|
|
|
| 5 |
from typing import Dict, Optional
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
# ============================================================================
|
| 8 |
# ZEROENGINE-BACKEND: Background Processing Service
|
| 9 |
# ============================================================================
|
|
@@ -25,6 +34,8 @@ def tokenize_text(text: str) -> str:
|
|
| 25 |
Returns: JSON string with token count estimation
|
| 26 |
"""
|
| 27 |
try:
|
|
|
|
|
|
|
| 28 |
# Simple estimation (4 chars ≈ 1 token for English)
|
| 29 |
# This is FAST and good enough for pre-processing
|
| 30 |
estimated_tokens = len(text) // 4
|
|
@@ -49,9 +60,11 @@ def tokenize_text(text: str) -> str:
|
|
| 49 |
"cached_at": time.time()
|
| 50 |
}
|
| 51 |
|
|
|
|
| 52 |
return json.dumps(result, indent=2)
|
| 53 |
|
| 54 |
except Exception as e:
|
|
|
|
| 55 |
return json.dumps({
|
| 56 |
"success": False,
|
| 57 |
"error": str(e)
|
|
@@ -62,6 +75,8 @@ def cache_prompt(key: str, value: str) -> str:
|
|
| 62 |
Store prompt in cache with timestamp
|
| 63 |
"""
|
| 64 |
try:
|
|
|
|
|
|
|
| 65 |
prompt_cache[key] = {
|
| 66 |
"value": value,
|
| 67 |
"timestamp": time.time()
|
|
@@ -71,7 +86,9 @@ def cache_prompt(key: str, value: str) -> str:
|
|
| 71 |
if len(prompt_cache) > 100:
|
| 72 |
oldest_key = min(prompt_cache.keys(), key=lambda k: prompt_cache[k]["timestamp"])
|
| 73 |
del prompt_cache[oldest_key]
|
|
|
|
| 74 |
|
|
|
|
| 75 |
return json.dumps({
|
| 76 |
"success": True,
|
| 77 |
"cached": key,
|
|
@@ -79,6 +96,7 @@ def cache_prompt(key: str, value: str) -> str:
|
|
| 79 |
}, indent=2)
|
| 80 |
|
| 81 |
except Exception as e:
|
|
|
|
| 82 |
return json.dumps({
|
| 83 |
"success": False,
|
| 84 |
"error": str(e)
|
|
@@ -91,18 +109,21 @@ def get_cached_prompt(key: str) -> str:
|
|
| 91 |
try:
|
| 92 |
if key in prompt_cache:
|
| 93 |
data = prompt_cache[key]
|
|
|
|
| 94 |
return json.dumps({
|
| 95 |
"success": True,
|
| 96 |
"value": data["value"],
|
| 97 |
"age_seconds": round(time.time() - data["timestamp"], 2)
|
| 98 |
}, indent=2)
|
| 99 |
|
|
|
|
| 100 |
return json.dumps({
|
| 101 |
"success": False,
|
| 102 |
"error": "Cache key not found"
|
| 103 |
}, indent=2)
|
| 104 |
|
| 105 |
except Exception as e:
|
|
|
|
| 106 |
return json.dumps({
|
| 107 |
"success": False,
|
| 108 |
"error": str(e)
|
|
@@ -113,6 +134,8 @@ def cache_response(prompt_hash: str, response: str) -> str:
|
|
| 113 |
Cache a complete response for instant retrieval
|
| 114 |
"""
|
| 115 |
try:
|
|
|
|
|
|
|
| 116 |
response_cache[prompt_hash] = {
|
| 117 |
"response": response,
|
| 118 |
"timestamp": time.time()
|
|
@@ -122,7 +145,9 @@ def cache_response(prompt_hash: str, response: str) -> str:
|
|
| 122 |
if len(response_cache) > 50:
|
| 123 |
oldest_key = min(response_cache.keys(), key=lambda k: response_cache[k]["timestamp"])
|
| 124 |
del response_cache[oldest_key]
|
|
|
|
| 125 |
|
|
|
|
| 126 |
return json.dumps({
|
| 127 |
"success": True,
|
| 128 |
"cached": prompt_hash,
|
|
@@ -130,6 +155,7 @@ def cache_response(prompt_hash: str, response: str) -> str:
|
|
| 130 |
}, indent=2)
|
| 131 |
|
| 132 |
except Exception as e:
|
|
|
|
| 133 |
return json.dumps({
|
| 134 |
"success": False,
|
| 135 |
"error": str(e)
|
|
@@ -142,18 +168,21 @@ def get_cached_response(prompt_hash: str) -> str:
|
|
| 142 |
try:
|
| 143 |
if prompt_hash in response_cache:
|
| 144 |
data = response_cache[prompt_hash]
|
|
|
|
| 145 |
return json.dumps({
|
| 146 |
"success": True,
|
| 147 |
"response": data["response"],
|
| 148 |
"age_seconds": round(time.time() - data["timestamp"], 2)
|
| 149 |
}, indent=2)
|
| 150 |
|
|
|
|
| 151 |
return json.dumps({
|
| 152 |
"success": False,
|
| 153 |
"error": "Response not cached"
|
| 154 |
}, indent=2)
|
| 155 |
|
| 156 |
except Exception as e:
|
|
|
|
| 157 |
return json.dumps({
|
| 158 |
"success": False,
|
| 159 |
"error": str(e)
|
|
@@ -165,6 +194,8 @@ def calculate_token_cost(username: str, duration_ms: float) -> str:
|
|
| 165 |
Stateless - just returns the calculation
|
| 166 |
"""
|
| 167 |
try:
|
|
|
|
|
|
|
| 168 |
cost = (duration_ms / 100.0) * 0.001 # 0.001 tokens per 100ms
|
| 169 |
|
| 170 |
# Track in ledger (for analytics)
|
|
@@ -179,6 +210,7 @@ def calculate_token_cost(username: str, duration_ms: float) -> str:
|
|
| 179 |
token_ledger[username]["total_duration_ms"] += duration_ms
|
| 180 |
token_ledger[username]["requests"] += 1
|
| 181 |
|
|
|
|
| 182 |
return json.dumps({
|
| 183 |
"success": True,
|
| 184 |
"username": username,
|
|
@@ -189,6 +221,7 @@ def calculate_token_cost(username: str, duration_ms: float) -> str:
|
|
| 189 |
}, indent=2)
|
| 190 |
|
| 191 |
except Exception as e:
|
|
|
|
| 192 |
return json.dumps({
|
| 193 |
"success": False,
|
| 194 |
"error": str(e)
|
|
@@ -199,6 +232,8 @@ def get_cache_stats() -> str:
|
|
| 199 |
Get statistics about cache usage
|
| 200 |
"""
|
| 201 |
try:
|
|
|
|
|
|
|
| 202 |
return json.dumps({
|
| 203 |
"success": True,
|
| 204 |
"prompt_cache_size": len(prompt_cache),
|
|
@@ -209,6 +244,7 @@ def get_cache_stats() -> str:
|
|
| 209 |
}, indent=2)
|
| 210 |
|
| 211 |
except Exception as e:
|
|
|
|
| 212 |
return json.dumps({
|
| 213 |
"success": False,
|
| 214 |
"error": str(e)
|
|
@@ -219,6 +255,8 @@ def get_backend_health() -> str:
|
|
| 219 |
Get backend health status for monitoring
|
| 220 |
"""
|
| 221 |
try:
|
|
|
|
|
|
|
| 222 |
return json.dumps({
|
| 223 |
"success": True,
|
| 224 |
"status": "healthy",
|
|
@@ -229,6 +267,7 @@ def get_backend_health() -> str:
|
|
| 229 |
}, indent=2)
|
| 230 |
|
| 231 |
except Exception as e:
|
|
|
|
| 232 |
return json.dumps({
|
| 233 |
"success": False,
|
| 234 |
"error": str(e)
|
|
@@ -329,20 +368,24 @@ if __name__ == "__main__":
|
|
| 329 |
|
| 330 |
def cleanup_on_exit():
|
| 331 |
"""Cleanup function called on application exit"""
|
| 332 |
-
|
| 333 |
# Clear caches
|
| 334 |
global prompt_cache, response_cache, token_ledger
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
prompt_cache.clear()
|
| 336 |
response_cache.clear()
|
| 337 |
token_ledger.clear()
|
| 338 |
-
|
| 339 |
|
| 340 |
# Register cleanup functions
|
| 341 |
atexit.register(cleanup_on_exit)
|
| 342 |
|
| 343 |
def signal_handler(signum, frame):
|
| 344 |
"""Handle shutdown signals gracefully"""
|
| 345 |
-
|
| 346 |
cleanup_on_exit()
|
| 347 |
import sys
|
| 348 |
sys.exit(0)
|
|
@@ -350,4 +393,8 @@ if __name__ == "__main__":
|
|
| 350 |
signal.signal(signal.SIGTERM, signal_handler)
|
| 351 |
signal.signal(signal.SIGINT, signal_handler)
|
| 352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|
|
|
|
| 2 |
import json
|
| 3 |
import time
|
| 4 |
import hashlib
|
| 5 |
+
import logging
|
| 6 |
from typing import Dict, Optional
|
| 7 |
|
| 8 |
+
# Initialize logging for backend
|
| 9 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - BACKEND - %(message)s', force=True)
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
# Suppress asyncio warnings during shutdown
|
| 13 |
+
import warnings
|
| 14 |
+
warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*asyncio.*")
|
| 15 |
+
|
| 16 |
# ============================================================================
|
| 17 |
# ZEROENGINE-BACKEND: Background Processing Service
|
| 18 |
# ============================================================================
|
|
|
|
| 34 |
Returns: JSON string with token count estimation
|
| 35 |
"""
|
| 36 |
try:
|
| 37 |
+
logger.info(f"[TOKENIZE] Processing text of length {len(text)}")
|
| 38 |
+
|
| 39 |
# Simple estimation (4 chars ≈ 1 token for English)
|
| 40 |
# This is FAST and good enough for pre-processing
|
| 41 |
estimated_tokens = len(text) // 4
|
|
|
|
| 60 |
"cached_at": time.time()
|
| 61 |
}
|
| 62 |
|
| 63 |
+
logger.info(f"[TOKENIZE] Estimated {estimated_tokens} tokens, cached as {text_hash}")
|
| 64 |
return json.dumps(result, indent=2)
|
| 65 |
|
| 66 |
except Exception as e:
|
| 67 |
+
logger.error(f"[TOKENIZE] Error: {e}")
|
| 68 |
return json.dumps({
|
| 69 |
"success": False,
|
| 70 |
"error": str(e)
|
|
|
|
| 75 |
Store prompt in cache with timestamp
|
| 76 |
"""
|
| 77 |
try:
|
| 78 |
+
logger.info(f"[CACHE-PROMPT] Storing key: {key} (value length: {len(value)})")
|
| 79 |
+
|
| 80 |
prompt_cache[key] = {
|
| 81 |
"value": value,
|
| 82 |
"timestamp": time.time()
|
|
|
|
| 86 |
if len(prompt_cache) > 100:
|
| 87 |
oldest_key = min(prompt_cache.keys(), key=lambda k: prompt_cache[k]["timestamp"])
|
| 88 |
del prompt_cache[oldest_key]
|
| 89 |
+
logger.info(f"[CACHE-PROMPT] Removed oldest entry: {oldest_key}")
|
| 90 |
|
| 91 |
+
logger.info(f"[CACHE-PROMPT] Stored successfully. Cache size: {len(prompt_cache)}")
|
| 92 |
return json.dumps({
|
| 93 |
"success": True,
|
| 94 |
"cached": key,
|
|
|
|
| 96 |
}, indent=2)
|
| 97 |
|
| 98 |
except Exception as e:
|
| 99 |
+
logger.error(f"[CACHE-PROMPT] Error: {e}")
|
| 100 |
return json.dumps({
|
| 101 |
"success": False,
|
| 102 |
"error": str(e)
|
|
|
|
| 109 |
try:
|
| 110 |
if key in prompt_cache:
|
| 111 |
data = prompt_cache[key]
|
| 112 |
+
logger.info(f"[CACHE-PROMPT] Retrieved key: {key} (value length: {len(data['value'])})")
|
| 113 |
return json.dumps({
|
| 114 |
"success": True,
|
| 115 |
"value": data["value"],
|
| 116 |
"age_seconds": round(time.time() - data["timestamp"], 2)
|
| 117 |
}, indent=2)
|
| 118 |
|
| 119 |
+
logger.info(f"[CACHE-PROMPT] Key not found: {key}")
|
| 120 |
return json.dumps({
|
| 121 |
"success": False,
|
| 122 |
"error": "Cache key not found"
|
| 123 |
}, indent=2)
|
| 124 |
|
| 125 |
except Exception as e:
|
| 126 |
+
logger.error(f"[CACHE-PROMPT] Error: {e}")
|
| 127 |
return json.dumps({
|
| 128 |
"success": False,
|
| 129 |
"error": str(e)
|
|
|
|
| 134 |
Cache a complete response for instant retrieval
|
| 135 |
"""
|
| 136 |
try:
|
| 137 |
+
logger.info(f"[CACHE-RESPONSE] Storing prompt hash: {prompt_hash} (response length: {len(response)})")
|
| 138 |
+
|
| 139 |
response_cache[prompt_hash] = {
|
| 140 |
"response": response,
|
| 141 |
"timestamp": time.time()
|
|
|
|
| 145 |
if len(response_cache) > 50:
|
| 146 |
oldest_key = min(response_cache.keys(), key=lambda k: response_cache[k]["timestamp"])
|
| 147 |
del response_cache[oldest_key]
|
| 148 |
+
logger.info(f"[CACHE-RESPONSE] Removed oldest entry: {oldest_key}")
|
| 149 |
|
| 150 |
+
logger.info(f"[CACHE-RESPONSE] Stored successfully. Cache size: {len(response_cache)}")
|
| 151 |
return json.dumps({
|
| 152 |
"success": True,
|
| 153 |
"cached": prompt_hash,
|
|
|
|
| 155 |
}, indent=2)
|
| 156 |
|
| 157 |
except Exception as e:
|
| 158 |
+
logger.error(f"[CACHE-RESPONSE] Error: {e}")
|
| 159 |
return json.dumps({
|
| 160 |
"success": False,
|
| 161 |
"error": str(e)
|
|
|
|
| 168 |
try:
|
| 169 |
if prompt_hash in response_cache:
|
| 170 |
data = response_cache[prompt_hash]
|
| 171 |
+
logger.info(f"[CACHE-RESPONSE] Retrieved prompt hash: {prompt_hash} (response length: {len(data['response'])})")
|
| 172 |
return json.dumps({
|
| 173 |
"success": True,
|
| 174 |
"response": data["response"],
|
| 175 |
"age_seconds": round(time.time() - data["timestamp"], 2)
|
| 176 |
}, indent=2)
|
| 177 |
|
| 178 |
+
logger.info(f"[CACHE-RESPONSE] Prompt hash not found: {prompt_hash}")
|
| 179 |
return json.dumps({
|
| 180 |
"success": False,
|
| 181 |
"error": "Response not cached"
|
| 182 |
}, indent=2)
|
| 183 |
|
| 184 |
except Exception as e:
|
| 185 |
+
logger.error(f"[CACHE-RESPONSE] Error: {e}")
|
| 186 |
return json.dumps({
|
| 187 |
"success": False,
|
| 188 |
"error": str(e)
|
|
|
|
| 194 |
Stateless - just returns the calculation
|
| 195 |
"""
|
| 196 |
try:
|
| 197 |
+
logger.info(f"[TOKEN-COST] Calculating cost for user: {username} (duration: {duration_ms}ms)")
|
| 198 |
+
|
| 199 |
cost = (duration_ms / 100.0) * 0.001 # 0.001 tokens per 100ms
|
| 200 |
|
| 201 |
# Track in ledger (for analytics)
|
|
|
|
| 210 |
token_ledger[username]["total_duration_ms"] += duration_ms
|
| 211 |
token_ledger[username]["requests"] += 1
|
| 212 |
|
| 213 |
+
logger.info(f"[TOKEN-COST] Calculated cost: {cost} tokens (total: {token_ledger[username]['total_cost']})")
|
| 214 |
return json.dumps({
|
| 215 |
"success": True,
|
| 216 |
"username": username,
|
|
|
|
| 221 |
}, indent=2)
|
| 222 |
|
| 223 |
except Exception as e:
|
| 224 |
+
logger.error(f"[TOKEN-COST] Error: {e}")
|
| 225 |
return json.dumps({
|
| 226 |
"success": False,
|
| 227 |
"error": str(e)
|
|
|
|
| 232 |
Get statistics about cache usage
|
| 233 |
"""
|
| 234 |
try:
|
| 235 |
+
logger.info("[CACHE-STATS] Retrieving cache statistics")
|
| 236 |
+
|
| 237 |
return json.dumps({
|
| 238 |
"success": True,
|
| 239 |
"prompt_cache_size": len(prompt_cache),
|
|
|
|
| 244 |
}, indent=2)
|
| 245 |
|
| 246 |
except Exception as e:
|
| 247 |
+
logger.error(f"[CACHE-STATS] Error: {e}")
|
| 248 |
return json.dumps({
|
| 249 |
"success": False,
|
| 250 |
"error": str(e)
|
|
|
|
| 255 |
Get backend health status for monitoring
|
| 256 |
"""
|
| 257 |
try:
|
| 258 |
+
logger.info("[BACKEND-HEALTH] Retrieving backend health status")
|
| 259 |
+
|
| 260 |
return json.dumps({
|
| 261 |
"success": True,
|
| 262 |
"status": "healthy",
|
|
|
|
| 267 |
}, indent=2)
|
| 268 |
|
| 269 |
except Exception as e:
|
| 270 |
+
logger.error(f"[BACKEND-HEALTH] Error: {e}")
|
| 271 |
return json.dumps({
|
| 272 |
"success": False,
|
| 273 |
"error": str(e)
|
|
|
|
| 368 |
|
| 369 |
def cleanup_on_exit():
|
| 370 |
"""Cleanup function called on application exit"""
|
| 371 |
+
logger.info("[CLEANUP] Backend shutting down...")
|
| 372 |
# Clear caches
|
| 373 |
global prompt_cache, response_cache, token_ledger
|
| 374 |
+
logger.info(f"[CLEANUP] Clearing {len(prompt_cache)} prompt cache entries")
|
| 375 |
+
logger.info(f"[CLEANUP] Clearing {len(response_cache)} response cache entries")
|
| 376 |
+
logger.info(f"[CLEANUP] Clearing {len(token_ledger)} user token records")
|
| 377 |
+
|
| 378 |
prompt_cache.clear()
|
| 379 |
response_cache.clear()
|
| 380 |
token_ledger.clear()
|
| 381 |
+
logger.info("[CLEANUP] Backend shutdown complete")
|
| 382 |
|
| 383 |
# Register cleanup functions
|
| 384 |
atexit.register(cleanup_on_exit)
|
| 385 |
|
| 386 |
def signal_handler(signum, frame):
|
| 387 |
"""Handle shutdown signals gracefully"""
|
| 388 |
+
logger.info(f"[CLEANUP] Received signal {signum}")
|
| 389 |
cleanup_on_exit()
|
| 390 |
import sys
|
| 391 |
sys.exit(0)
|
|
|
|
| 393 |
signal.signal(signal.SIGTERM, signal_handler)
|
| 394 |
signal.signal(signal.SIGINT, signal_handler)
|
| 395 |
|
| 396 |
+
logger.info("[INIT] ZeroEngine-Backend starting up...")
|
| 397 |
+
logger.info(f"[INIT] Cache sizes - Prompt: {len(prompt_cache)}, Response: {len(response_cache)}")
|
| 398 |
+
logger.info(f"[INIT] Users tracked: {len(token_ledger)}")
|
| 399 |
+
|
| 400 |
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|