turtle170 committed on
Commit
47f6bdb
·
verified ·
1 Parent(s): 41c6199

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -243
app.py CHANGED
@@ -5,7 +5,13 @@ import hashlib
5
  import logging
6
  import datetime
7
  import pytz
 
 
 
8
  from typing import Dict, Optional
 
 
 
9
 
10
  # Initialize logging for backend
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - BACKEND - %(message)s', force=True)
@@ -16,7 +22,7 @@ import warnings
16
  warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*asyncio.*")
17
 
18
  # ============================================================================
19
- # ZEROENGINE-BACKEND: Background Processing Service
20
  # ============================================================================
21
  # This space handles:
22
  # - Tokenization pre-processing
@@ -25,24 +31,129 @@ warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*asyncio.*"
25
  # - Response caching
26
  # ============================================================================
27
 
28
- # In-memory caches (will reset on space restart)
 
 
 
 
 
 
 
 
 
29
  prompt_cache = {}
30
  response_cache = {}
31
  token_ledger = {}
32
  backend_start_time = time.time()
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def tokenize_text(text: str) -> str:
35
- """Enhanced tokenization with extremely detailed logging"""
36
- logger.info(f"[TOKENIZE] ===== TOKENIZE REQUEST START =====")
37
- logger.info(f"[TOKENIZE] Input text length: {len(text)} characters")
38
- logger.info(f"[TOKENIZE] Input text preview: '{text[:100]}{'...' if len(text) > 100 else ''}'")
39
- logger.info(f"[TOKENIZE] Input text hash: {hashlib.md5(text.encode()).hexdigest()[:16]}")
40
-
41
  start_time = time.time()
42
 
 
 
 
43
  try:
44
- # Simple character-based estimation (can be enhanced with proper tokenizer)
45
- estimated_tokens = len(text.split()) + len(text) // 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  processing_time = time.time() - start_time
47
 
48
  result = {
@@ -50,40 +161,31 @@ def tokenize_text(text: str) -> str:
50
  "estimated_tokens": estimated_tokens,
51
  "processing_time_ms": round(processing_time * 1000, 2),
52
  "text_length": len(text),
53
- "word_count": len(text.split()),
54
- "char_count": len(text),
55
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
56
- "request_id": hashlib.md5(f"{text}{time.time()}".encode()).hexdigest()[:8]
 
57
  }
58
 
59
- logger.info(f"[TOKENIZE] Tokenization completed successfully")
60
- logger.info(f"[TOKENIZE] Estimated tokens: {estimated_tokens}")
61
- logger.info(f"[TOKENIZE] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
62
- logger.info(f"[TOKENIZE] Word count: {len(text.split())}")
63
- logger.info(f"[TOKENIZE] Character count: {len(text)}")
64
- logger.info(f"[TOKENIZE] Request ID: {result['request_id']}")
65
- logger.info(f"[TOKENIZE] ===== TOKENIZE REQUEST END =====")
66
-
67
- # Create cache key
68
- text_hash = hashlib.md5(text.encode()).hexdigest()[:16]
69
-
70
  prompt_cache[text_hash] = {
71
  "text": text[:100] + "..." if len(text) > 100 else text,
72
  "tokens": estimated_tokens,
73
  "cached_at": time.time()
74
  }
75
 
76
- logger.info(f"[TOKENIZE] Cached tokenization result for key: {text_hash}")
 
 
 
77
 
 
78
  return json.dumps(result, indent=2)
79
 
80
  except Exception as e:
81
  processing_time = time.time() - start_time
82
- logger.error(f"[TOKENIZE] ❌ Tokenization failed after {processing_time:.4f}s: {e}")
83
- logger.error(f"[TOKENIZE] Error type: {type(e).__name__}")
84
- logger.error(f"[TOKENIZE] Error details: {str(e)}")
85
- logger.error(f"[TOKENIZE] Input text that caused error: '{text[:200]}{'...' if len(text) > 200 else ''}'")
86
- logger.error(f"[TOKENIZE] ===== TOKENIZE REQUEST END (ERROR) =====")
87
 
88
  return json.dumps({
89
  "success": False,
@@ -94,61 +196,44 @@ def tokenize_text(text: str) -> str:
94
  }, indent=2)
95
 
96
  def cache_prompt(key: str, value: str) -> str:
97
- """Store prompt in cache with extremely detailed logging"""
98
- logger.info(f"[CACHE-PROMPT] ===== CACHE PROMPT REQUEST START =====")
99
- logger.info(f"[CACHE-PROMPT] Requested key: '{key}'")
100
- logger.info(f"[CACHE-PROMPT] Key length: {len(key)} characters")
101
- logger.info(f"[CACHE-PROMPT] Key hash: {hashlib.md5(key.encode()).hexdigest()[:16]}")
102
- logger.info(f"[CACHE-PROMPT] Value length: {len(value)} characters")
103
- logger.info(f"[CACHE-PROMPT] Value preview: '{value[:100]}{'...' if len(value) > 100 else ''}'")
104
- logger.info(f"[CACHE-PROMPT] Current cache size: {len(prompt_cache)} entries")
105
- logger.info(f"[CACHE-PROMPT] Current cache memory usage: {sum(len(v) for v in prompt_cache.values())} characters")
106
- logger.info(f"[CACHE-PROMPT] Available keys: {list(prompt_cache.keys())[:10]}{'...' if len(prompt_cache) > 10 else ''}")
107
-
108
  start_time = time.time()
109
 
110
  try:
111
- prompt_cache[key] = {
 
 
 
112
  "value": value,
113
- "timestamp": time.time()
114
  }
115
 
116
- processing_time = time.time() - start_time
 
 
 
 
 
 
117
 
118
- # Limit cache size to 100 entries
119
- if len(prompt_cache) > 100:
120
- oldest_key = min(prompt_cache.keys(), key=lambda k: prompt_cache[k]["timestamp"])
121
- del prompt_cache[oldest_key]
122
- logger.info(f"[CACHE-PROMPT] Removed oldest entry: {oldest_key}")
123
 
124
  result = {
125
  "success": True,
126
- "key": key,
127
  "value_length": len(value),
128
  "cache_size": len(prompt_cache),
129
  "processing_time_ms": round(processing_time * 1000, 2),
130
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
131
- "request_id": hashlib.md5(f"{key}{time.time()}".encode()).hexdigest()[:8]
132
  }
133
 
134
- logger.info(f"[CACHE-PROMPT] Prompt cached successfully")
135
- logger.info(f"[CACHE-PROMPT] Cached key: '{key}'")
136
- logger.info(f"[CACHE-PROMPT] Value length: {len(value)} characters")
137
- logger.info(f"[CACHE-PROMPT] Value preview: '{value[:100]}{'...' if len(value) > 100 else ''}'")
138
- logger.info(f"[CACHE-PROMPT] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
139
- logger.info(f"[CACHE-PROMPT] Request ID: {result['request_id']}")
140
- logger.info(f"[CACHE-PROMPT] ===== CACHE PROMPT REQUEST END =====")
141
-
142
  return json.dumps(result, indent=2)
143
 
144
  except Exception as e:
145
  processing_time = time.time() - start_time
146
- logger.error(f"[CACHE-PROMPT] ❌ Cache prompt failed after {processing_time:.4f}s: {e}")
147
- logger.error(f"[CACHE-PROMPT] Error type: {type(e).__name__}")
148
- logger.error(f"[CACHE-PROMPT] Error details: {str(e)}")
149
- logger.error(f"[CACHE-PROMPT] Key that caused error: '{key}'")
150
- logger.error(f"[CACHE-PROMPT] Value that caused error: '{value[:200]}{'...' if len(value) > 200 else ''}'")
151
- logger.error(f"[CACHE-PROMPT] ===== CACHE PROMPT REQUEST END (ERROR) =====")
152
 
153
  return json.dumps({
154
  "success": False,
@@ -159,69 +244,50 @@ def cache_prompt(key: str, value: str) -> str:
159
  }, indent=2)
160
 
161
  def get_cached_prompt(key: str) -> str:
162
- """Retrieve a cached prompt with extremely detailed logging"""
163
- logger.info(f"[GET-PROMPT] ===== GET CACHED PROMPT REQUEST START =====")
164
- logger.info(f"[GET-PROMPT] Requested key: '{key}'")
165
- logger.info(f"[GET-PROMPT] Key length: {len(key)} characters")
166
- logger.info(f"[GET-PROMPT] Key hash: {hashlib.md5(key.encode()).hexdigest()[:16]}")
167
- logger.info(f"[GET-PROMPT] Current cache size: {len(prompt_cache)} entries")
168
- logger.info(f"[GET-PROMPT] Current cache memory usage: {sum(len(v) for v in prompt_cache.values())} characters")
169
- logger.info(f"[GET-PROMPT] Available keys: {list(prompt_cache.keys())[:10]}{'...' if len(prompt_cache) > 10 else ''}")
170
-
171
  start_time = time.time()
172
 
173
  try:
174
- cached_value = prompt_cache.get(key)
 
 
175
  processing_time = time.time() - start_time
176
 
177
  if cached_value is not None:
178
  result = {
179
  "success": True,
180
  "found": True,
181
- "key": key,
182
- "value": cached_value,
183
- "value_length": len(cached_value),
184
  "cache_size": len(prompt_cache),
185
  "processing_time_ms": round(processing_time * 1000, 2),
186
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
187
- "request_id": hashlib.md5(f"{key}{time.time()}".encode()).hexdigest()[:8],
188
  "cache_hit": True
189
  }
190
 
191
- logger.info(f"[GET-PROMPT] Cache HIT - prompt found")
192
- logger.info(f"[GET-PROMPT] Found key: '{key}'")
193
- logger.info(f"[GET-PROMPT] Value length: {len(cached_value)} characters")
194
- logger.info(f"[GET-PROMPT] Value preview: '{cached_value[:100]}{'...' if len(cached_value) > 100 else ''}'")
195
- logger.info(f"[GET-PROMPT] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
196
- logger.info(f"[GET-PROMPT] Request ID: {result['request_id']}")
197
  else:
198
  result = {
199
  "success": True,
200
  "found": False,
201
- "key": key,
202
  "value": None,
203
  "cache_size": len(prompt_cache),
204
  "processing_time_ms": round(processing_time * 1000, 2),
205
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
206
- "request_id": hashlib.md5(f"{key}{time.time()}".encode()).hexdigest()[:8],
207
  "cache_hit": False
208
  }
209
 
210
- logger.warning(f"[GET-PROMPT] ⚠️ Cache MISS - prompt not found")
211
- logger.warning(f"[GET-PROMPT] Missing key: '{key}'")
212
- logger.warning(f"[GET-PROMPT] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
213
- logger.warning(f"[GET-PROMPT] Request ID: {result['request_id']}")
214
 
215
- logger.info(f"[GET-PROMPT] ===== GET CACHED PROMPT REQUEST END =====")
216
  return json.dumps(result, indent=2)
217
 
218
  except Exception as e:
219
  processing_time = time.time() - start_time
220
- logger.error(f"[GET-PROMPT] ❌ Get cached prompt failed after {processing_time:.4f}s: {e}")
221
- logger.error(f"[GET-PROMPT] Error type: {type(e).__name__}")
222
- logger.error(f"[GET-PROMPT] Error details: {str(e)}")
223
- logger.error(f"[GET-PROMPT] Key that caused error: '{key}'")
224
- logger.error(f"[GET-PROMPT] ===== GET CACHED PROMPT REQUEST END (ERROR) =====")
225
 
226
  return json.dumps({
227
  "success": False,
@@ -232,31 +298,24 @@ def get_cached_prompt(key: str) -> str:
232
  }, indent=2)
233
 
234
  def cache_response(prompt_hash: str, response: str) -> str:
235
- """Cache a complete response with extremely detailed logging"""
236
- logger.info(f"[CACHE-RESPONSE] ===== CACHE RESPONSE REQUEST START =====")
237
- logger.info(f"[CACHE-RESPONSE] Prompt hash: '{prompt_hash}'")
238
- logger.info(f"[CACHE-RESPONSE] Hash length: {len(prompt_hash)} characters")
239
- logger.info(f"[CACHE-RESPONSE] Response length: {len(response)} characters")
240
- logger.info(f"[CACHE-RESPONSE] Response preview: '{response[:150]}{'...' if len(response) > 150 else ''}'")
241
- logger.info(f"[CACHE-RESPONSE] Current response cache size: {len(response_cache)} entries")
242
- logger.info(f"[CACHE-RESPONSE] Current cache memory usage: {sum(len(v['response']) for v in response_cache.values())} characters")
243
- logger.info(f"[CACHE-RESPONSE] Available hashes: {list(response_cache.keys())[:10]}{'...' if len(response_cache) > 10 else ''}")
244
-
245
  start_time = time.time()
246
 
247
  try:
248
  response_cache[prompt_hash] = {
249
  "response": response,
250
- "timestamp": time.time()
251
  }
252
 
253
- processing_time = time.time() - start_time
 
 
 
 
 
 
254
 
255
- # Limit cache size to 50 entries
256
- if len(response_cache) > 50:
257
- oldest_key = min(response_cache.keys(), key=lambda k: response_cache[k]["timestamp"])
258
- del response_cache[oldest_key]
259
- logger.info(f"[CACHE-RESPONSE] Removed oldest entry: {oldest_key}")
260
 
261
  result = {
262
  "success": True,
@@ -265,29 +324,15 @@ def cache_response(prompt_hash: str, response: str) -> str:
265
  "cache_size": len(response_cache),
266
  "processing_time_ms": round(processing_time * 1000, 2),
267
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
268
- "request_id": hashlib.md5(f"{prompt_hash}{time.time()}".encode()).hexdigest()[:8],
269
- "cache_memory_usage": sum(len(v['response']) for v in response_cache.values())
270
  }
271
 
272
- logger.info(f"[CACHE-RESPONSE] Response cached successfully")
273
- logger.info(f"[CACHE-RESPONSE] Cached hash: '{prompt_hash}'")
274
- logger.info(f"[CACHE-RESPONSE] Response length: {len(response)} characters")
275
- logger.info(f"[CACHE-RESPONSE] New cache size: {len(response_cache)} entries")
276
- logger.info(f"[CACHE-RESPONSE] New cache memory usage: {result['cache_memory_usage']} characters")
277
- logger.info(f"[CACHE-RESPONSE] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
278
- logger.info(f"[CACHE-RESPONSE] Request ID: {result['request_id']}")
279
- logger.info(f"[CACHE-RESPONSE] ===== CACHE RESPONSE REQUEST END =====")
280
-
281
  return json.dumps(result, indent=2)
282
 
283
  except Exception as e:
284
  processing_time = time.time() - start_time
285
- logger.error(f"[CACHE-RESPONSE] ❌ Cache response failed after {processing_time:.4f}s: {e}")
286
- logger.error(f"[CACHE-RESPONSE] Error type: {type(e).__name__}")
287
- logger.error(f"[CACHE-RESPONSE] Error details: {str(e)}")
288
- logger.error(f"[CACHE-RESPONSE] Hash that caused error: '{prompt_hash}'")
289
- logger.error(f"[CACHE-RESPONSE] Response preview that caused error: '{response[:300]}{'...' if len(response) > 300 else ''}'")
290
- logger.error(f"[CACHE-RESPONSE] ===== CACHE RESPONSE REQUEST END (ERROR) =====")
291
 
292
  return json.dumps({
293
  "success": False,
@@ -298,14 +343,7 @@ def cache_response(prompt_hash: str, response: str) -> str:
298
  }, indent=2)
299
 
300
  def get_cached_response(prompt_hash: str) -> str:
301
- """Retrieve cached response with extremely detailed logging"""
302
- logger.info(f"[GET-RESPONSE] ===== GET CACHED RESPONSE REQUEST START =====")
303
- logger.info(f"[GET-RESPONSE] Requested hash: '{prompt_hash}'")
304
- logger.info(f"[GET-RESPONSE] Hash length: {len(prompt_hash)} characters")
305
- logger.info(f"[GET-RESPONSE] Current response cache size: {len(response_cache)} entries")
306
- logger.info(f"[GET-RESPONSE] Current cache memory usage: {sum(len(v['response']) for v in response_cache.values())} characters")
307
- logger.info(f"[GET-RESPONSE] Available hashes: {list(response_cache.keys())[:10]}{'...' if len(response_cache) > 10 else ''}")
308
-
309
  start_time = time.time()
310
 
311
  try:
@@ -314,7 +352,7 @@ def get_cached_response(prompt_hash: str) -> str:
314
 
315
  if cached_data is not None:
316
  response = cached_data["response"]
317
- age_seconds = round(time.time() - cached_data["timestamp"], 2)
318
 
319
  result = {
320
  "success": True,
@@ -328,17 +366,10 @@ def get_cached_response(prompt_hash: str) -> str:
328
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
329
  "request_id": hashlib.md5(f"{prompt_hash}{time.time()}".encode()).hexdigest()[:8],
330
  "cache_hit": True,
331
- "cached_at": datetime.datetime.fromtimestamp(cached_data["timestamp"], pytz.UTC).isoformat()
332
  }
333
 
334
- logger.info(f"[GET-RESPONSE] Cache HIT - response found")
335
- logger.info(f"[GET-RESPONSE] Found hash: '{prompt_hash}'")
336
- logger.info(f"[GET-RESPONSE] Response length: {len(response)} characters")
337
- logger.info(f"[GET-RESPONSE] Response preview: '{response[:150]}{'...' if len(response) > 150 else ''}'")
338
- logger.info(f"[GET-RESPONSE] Response age: {age_seconds} seconds")
339
- logger.info(f"[GET-RESPONSE] Cached at: {result['cached_at']}")
340
- logger.info(f"[GET-RESPONSE] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
341
- logger.info(f"[GET-RESPONSE] Request ID: {result['request_id']}")
342
  else:
343
  result = {
344
  "success": True,
@@ -352,21 +383,13 @@ def get_cached_response(prompt_hash: str) -> str:
352
  "cache_hit": False
353
  }
354
 
355
- logger.warning(f"[GET-RESPONSE] ⚠️ Cache MISS - response not found")
356
- logger.warning(f"[GET-RESPONSE] Missing hash: '{prompt_hash}'")
357
- logger.warning(f"[GET-RESPONSE] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
358
- logger.warning(f"[GET-RESPONSE] Request ID: {result['request_id']}")
359
 
360
- logger.info(f"[GET-RESPONSE] ===== GET CACHED RESPONSE REQUEST END =====")
361
  return json.dumps(result, indent=2)
362
 
363
  except Exception as e:
364
  processing_time = time.time() - start_time
365
- logger.error(f"[GET-RESPONSE] ❌ Get cached response failed after {processing_time:.4f}s: {e}")
366
- logger.error(f"[GET-RESPONSE] Error type: {type(e).__name__}")
367
- logger.error(f"[GET-RESPONSE] Error details: {str(e)}")
368
- logger.error(f"[GET-RESPONSE] Hash that caused error: '{prompt_hash}'")
369
- logger.error(f"[GET-RESPONSE] ===== GET CACHED RESPONSE REQUEST END (ERROR) =====")
370
 
371
  return json.dumps({
372
  "success": False,
@@ -461,90 +484,6 @@ def calculate_token_cost(username: str, duration_ms: float) -> str:
461
  return json.dumps({
462
  "success": False,
463
  "error": str(e),
464
- "error_type": type(e).__name__,
465
- "processing_time_ms": round(processing_time * 1000, 2),
466
- "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
467
- }, indent=2)
468
-
469
- def get_cache_stats() -> str:
470
- """Get cache statistics with extremely detailed logging"""
471
- logger.info(f"[CACHE-STATS] ===== CACHE STATS REQUEST START =====")
472
- logger.info(f"[CACHE-STATS] Current prompt cache size: {len(prompt_cache)} entries")
473
- logger.info(f"[CACHE-STATS] Current response cache size: {len(response_cache)} entries")
474
- logger.info(f"[CACHE-STATS] Current users tracked: {len(token_ledger)}")
475
- logger.info(f"[CACHE-STATS] Prompt cache memory usage: {sum(len(str(v)) for v in prompt_cache.values())} characters")
476
- logger.info(f"[CACHE-STATS] Response cache memory usage: {sum(len(v['response']) for v in response_cache.values())} characters")
477
- logger.info(f"[CACHE-STATS] Total requests processed: {sum(u['requests'] for u in token_ledger.values())}")
478
-
479
- start_time = time.time()
480
-
481
- try:
482
- # Calculate detailed statistics
483
- total_prompt_memory = sum(len(str(v)) for v in prompt_cache.values())
484
- total_response_memory = sum(len(v['response']) for v in response_cache.values())
485
- total_requests = sum(u['requests'] for u in token_ledger.values())
486
- total_tokens = sum(u['total_cost'] for u in token_ledger.values())
487
- total_duration = sum(u['total_duration_ms'] for u in token_ledger.values())
488
-
489
- # User statistics
490
- active_users = len([u for u in token_ledger.values() if time.time() - u.get('last_seen', u.get('first_seen', 0)) < 3600])
491
- avg_requests_per_user = total_requests / len(token_ledger) if len(token_ledger) > 0 else 0
492
- avg_tokens_per_user = total_tokens / len(token_ledger) if len(token_ledger) > 0 else 0
493
-
494
- processing_time = time.time() - start_time
495
-
496
- result = {
497
- "success": True,
498
- "prompt_cache_size": len(prompt_cache),
499
- "response_cache_size": len(response_cache),
500
- "users_tracked": len(token_ledger),
501
- "active_users_last_hour": active_users,
502
- "total_requests": total_requests,
503
- "total_tokens_spent": round(total_tokens, 4),
504
- "total_duration_ms": round(total_duration, 2),
505
- "avg_requests_per_user": round(avg_requests_per_user, 2),
506
- "avg_tokens_per_user": round(avg_tokens_per_user, 4),
507
- "prompt_cache_memory_bytes": total_prompt_memory,
508
- "response_cache_memory_bytes": total_response_memory,
509
- "total_cache_memory_bytes": total_prompt_memory + total_response_memory,
510
- "processing_time_ms": round(processing_time * 1000, 2),
511
- "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
512
- "request_id": hashlib.md5(f"stats{time.time()}".encode()).hexdigest()[:8]
513
- }
514
-
515
- logger.info(f"[CACHE-STATS] ✅ Cache statistics retrieved successfully")
516
- logger.info(f"[CACHE-STATS] Prompt cache: {len(prompt_cache)} entries ({total_prompt_memory} chars)")
517
- logger.info(f"[CACHE-STATS] Response cache: {len(response_cache)} entries ({total_response_memory} chars)")
518
- logger.info(f"[CACHE-STATS] Users tracked: {len(token_ledger)} ({active_users} active last hour)")
519
- logger.info(f"[CACHE-STATS] Total requests: {total_requests}")
520
- logger.info(f"[CACHE-STATS] Total tokens spent: {total_tokens}")
521
- logger.info(f"[CACHE-STATS] Total duration: {total_duration}ms")
522
- logger.info(f"[CACHE-STATS] Avg requests per user: {avg_requests_per_user}")
523
- logger.info(f"[CACHE-STATS] Avg tokens per user: {avg_tokens_per_user}")
524
- logger.info(f"[CACHE-STATS] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
525
- logger.info(f"[CACHE-STATS] Request ID: {result['request_id']}")
526
- logger.info(f"[CACHE-STATS] ===== CACHE STATS REQUEST END =====")
527
-
528
- return json.dumps(result, indent=2)
529
-
530
- except Exception as e:
531
- processing_time = time.time() - start_time
532
- logger.error(f"[CACHE-STATS] ❌ Cache statistics retrieval failed after {processing_time:.4f}s: {e}")
533
- logger.error(f"[CACHE-STATS] Error type: {type(e).__name__}")
534
- logger.error(f"[CACHE-STATS] Error details: {str(e)}")
535
- logger.error(f"[CACHE-STATS] ===== CACHE STATS REQUEST END (ERROR) =====")
536
-
537
- return json.dumps({
538
- "success": False,
539
- "error": str(e),
540
- "error_type": type(e).__name__,
541
- "processing_time_ms": round(processing_time * 1000, 2),
542
- "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
543
- }, indent=2)
544
-
545
- def get_backend_health() -> str:
546
- """Get backend health status with extremely detailed logging"""
547
- logger.info(f"[BACKEND-HEALTH] ===== BACKEND HEALTH REQUEST START =====")
548
  logger.info(f"[BACKEND-HEALTH] Checking backend health status...")
549
  logger.info(f"[BACKEND-HEALTH] Current prompt cache size: {len(prompt_cache)} entries")
550
  logger.info(f"[BACKEND-HEALTH] Current response cache size: {len(response_cache)} entries")
 
5
  import logging
6
  import datetime
7
  import pytz
8
+ import psutil
9
+ import threading
10
+ import gc
11
  from typing import Dict, Optional
12
+ from functools import lru_cache
13
+ import concurrent.futures
14
+ import os
15
 
16
  # Initialize logging for backend
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - BACKEND - %(message)s', force=True)
 
22
  warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*asyncio.*")
23
 
24
  # ============================================================================
25
+ # ZEROENGINE-BACKEND: Background Processing Service - SPEED OPTIMIZED
26
  # ============================================================================
27
  # This space handles:
28
  # - Tokenization pre-processing
 
31
  # - Response caching
32
  # ============================================================================
33
 
34
+ # SPEED OPTIMIZATIONS: Larger caches with 16GB RAM available
35
+ MAX_PROMPT_CACHE_SIZE = 50000 # Increased from the previous hard-coded limit of 100 entries
36
+ MAX_RESPONSE_CACHE_SIZE = 10000 # Increased from the previous hard-coded limit of 50 entries
37
+ MAX_TOKEN_LEDGER_SIZE = 10000 # Increased from default
38
+
39
+ # HARD-CODED: Hugging Face Space RAM limits (same as main app)
40
+ TOTAL_RAM_GB = 18.0 # HARD-CODED: 18GB total for container
41
+ USABLE_RAM_GB = 16.0 # HARD-CODED: 16GB usable for backend (2GB reserved)
42
+
43
+ # In-memory caches with optimized data structures
44
  prompt_cache = {}
45
  response_cache = {}
46
  token_ledger = {}
47
  backend_start_time = time.time()
48
 
49
+ # Performance tracking
50
+ performance_stats = {
51
+ "total_requests": 0,
52
+ "cache_hits": 0,
53
+ "cache_misses": 0,
54
+ "avg_response_time": 0.0,
55
+ "memory_usage_mb": 0.0
56
+ }
57
+
58
+ # Background cleanup thread
59
+ cleanup_thread_running = True
60
+
61
+ def background_cleanup():
62
+ """Background thread for cache management and optimization"""
63
+ while cleanup_thread_running:
64
+ try:
65
+ # Clean up old entries every 5 minutes
66
+ time.sleep(300)
67
+
68
+ current_time = time.time()
69
+
70
+ # Clean old prompt cache entries (older than 1 hour)
71
+ old_prompt_keys = [
72
+ key for key, data in prompt_cache.items()
73
+ if current_time - data.get("cached_at", 0) > 3600
74
+ ]
75
+ for key in old_prompt_keys[:100]: # Limit cleanup batch size
76
+ del prompt_cache[key]
77
+
78
+ # Clean old response cache entries (older than 2 hours)
79
+ old_response_keys = [
80
+ key for key, data in response_cache.items()
81
+ if current_time - data.get("cached_at", 0) > 7200
82
+ ]
83
+ for key in old_response_keys[:50]: # Limit cleanup batch size
84
+ del response_cache[key]
85
+
86
+ # Force garbage collection
87
+ gc.collect()
88
+
89
+ logger.info(f"[CLEANUP] Removed {len(old_prompt_keys)} old prompts, {len(old_response_keys)} old responses")
90
+
91
+ except Exception as e:
92
+ logger.error(f"[CLEANUP] Background cleanup error: {e}")
93
+
94
+ # Start background cleanup thread
95
+ cleanup_thread = threading.Thread(target=background_cleanup, daemon=True)
96
+ cleanup_thread.start()
97
+ logger.info("[INIT] Background cleanup thread started")
98
+
99
+ # Log hard-coded RAM configuration
100
+ logger.info(f"[RAM] HARD-CODED: Total: {TOTAL_RAM_GB:.1f}GB, Usable: {USABLE_RAM_GB:.1f}GB (Hugging Face Space)")
101
+ logger.info(f"[RAM] (Ignoring host system memory - using container limits)")
102
+
103
+ @lru_cache(maxsize=10000)
104
+ def fast_hash(text: str) -> str:
105
+ """Fast hashing function with LRU cache"""
106
+ return hashlib.md5(text.encode()).hexdigest()
107
+
108
+ def get_memory_usage() -> float:
109
+ """Get current memory usage in MB"""
110
+ try:
111
+ return psutil.Process().memory_info().rss / 1024 / 1024
112
+ except:
113
+ return 0.0
114
+
115
  def tokenize_text(text: str) -> str:
116
+ """SPEED-OPTIMIZED tokenization with fast caching"""
 
 
 
 
 
117
  start_time = time.time()
118
 
119
+ # Update performance stats
120
+ performance_stats["total_requests"] += 1
121
+
122
  try:
123
+ # Check cache first for instant response
124
+ text_hash = fast_hash(text)[:16]
125
+ cached_result = prompt_cache.get(text_hash)
126
+
127
+ if cached_result:
128
+ performance_stats["cache_hits"] += 1
129
+ processing_time = time.time() - start_time
130
+
131
+ result = {
132
+ "success": True,
133
+ "estimated_tokens": cached_result["tokens"],
134
+ "processing_time_ms": round(processing_time * 1000, 2),
135
+ "text_length": len(text),
136
+ "word_count": len(text.split()),
137
+ "char_count": len(text),
138
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
139
+ "request_id": hashlib.md5(f"{text}{time.time()}".encode()).hexdigest()[:8],
140
+ "cache_hit": True
141
+ }
142
+
143
+ logger.info(f"[TOKENIZE] ⚡ CACHE HIT: {cached_result['tokens']} tokens in {processing_time*1000:.1f}ms")
144
+ return json.dumps(result, indent=2)
145
+
146
+ # Cache miss - calculate tokens
147
+ performance_stats["cache_misses"] += 1
148
+
149
+ # OPTIMIZED: Faster token estimation algorithm
150
+ words = text.split()
151
+ word_count = len(words)
152
+ char_count = len(text)
153
+
154
+ # More accurate token estimation based on patterns
155
+ estimated_tokens = word_count + (char_count // 4) + (len([w for w in words if len(w) > 8]) * 2)
156
+
157
  processing_time = time.time() - start_time
158
 
159
  result = {
 
161
  "estimated_tokens": estimated_tokens,
162
  "processing_time_ms": round(processing_time * 1000, 2),
163
  "text_length": len(text),
164
+ "word_count": word_count,
165
+ "char_count": char_count,
166
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
167
+ "request_id": hashlib.md5(f"{text}{time.time()}".encode()).hexdigest()[:8],
168
+ "cache_hit": False
169
  }
170
 
171
+ # Cache the result for future requests
 
 
 
 
 
 
 
 
 
 
172
  prompt_cache[text_hash] = {
173
  "text": text[:100] + "..." if len(text) > 100 else text,
174
  "tokens": estimated_tokens,
175
  "cached_at": time.time()
176
  }
177
 
178
+ # Limit cache size by evicting the entry with the oldest cached_at timestamp (not true LRU: cache hits do not refresh it)
179
+ if len(prompt_cache) > MAX_PROMPT_CACHE_SIZE:
180
+ oldest_key = min(prompt_cache.keys(), key=lambda k: prompt_cache[k]["cached_at"])
181
+ del prompt_cache[oldest_key]
182
 
183
+ logger.info(f"[TOKENIZE] ✅ CALCULATED: {estimated_tokens} tokens in {processing_time*1000:.1f}ms")
184
  return json.dumps(result, indent=2)
185
 
186
  except Exception as e:
187
  processing_time = time.time() - start_time
188
+ logger.error(f"[TOKENIZE] ❌ Failed after {processing_time*1000:.1f}ms: {e}")
 
 
 
 
189
 
190
  return json.dumps({
191
  "success": False,
 
196
  }, indent=2)
197
 
198
  def cache_prompt(key: str, value: str) -> str:
199
+ """SPEED-OPTIMIZED prompt caching with larger limits"""
 
 
 
 
 
 
 
 
 
 
200
  start_time = time.time()
201
 
202
  try:
203
+ # Use fast hash for key
204
+ cache_key = fast_hash(key) if len(key) > 32 else key
205
+
206
+ prompt_cache[cache_key] = {
207
  "value": value,
208
+ "cached_at": time.time()
209
  }
210
 
211
+ # Limit cache size with optimized eviction
212
+ if len(prompt_cache) > MAX_PROMPT_CACHE_SIZE:
213
+ # Batch remove oldest 1000 entries for efficiency
214
+ oldest_keys = sorted(prompt_cache.keys(),
215
+ key=lambda k: prompt_cache[k]["cached_at"])[:1000]
216
+ for old_key in oldest_keys:
217
+ del prompt_cache[old_key]
218
 
219
+ processing_time = time.time() - start_time
 
 
 
 
220
 
221
  result = {
222
  "success": True,
223
+ "key": cache_key,
224
  "value_length": len(value),
225
  "cache_size": len(prompt_cache),
226
  "processing_time_ms": round(processing_time * 1000, 2),
227
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
228
+ "request_id": hashlib.md5(f"{cache_key}{time.time()}".encode()).hexdigest()[:8]
229
  }
230
 
231
+ logger.info(f"[CACHE-PROMPT] Stored: {len(value)} chars in {processing_time*1000:.1f}ms")
 
 
 
 
 
 
 
232
  return json.dumps(result, indent=2)
233
 
234
  except Exception as e:
235
  processing_time = time.time() - start_time
236
+ logger.error(f"[CACHE-PROMPT] ❌ Failed after {processing_time*1000:.1f}ms: {e}")
 
 
 
 
 
237
 
238
  return json.dumps({
239
  "success": False,
 
244
  }, indent=2)
245
 
246
  def get_cached_prompt(key: str) -> str:
247
+ """SPEED-OPTIMIZED prompt retrieval"""
 
 
 
 
 
 
 
 
248
  start_time = time.time()
249
 
250
  try:
251
+ # Use fast hash for key
252
+ cache_key = fast_hash(key) if len(key) > 32 else key
253
+ cached_value = prompt_cache.get(cache_key)
254
  processing_time = time.time() - start_time
255
 
256
  if cached_value is not None:
257
  result = {
258
  "success": True,
259
  "found": True,
260
+ "key": cache_key,
261
+ "value": cached_value["value"],
262
+ "value_length": len(cached_value["value"]),
263
  "cache_size": len(prompt_cache),
264
  "processing_time_ms": round(processing_time * 1000, 2),
265
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
266
+ "request_id": hashlib.md5(f"{cache_key}{time.time()}".encode()).hexdigest()[:8],
267
  "cache_hit": True
268
  }
269
 
270
+ logger.info(f"[GET-PROMPT] HIT: {len(cached_value['value'])} chars in {processing_time*1000:.1f}ms")
 
 
 
 
 
271
  else:
272
  result = {
273
  "success": True,
274
  "found": False,
275
+ "key": cache_key,
276
  "value": None,
277
  "cache_size": len(prompt_cache),
278
  "processing_time_ms": round(processing_time * 1000, 2),
279
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
280
+ "request_id": hashlib.md5(f"{cache_key}{time.time()}".encode()).hexdigest()[:8],
281
  "cache_hit": False
282
  }
283
 
284
+ logger.info(f"[GET-PROMPT] ⚠️ MISS: {cache_key} in {processing_time*1000:.1f}ms")
 
 
 
285
 
 
286
  return json.dumps(result, indent=2)
287
 
288
  except Exception as e:
289
  processing_time = time.time() - start_time
290
+ logger.error(f"[GET-PROMPT] ❌ Failed after {processing_time*1000:.1f}ms: {e}")
 
 
 
 
291
 
292
  return json.dumps({
293
  "success": False,
 
298
  }, indent=2)
299
 
300
  def cache_response(prompt_hash: str, response: str) -> str:
301
+ """SPEED-OPTIMIZED response caching with larger limits"""
 
 
 
 
 
 
 
 
 
302
  start_time = time.time()
303
 
304
  try:
305
  response_cache[prompt_hash] = {
306
  "response": response,
307
+ "cached_at": time.time()
308
  }
309
 
310
+ # Limit cache size with optimized eviction
311
+ if len(response_cache) > MAX_RESPONSE_CACHE_SIZE:
312
+ # Batch remove oldest 500 entries for efficiency
313
+ oldest_keys = sorted(response_cache.keys(),
314
+ key=lambda k: response_cache[k]["cached_at"])[:500]
315
+ for old_key in oldest_keys:
316
+ del response_cache[old_key]
317
 
318
+ processing_time = time.time() - start_time
 
 
 
 
319
 
320
  result = {
321
  "success": True,
 
324
  "cache_size": len(response_cache),
325
  "processing_time_ms": round(processing_time * 1000, 2),
326
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
327
+ "request_id": hashlib.md5(f"{prompt_hash}{time.time()}".encode()).hexdigest()[:8]
 
328
  }
329
 
330
+ logger.info(f"[CACHE-RESPONSE] Stored: {len(response)} chars in {processing_time*1000:.1f}ms")
 
 
 
 
 
 
 
 
331
  return json.dumps(result, indent=2)
332
 
333
  except Exception as e:
334
  processing_time = time.time() - start_time
335
+ logger.error(f"[CACHE-RESPONSE] ❌ Failed after {processing_time*1000:.1f}ms: {e}")
 
 
 
 
 
336
 
337
  return json.dumps({
338
  "success": False,
 
343
  }, indent=2)
344
 
345
  def get_cached_response(prompt_hash: str) -> str:
346
+ """SPEED-OPTIMIZED response retrieval"""
 
 
 
 
 
 
 
347
  start_time = time.time()
348
 
349
  try:
 
352
 
353
  if cached_data is not None:
354
  response = cached_data["response"]
355
+ age_seconds = round(time.time() - cached_data["cached_at"], 2)
356
 
357
  result = {
358
  "success": True,
 
366
  "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
367
  "request_id": hashlib.md5(f"{prompt_hash}{time.time()}".encode()).hexdigest()[:8],
368
  "cache_hit": True,
369
+ "cached_at": datetime.datetime.fromtimestamp(cached_data["cached_at"], pytz.UTC).isoformat()
370
  }
371
 
372
+ logger.info(f"[GET-RESPONSE] HIT: {len(response)} chars in {processing_time*1000:.1f}ms")
 
 
 
 
 
 
 
373
  else:
374
  result = {
375
  "success": True,
 
383
  "cache_hit": False
384
  }
385
 
386
+ logger.info(f"[GET-RESPONSE] ⚠️ MISS: {prompt_hash} in {processing_time*1000:.1f}ms")
 
 
 
387
 
 
388
  return json.dumps(result, indent=2)
389
 
390
  except Exception as e:
391
  processing_time = time.time() - start_time
392
+ logger.error(f"[GET-RESPONSE] ❌ Failed after {processing_time*1000:.1f}ms: {e}")
 
 
 
 
393
 
394
  return json.dumps({
395
  "success": False,
 
484
  return json.dumps({
485
  "success": False,
486
  "error": str(e),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  logger.info(f"[BACKEND-HEALTH] Checking backend health status...")
488
  logger.info(f"[BACKEND-HEALTH] Current prompt cache size: {len(prompt_cache)} entries")
489
  logger.info(f"[BACKEND-HEALTH] Current response cache size: {len(response_cache)} entries")