turtle170 commited on
Commit
5c7aeaa
·
verified ·
1 Parent(s): d4152d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +469 -118
app.py CHANGED
@@ -3,6 +3,8 @@ import json
3
  import time
4
  import hashlib
5
  import logging
 
 
6
  from typing import Dict, Optional
7
 
8
  # Initialize logging for backend
@@ -27,250 +29,593 @@ warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*asyncio.*"
27
  prompt_cache = {}
28
  response_cache = {}
29
  token_ledger = {}
 
30
 
31
  def tokenize_text(text: str) -> str:
32
- """
33
- Fast tokenization without loading full model
34
- Returns: JSON string with token count estimation
35
- """
 
 
 
 
36
  try:
37
- logger.info(f"[TOKENIZE] Processing text of length {len(text)}")
38
-
39
- # Simple estimation (4 chars ≈ 1 token for English)
40
- # This is FAST and good enough for pre-processing
41
- estimated_tokens = len(text) // 4
42
- word_count = len(text.split())
43
-
44
- # Create cache key
45
- text_hash = hashlib.md5(text.encode()).hexdigest()[:16]
46
 
47
  result = {
48
  "success": True,
49
- "text_hash": text_hash,
50
  "estimated_tokens": estimated_tokens,
51
- "word_count": word_count,
 
 
52
  "char_count": len(text),
53
- "timestamp": time.time()
 
54
  }
55
 
56
- # Cache this tokenization
 
 
 
 
 
 
 
 
 
 
57
  prompt_cache[text_hash] = {
58
  "text": text[:100] + "..." if len(text) > 100 else text,
59
  "tokens": estimated_tokens,
60
  "cached_at": time.time()
61
  }
62
 
63
- logger.info(f"[TOKENIZE] Estimated {estimated_tokens} tokens, cached as {text_hash}")
 
64
  return json.dumps(result, indent=2)
65
 
66
  except Exception as e:
67
- logger.error(f"[TOKENIZE] Error: {e}")
 
 
 
 
 
 
68
  return json.dumps({
69
  "success": False,
70
- "error": str(e)
 
 
 
71
  }, indent=2)
72
 
73
  def cache_prompt(key: str, value: str) -> str:
74
- """
75
- Store prompt in cache with timestamp
76
- """
 
 
 
 
 
 
 
 
 
 
77
  try:
78
- logger.info(f"[CACHE-PROMPT] Storing key: {key} (value length: {len(value)})")
79
-
80
  prompt_cache[key] = {
81
  "value": value,
82
  "timestamp": time.time()
83
  }
84
 
 
 
85
  # Limit cache size to 100 entries
86
  if len(prompt_cache) > 100:
87
  oldest_key = min(prompt_cache.keys(), key=lambda k: prompt_cache[k]["timestamp"])
88
  del prompt_cache[oldest_key]
89
  logger.info(f"[CACHE-PROMPT] Removed oldest entry: {oldest_key}")
90
 
91
- logger.info(f"[CACHE-PROMPT] Stored successfully. Cache size: {len(prompt_cache)}")
92
- return json.dumps({
93
  "success": True,
94
- "cached": key,
95
- "cache_size": len(prompt_cache)
96
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  except Exception as e:
99
- logger.error(f"[CACHE-PROMPT] Error: {e}")
 
 
 
 
 
 
 
100
  return json.dumps({
101
  "success": False,
102
- "error": str(e)
 
 
 
103
  }, indent=2)
104
 
105
  def get_cached_prompt(key: str) -> str:
106
- """
107
- Retrieve cached prompt
108
- """
 
 
 
 
 
 
 
 
109
  try:
110
- if key in prompt_cache:
111
- data = prompt_cache[key]
112
- logger.info(f"[CACHE-PROMPT] Retrieved key: {key} (value length: {len(data['value'])})")
113
- return json.dumps({
 
114
  "success": True,
115
- "value": data["value"],
116
- "age_seconds": round(time.time() - data["timestamp"], 2)
117
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- logger.info(f"[CACHE-PROMPT] Key not found: {key}")
120
- return json.dumps({
121
- "success": False,
122
- "error": "Cache key not found"
123
- }, indent=2)
124
 
125
  except Exception as e:
126
- logger.error(f"[CACHE-PROMPT] Error: {e}")
 
 
 
 
 
 
127
  return json.dumps({
128
  "success": False,
129
- "error": str(e)
130
- }, indent=2)
131
-
132
  def cache_response(prompt_hash: str, response: str) -> str:
133
- """
134
- Cache a complete response for instant retrieval
135
- """
 
 
 
 
 
 
 
 
 
136
  try:
137
- logger.info(f"[CACHE-RESPONSE] Storing prompt hash: {prompt_hash} (response length: {len(response)})")
138
-
139
  response_cache[prompt_hash] = {
140
  "response": response,
141
  "timestamp": time.time()
142
  }
143
 
144
- # Limit cache size
 
 
145
  if len(response_cache) > 50:
146
  oldest_key = min(response_cache.keys(), key=lambda k: response_cache[k]["timestamp"])
147
  del response_cache[oldest_key]
148
  logger.info(f"[CACHE-RESPONSE] Removed oldest entry: {oldest_key}")
149
 
150
- logger.info(f"[CACHE-RESPONSE] Stored successfully. Cache size: {len(response_cache)}")
151
- return json.dumps({
152
  "success": True,
153
- "cached": prompt_hash,
154
- "cache_size": len(response_cache)
155
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
  except Exception as e:
158
- logger.error(f"[CACHE-RESPONSE] Error: {e}")
 
 
 
 
 
 
 
159
  return json.dumps({
160
  "success": False,
161
- "error": str(e)
 
 
 
162
  }, indent=2)
163
 
164
  def get_cached_response(prompt_hash: str) -> str:
165
- """
166
- Retrieve cached response
167
- """
 
 
 
 
 
 
 
168
  try:
169
- if prompt_hash in response_cache:
170
- data = response_cache[prompt_hash]
171
- logger.info(f"[CACHE-RESPONSE] Retrieved prompt hash: {prompt_hash} (response length: {len(data['response'])})")
172
- return json.dumps({
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  "success": True,
174
- "response": data["response"],
175
- "age_seconds": round(time.time() - data["timestamp"], 2)
176
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- logger.info(f"[CACHE-RESPONSE] Prompt hash not found: {prompt_hash}")
179
- return json.dumps({
180
- "success": False,
181
- "error": "Response not cached"
182
- }, indent=2)
183
 
184
  except Exception as e:
185
- logger.error(f"[CACHE-RESPONSE] Error: {e}")
 
 
 
 
 
 
186
  return json.dumps({
187
  "success": False,
188
- "error": str(e)
 
 
 
189
  }, indent=2)
190
 
191
  def calculate_token_cost(username: str, duration_ms: float) -> str:
192
- """
193
- Calculate token cost for a user
194
- Stateless - just returns the calculation
195
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  try:
197
- logger.info(f"[TOKEN-COST] Calculating cost for user: {username} (duration: {duration_ms}ms)")
198
-
199
  cost = (duration_ms / 100.0) * 0.001 # 0.001 tokens per 100ms
 
200
 
201
  # Track in ledger (for analytics)
202
  if username not in token_ledger:
203
  token_ledger[username] = {
204
  "total_cost": 0.0,
205
  "total_duration_ms": 0.0,
206
- "requests": 0
 
 
207
  }
208
 
 
209
  token_ledger[username]["total_cost"] += cost
210
  token_ledger[username]["total_duration_ms"] += duration_ms
211
  token_ledger[username]["requests"] += 1
 
212
 
213
- logger.info(f"[TOKEN-COST] Calculated cost: {cost} tokens (total: {token_ledger[username]['total_cost']})")
214
- return json.dumps({
 
 
 
 
215
  "success": True,
216
  "username": username,
217
  "duration_ms": duration_ms,
218
  "cost": round(cost, 6),
219
- "total_cost": round(token_ledger[username]["total_cost"], 4),
220
- "total_requests": token_ledger[username]["requests"]
221
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
  except Exception as e:
224
- logger.error(f"[TOKEN-COST] Error: {e}")
 
 
 
 
 
 
 
225
  return json.dumps({
226
  "success": False,
227
- "error": str(e)
 
 
 
228
  }, indent=2)
229
 
230
  def get_cache_stats() -> str:
231
- """
232
- Get statistics about cache usage
233
- """
 
 
 
 
 
 
 
 
234
  try:
235
- logger.info("[CACHE-STATS] Retrieving cache statistics")
 
 
 
 
 
236
 
237
- return json.dumps({
 
 
 
 
 
 
 
238
  "success": True,
239
  "prompt_cache_size": len(prompt_cache),
240
  "response_cache_size": len(response_cache),
241
  "users_tracked": len(token_ledger),
242
- "total_requests": sum(u["requests"] for u in token_ledger.values()),
243
- "timestamp": time.time()
244
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  except Exception as e:
247
- logger.error(f"[CACHE-STATS] Error: {e}")
 
 
 
 
 
248
  return json.dumps({
249
  "success": False,
250
- "error": str(e)
 
 
 
251
  }, indent=2)
252
 
253
  def get_backend_health() -> str:
254
- """
255
- Get backend health status for monitoring
256
- """
 
 
 
 
 
 
 
257
  try:
258
- logger.info("[BACKEND-HEALTH] Retrieving backend health status")
 
 
 
259
 
260
- return json.dumps({
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  "success": True,
262
- "status": "healthy",
263
- "cache_size": len(prompt_cache) + len(response_cache),
 
 
 
264
  "users_tracked": len(token_ledger),
265
- "total_requests": sum(u["requests"] for u in token_ledger.values()),
266
- "timestamp": time.time()
267
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
  except Exception as e:
270
- logger.error(f"[BACKEND-HEALTH] Error: {e}")
 
 
 
 
 
271
  return json.dumps({
272
  "success": False,
273
- "error": str(e)
 
 
 
 
274
  }, indent=2)
275
 
276
  # ============================================================================
@@ -393,8 +738,14 @@ if __name__ == "__main__":
393
  signal.signal(signal.SIGTERM, signal_handler)
394
  signal.signal(signal.SIGINT, signal_handler)
395
 
396
- logger.info("[INIT] ZeroEngine-Backend starting up...")
 
 
 
 
397
  logger.info(f"[INIT] Cache sizes - Prompt: {len(prompt_cache)}, Response: {len(response_cache)}")
398
  logger.info(f"[INIT] Users tracked: {len(token_ledger)}")
 
 
399
 
400
  demo.launch(server_name="0.0.0.0", server_port=7861, ssr_mode=False)
 
3
  import time
4
  import hashlib
5
  import logging
6
+ import datetime
7
+ import pytz
8
  from typing import Dict, Optional
9
 
10
  # Initialize logging for backend
 
29
  prompt_cache = {}
30
  response_cache = {}
31
  token_ledger = {}
32
+ backend_start_time = time.time()
33
 
34
  def tokenize_text(text: str) -> str:
35
+ """Enhanced tokenization with extremely detailed logging"""
36
+ logger.info(f"[TOKENIZE] ===== TOKENIZE REQUEST START =====")
37
+ logger.info(f"[TOKENIZE] Input text length: {len(text)} characters")
38
+ logger.info(f"[TOKENIZE] Input text preview: '{text[:100]}{'...' if len(text) > 100 else ''}'")
39
+ logger.info(f"[TOKENIZE] Input text hash: {hashlib.md5(text.encode()).hexdigest()[:16]}")
40
+
41
+ start_time = time.time()
42
+
43
  try:
44
+ # Simple character-based estimation (can be enhanced with proper tokenizer)
45
+ estimated_tokens = len(text.split()) + len(text) // 4
46
+ processing_time = time.time() - start_time
 
 
 
 
 
 
47
 
48
  result = {
49
  "success": True,
 
50
  "estimated_tokens": estimated_tokens,
51
+ "processing_time_ms": round(processing_time * 1000, 2),
52
+ "text_length": len(text),
53
+ "word_count": len(text.split()),
54
  "char_count": len(text),
55
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
56
+ "request_id": hashlib.md5(f"{text}{time.time()}".encode()).hexdigest()[:8]
57
  }
58
 
59
+ logger.info(f"[TOKENIZE] Tokenization completed successfully")
60
+ logger.info(f"[TOKENIZE] Estimated tokens: {estimated_tokens}")
61
+ logger.info(f"[TOKENIZE] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
62
+ logger.info(f"[TOKENIZE] Word count: {len(text.split())}")
63
+ logger.info(f"[TOKENIZE] Character count: {len(text)}")
64
+ logger.info(f"[TOKENIZE] Request ID: {result['request_id']}")
65
+ logger.info(f"[TOKENIZE] ===== TOKENIZE REQUEST END =====")
66
+
67
+ # Create cache key
68
+ text_hash = hashlib.md5(text.encode()).hexdigest()[:16]
69
+
70
  prompt_cache[text_hash] = {
71
  "text": text[:100] + "..." if len(text) > 100 else text,
72
  "tokens": estimated_tokens,
73
  "cached_at": time.time()
74
  }
75
 
76
+ logger.info(f"[TOKENIZE] Cached tokenization result for key: {text_hash}")
77
+
78
  return json.dumps(result, indent=2)
79
 
80
  except Exception as e:
81
+ processing_time = time.time() - start_time
82
+ logger.error(f"[TOKENIZE] ❌ Tokenization failed after {processing_time:.4f}s: {e}")
83
+ logger.error(f"[TOKENIZE] Error type: {type(e).__name__}")
84
+ logger.error(f"[TOKENIZE] Error details: {str(e)}")
85
+ logger.error(f"[TOKENIZE] Input text that caused error: '{text[:200]}{'...' if len(text) > 200 else ''}'")
86
+ logger.error(f"[TOKENIZE] ===== TOKENIZE REQUEST END (ERROR) =====")
87
+
88
  return json.dumps({
89
  "success": False,
90
+ "error": str(e),
91
+ "error_type": type(e).__name__,
92
+ "processing_time_ms": round(processing_time * 1000, 2),
93
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
94
  }, indent=2)
95
 
96
  def cache_prompt(key: str, value: str) -> str:
97
+ """Store prompt in cache with extremely detailed logging"""
98
+ logger.info(f"[CACHE-PROMPT] ===== CACHE PROMPT REQUEST START =====")
99
+ logger.info(f"[CACHE-PROMPT] Requested key: '{key}'")
100
+ logger.info(f"[CACHE-PROMPT] Key length: {len(key)} characters")
101
+ logger.info(f"[CACHE-PROMPT] Key hash: {hashlib.md5(key.encode()).hexdigest()[:16]}")
102
+ logger.info(f"[CACHE-PROMPT] Value length: {len(value)} characters")
103
+ logger.info(f"[CACHE-PROMPT] Value preview: '{value[:100]}{'...' if len(value) > 100 else ''}'")
104
+ logger.info(f"[CACHE-PROMPT] Current cache size: {len(prompt_cache)} entries")
105
+ logger.info(f"[CACHE-PROMPT] Current cache memory usage: {sum(len(v) for v in prompt_cache.values())} characters")
106
+ logger.info(f"[CACHE-PROMPT] Available keys: {list(prompt_cache.keys())[:10]}{'...' if len(prompt_cache) > 10 else ''}")
107
+
108
+ start_time = time.time()
109
+
110
  try:
 
 
111
  prompt_cache[key] = {
112
  "value": value,
113
  "timestamp": time.time()
114
  }
115
 
116
+ processing_time = time.time() - start_time
117
+
118
  # Limit cache size to 100 entries
119
  if len(prompt_cache) > 100:
120
  oldest_key = min(prompt_cache.keys(), key=lambda k: prompt_cache[k]["timestamp"])
121
  del prompt_cache[oldest_key]
122
  logger.info(f"[CACHE-PROMPT] Removed oldest entry: {oldest_key}")
123
 
124
+ result = {
 
125
  "success": True,
126
+ "key": key,
127
+ "value_length": len(value),
128
+ "cache_size": len(prompt_cache),
129
+ "processing_time_ms": round(processing_time * 1000, 2),
130
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
131
+ "request_id": hashlib.md5(f"{key}{time.time()}".encode()).hexdigest()[:8]
132
+ }
133
+
134
+ logger.info(f"[CACHE-PROMPT] ✅ Prompt cached successfully")
135
+ logger.info(f"[CACHE-PROMPT] Cached key: '{key}'")
136
+ logger.info(f"[CACHE-PROMPT] Value length: {len(value)} characters")
137
+ logger.info(f"[CACHE-PROMPT] Value preview: '{value[:100]}{'...' if len(value) > 100 else ''}'")
138
+ logger.info(f"[CACHE-PROMPT] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
139
+ logger.info(f"[CACHE-PROMPT] Request ID: {result['request_id']}")
140
+ logger.info(f"[CACHE-PROMPT] ===== CACHE PROMPT REQUEST END =====")
141
+
142
+ return json.dumps(result, indent=2)
143
 
144
  except Exception as e:
145
+ processing_time = time.time() - start_time
146
+ logger.error(f"[CACHE-PROMPT] ❌ Cache prompt failed after {processing_time:.4f}s: {e}")
147
+ logger.error(f"[CACHE-PROMPT] Error type: {type(e).__name__}")
148
+ logger.error(f"[CACHE-PROMPT] Error details: {str(e)}")
149
+ logger.error(f"[CACHE-PROMPT] Key that caused error: '{key}'")
150
+ logger.error(f"[CACHE-PROMPT] Value that caused error: '{value[:200]}{'...' if len(value) > 200 else ''}'")
151
+ logger.error(f"[CACHE-PROMPT] ===== CACHE PROMPT REQUEST END (ERROR) =====")
152
+
153
  return json.dumps({
154
  "success": False,
155
+ "error": str(e),
156
+ "error_type": type(e).__name__,
157
+ "processing_time_ms": round(processing_time * 1000, 2),
158
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
159
  }, indent=2)
160
 
161
  def get_cached_prompt(key: str) -> str:
162
+ """Retrieve a cached prompt with extremely detailed logging"""
163
+ logger.info(f"[GET-PROMPT] ===== GET CACHED PROMPT REQUEST START =====")
164
+ logger.info(f"[GET-PROMPT] Requested key: '{key}'")
165
+ logger.info(f"[GET-PROMPT] Key length: {len(key)} characters")
166
+ logger.info(f"[GET-PROMPT] Key hash: {hashlib.md5(key.encode()).hexdigest()[:16]}")
167
+ logger.info(f"[GET-PROMPT] Current cache size: {len(prompt_cache)} entries")
168
+ logger.info(f"[GET-PROMPT] Current cache memory usage: {sum(len(v) for v in prompt_cache.values())} characters")
169
+ logger.info(f"[GET-PROMPT] Available keys: {list(prompt_cache.keys())[:10]}{'...' if len(prompt_cache) > 10 else ''}")
170
+
171
+ start_time = time.time()
172
+
173
  try:
174
+ cached_value = prompt_cache.get(key)
175
+ processing_time = time.time() - start_time
176
+
177
+ if cached_value is not None:
178
+ result = {
179
  "success": True,
180
+ "found": True,
181
+ "key": key,
182
+ "value": cached_value,
183
+ "value_length": len(cached_value),
184
+ "cache_size": len(prompt_cache),
185
+ "processing_time_ms": round(processing_time * 1000, 2),
186
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
187
+ "request_id": hashlib.md5(f"{key}{time.time()}".encode()).hexdigest()[:8],
188
+ "cache_hit": True
189
+ }
190
+
191
+ logger.info(f"[GET-PROMPT] ✅ Cache HIT - prompt found")
192
+ logger.info(f"[GET-PROMPT] Found key: '{key}'")
193
+ logger.info(f"[GET-PROMPT] Value length: {len(cached_value)} characters")
194
+ logger.info(f"[GET-PROMPT] Value preview: '{cached_value[:100]}{'...' if len(cached_value) > 100 else ''}'")
195
+ logger.info(f"[GET-PROMPT] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
196
+ logger.info(f"[GET-PROMPT] Request ID: {result['request_id']}")
197
+ else:
198
+ result = {
199
+ "success": True,
200
+ "found": False,
201
+ "key": key,
202
+ "value": None,
203
+ "cache_size": len(prompt_cache),
204
+ "processing_time_ms": round(processing_time * 1000, 2),
205
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
206
+ "request_id": hashlib.md5(f"{key}{time.time()}".encode()).hexdigest()[:8],
207
+ "cache_hit": False
208
+ }
209
+
210
+ logger.warning(f"[GET-PROMPT] ⚠️ Cache MISS - prompt not found")
211
+ logger.warning(f"[GET-PROMPT] Missing key: '{key}'")
212
+ logger.warning(f"[GET-PROMPT] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
213
+ logger.warning(f"[GET-PROMPT] Request ID: {result['request_id']}")
214
 
215
+ logger.info(f"[GET-PROMPT] ===== GET CACHED PROMPT REQUEST END =====")
216
+ return json.dumps(result, indent=2)
 
 
 
217
 
218
  except Exception as e:
219
+ processing_time = time.time() - start_time
220
+ logger.error(f"[GET-PROMPT] ❌ Get cached prompt failed after {processing_time:.4f}s: {e}")
221
+ logger.error(f"[GET-PROMPT] Error type: {type(e).__name__}")
222
+ logger.error(f"[GET-PROMPT] Error details: {str(e)}")
223
+ logger.error(f"[GET-PROMPT] Key that caused error: '{key}'")
224
+ logger.error(f"[GET-PROMPT] ===== GET CACHED PROMPT REQUEST END (ERROR) =====")
225
+
226
  return json.dumps({
227
  "success": False,
228
+ "error": str(e),
 
 
229
  def cache_response(prompt_hash: str, response: str) -> str:
230
+ """Cache a complete response with extremely detailed logging"""
231
+ logger.info(f"[CACHE-RESPONSE] ===== CACHE RESPONSE REQUEST START =====")
232
+ logger.info(f"[CACHE-RESPONSE] Prompt hash: '{prompt_hash}'")
233
+ logger.info(f"[CACHE-RESPONSE] Hash length: {len(prompt_hash)} characters")
234
+ logger.info(f"[CACHE-RESPONSE] Response length: {len(response)} characters")
235
+ logger.info(f"[CACHE-RESPONSE] Response preview: '{response[:150]}{'...' if len(response) > 150 else ''}'")
236
+ logger.info(f"[CACHE-RESPONSE] Current response cache size: {len(response_cache)} entries")
237
+ logger.info(f"[CACHE-RESPONSE] Current cache memory usage: {sum(len(v['response']) for v in response_cache.values())} characters")
238
+ logger.info(f"[CACHE-RESPONSE] Available hashes: {list(response_cache.keys())[:10]}{'...' if len(response_cache) > 10 else ''}")
239
+
240
+ start_time = time.time()
241
+
242
  try:
 
 
243
  response_cache[prompt_hash] = {
244
  "response": response,
245
  "timestamp": time.time()
246
  }
247
 
248
+ processing_time = time.time() - start_time
249
+
250
+ # Limit cache size to 50 entries
251
  if len(response_cache) > 50:
252
  oldest_key = min(response_cache.keys(), key=lambda k: response_cache[k]["timestamp"])
253
  del response_cache[oldest_key]
254
  logger.info(f"[CACHE-RESPONSE] Removed oldest entry: {oldest_key}")
255
 
256
+ result = {
 
257
  "success": True,
258
+ "cached_hash": prompt_hash,
259
+ "response_length": len(response),
260
+ "cache_size": len(response_cache),
261
+ "processing_time_ms": round(processing_time * 1000, 2),
262
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
263
+ "request_id": hashlib.md5(f"{prompt_hash}{time.time()}".encode()).hexdigest()[:8],
264
+ "cache_memory_usage": sum(len(v['response']) for v in response_cache.values())
265
+ }
266
+
267
+ logger.info(f"[CACHE-RESPONSE] ✅ Response cached successfully")
268
+ logger.info(f"[CACHE-RESPONSE] Cached hash: '{prompt_hash}'")
269
+ logger.info(f"[CACHE-RESPONSE] Response length: {len(response)} characters")
270
+ logger.info(f"[CACHE-RESPONSE] New cache size: {len(response_cache)} entries")
271
+ logger.info(f"[CACHE-RESPONSE] New cache memory usage: {result['cache_memory_usage']} characters")
272
+ logger.info(f"[CACHE-RESPONSE] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
273
+ logger.info(f"[CACHE-RESPONSE] Request ID: {result['request_id']}")
274
+ logger.info(f"[CACHE-RESPONSE] ===== CACHE RESPONSE REQUEST END =====")
275
+
276
+ return json.dumps(result, indent=2)
277
 
278
  except Exception as e:
279
+ processing_time = time.time() - start_time
280
+ logger.error(f"[CACHE-RESPONSE] ❌ Cache response failed after {processing_time:.4f}s: {e}")
281
+ logger.error(f"[CACHE-RESPONSE] Error type: {type(e).__name__}")
282
+ logger.error(f"[CACHE-RESPONSE] Error details: {str(e)}")
283
+ logger.error(f"[CACHE-RESPONSE] Hash that caused error: '{prompt_hash}'")
284
+ logger.error(f"[CACHE-RESPONSE] Response preview that caused error: '{response[:300]}{'...' if len(response) > 300 else ''}'")
285
+ logger.error(f"[CACHE-RESPONSE] ===== CACHE RESPONSE REQUEST END (ERROR) =====")
286
+
287
  return json.dumps({
288
  "success": False,
289
+ "error": str(e),
290
+ "error_type": type(e).__name__,
291
+ "processing_time_ms": round(processing_time * 1000, 2),
292
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
293
  }, indent=2)
294
 
295
  def get_cached_response(prompt_hash: str) -> str:
296
+ """Retrieve cached response with extremely detailed logging"""
297
+ logger.info(f"[GET-RESPONSE] ===== GET CACHED RESPONSE REQUEST START =====")
298
+ logger.info(f"[GET-RESPONSE] Requested hash: '{prompt_hash}'")
299
+ logger.info(f"[GET-RESPONSE] Hash length: {len(prompt_hash)} characters")
300
+ logger.info(f"[GET-RESPONSE] Current response cache size: {len(response_cache)} entries")
301
+ logger.info(f"[GET-RESPONSE] Current cache memory usage: {sum(len(v['response']) for v in response_cache.values())} characters")
302
+ logger.info(f"[GET-RESPONSE] Available hashes: {list(response_cache.keys())[:10]}{'...' if len(response_cache) > 10 else ''}")
303
+
304
+ start_time = time.time()
305
+
306
  try:
307
+ cached_data = response_cache.get(prompt_hash)
308
+ processing_time = time.time() - start_time
309
+
310
+ if cached_data is not None:
311
+ response = cached_data["response"]
312
+ age_seconds = round(time.time() - cached_data["timestamp"], 2)
313
+
314
+ result = {
315
+ "success": True,
316
+ "found": True,
317
+ "hash": prompt_hash,
318
+ "response": response,
319
+ "response_length": len(response),
320
+ "age_seconds": age_seconds,
321
+ "cache_size": len(response_cache),
322
+ "processing_time_ms": round(processing_time * 1000, 2),
323
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
324
+ "request_id": hashlib.md5(f"{prompt_hash}{time.time()}".encode()).hexdigest()[:8],
325
+ "cache_hit": True,
326
+ "cached_at": datetime.datetime.fromtimestamp(cached_data["timestamp"], pytz.UTC).isoformat()
327
+ }
328
+
329
+ logger.info(f"[GET-RESPONSE] ✅ Cache HIT - response found")
330
+ logger.info(f"[GET-RESPONSE] Found hash: '{prompt_hash}'")
331
+ logger.info(f"[GET-RESPONSE] Response length: {len(response)} characters")
332
+ logger.info(f"[GET-RESPONSE] Response preview: '{response[:150]}{'...' if len(response) > 150 else ''}'")
333
+ logger.info(f"[GET-RESPONSE] Response age: {age_seconds} seconds")
334
+ logger.info(f"[GET-RESPONSE] Cached at: {result['cached_at']}")
335
+ logger.info(f"[GET-RESPONSE] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
336
+ logger.info(f"[GET-RESPONSE] Request ID: {result['request_id']}")
337
+ else:
338
+ result = {
339
  "success": True,
340
+ "found": False,
341
+ "hash": prompt_hash,
342
+ "response": None,
343
+ "cache_size": len(response_cache),
344
+ "processing_time_ms": round(processing_time * 1000, 2),
345
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
346
+ "request_id": hashlib.md5(f"{prompt_hash}{time.time()}".encode()).hexdigest()[:8],
347
+ "cache_hit": False
348
+ }
349
+
350
+ logger.warning(f"[GET-RESPONSE] ⚠️ Cache MISS - response not found")
351
+ logger.warning(f"[GET-RESPONSE] Missing hash: '{prompt_hash}'")
352
+ logger.warning(f"[GET-RESPONSE] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
353
+ logger.warning(f"[GET-RESPONSE] Request ID: {result['request_id']}")
354
 
355
+ logger.info(f"[GET-RESPONSE] ===== GET CACHED RESPONSE REQUEST END =====")
356
+ return json.dumps(result, indent=2)
 
 
 
357
 
358
  except Exception as e:
359
+ processing_time = time.time() - start_time
360
+ logger.error(f"[GET-RESPONSE] ❌ Get cached response failed after {processing_time:.4f}s: {e}")
361
+ logger.error(f"[GET-RESPONSE] Error type: {type(e).__name__}")
362
+ logger.error(f"[GET-RESPONSE] Error details: {str(e)}")
363
+ logger.error(f"[GET-RESPONSE] Hash that caused error: '{prompt_hash}'")
364
+ logger.error(f"[GET-RESPONSE] ===== GET CACHED RESPONSE REQUEST END (ERROR) =====")
365
+
366
  return json.dumps({
367
  "success": False,
368
+ "error": str(e),
369
+ "error_type": type(e).__name__,
370
+ "processing_time_ms": round(processing_time * 1000, 2),
371
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
372
  }, indent=2)
373
 
374
  def calculate_token_cost(username: str, duration_ms: float) -> str:
375
+ """Calculate token cost with extremely detailed logging"""
376
+ logger.info(f"[TOKEN-COST] ===== TOKEN COST REQUEST START =====")
377
+ logger.info(f"[TOKEN-COST] Username: '{username}'")
378
+ logger.info(f"[TOKEN-COST] Username length: {len(username)} characters")
379
+ logger.info(f"[TOKEN-COST] Duration: {duration_ms}ms")
380
+ logger.info(f"[TOKEN-COST] Current users tracked: {len(token_ledger)}")
381
+ logger.info(f"[TOKEN-COST] User ledger keys: {list(token_ledger.keys())[:10]}{'...' if len(token_ledger) > 10 else ''}")
382
+
383
+ if username in token_ledger:
384
+ user_data = token_ledger[username]
385
+ logger.info(f"[TOKEN-COST] Existing user data found:")
386
+ logger.info(f"[TOKEN-COST] - Total cost: {user_data['total_cost']} tokens")
387
+ logger.info(f"[TOKEN-COST] - Total duration: {user_data['total_duration_ms']}ms")
388
+ logger.info(f"[TOKEN-COST] - Previous requests: {user_data['requests']}")
389
+ else:
390
+ logger.info(f"[TOKEN-COST] New user - creating ledger entry")
391
+
392
+ start_time = time.time()
393
+
394
  try:
 
 
395
  cost = (duration_ms / 100.0) * 0.001 # 0.001 tokens per 100ms
396
+ processing_time = time.time() - start_time
397
 
398
  # Track in ledger (for analytics)
399
  if username not in token_ledger:
400
  token_ledger[username] = {
401
  "total_cost": 0.0,
402
  "total_duration_ms": 0.0,
403
+ "requests": 0,
404
+ "first_seen": time.time(),
405
+ "last_seen": time.time()
406
  }
407
 
408
+ # Update user data
409
  token_ledger[username]["total_cost"] += cost
410
  token_ledger[username]["total_duration_ms"] += duration_ms
411
  token_ledger[username]["requests"] += 1
412
+ token_ledger[username]["last_seen"] = time.time()
413
 
414
+ user_data = token_ledger[username]
415
+ avg_cost_per_request = user_data["total_cost"] / user_data["requests"]
416
+ avg_duration_per_request = user_data["total_duration_ms"] / user_data["requests"]
417
+ account_age_seconds = round(time.time() - user_data["first_seen"], 2)
418
+
419
+ result = {
420
  "success": True,
421
  "username": username,
422
  "duration_ms": duration_ms,
423
  "cost": round(cost, 6),
424
+ "total_cost": round(user_data["total_cost"], 4),
425
+ "total_requests": user_data["requests"],
426
+ "total_duration_ms": round(user_data["total_duration_ms"], 2),
427
+ "avg_cost_per_request": round(avg_cost_per_request, 6),
428
+ "avg_duration_per_request": round(avg_duration_per_request, 2),
429
+ "account_age_seconds": account_age_seconds,
430
+ "processing_time_ms": round(processing_time * 1000, 2),
431
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
432
+ "request_id": hashlib.md5(f"{username}{duration_ms}{time.time()}".encode()).hexdigest()[:8]
433
+ }
434
+
435
+ logger.info(f"[TOKEN-COST] ✅ Token cost calculated successfully")
436
+ logger.info(f"[TOKEN-COST] Request cost: {cost} tokens")
437
+ logger.info(f"[TOKEN-COST] User total cost: {user_data['total_cost']} tokens")
438
+ logger.info(f"[TOKEN-COST] User total requests: {user_data['requests']}")
439
+ logger.info(f"[TOKEN-COST] User avg cost per request: {avg_cost_per_request} tokens")
440
+ logger.info(f"[TOKEN-COST] User avg duration per request: {avg_duration_per_request}ms")
441
+ logger.info(f"[TOKEN-COST] User account age: {account_age_seconds} seconds")
442
+ logger.info(f"[TOKEN-COST] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
443
+ logger.info(f"[TOKEN-COST] Request ID: {result['request_id']}")
444
+ logger.info(f"[TOKEN-COST] ===== TOKEN COST REQUEST END =====")
445
+ return json.dumps(result, indent=2)
446
 
447
  except Exception as e:
448
+ processing_time = time.time() - start_time
449
+ logger.error(f"[TOKEN-COST] ❌ Token cost calculation failed after {processing_time:.4f}s: {e}")
450
+ logger.error(f"[TOKEN-COST] Error type: {type(e).__name__}")
451
+ logger.error(f"[TOKEN-COST] Error details: {str(e)}")
452
+ logger.error(f"[TOKEN-COST] Username that caused error: '{username}'")
453
+ logger.error(f"[TOKEN-COST] Duration that caused error: {duration_ms}ms")
454
+ logger.error(f"[TOKEN-COST] ===== TOKEN COST REQUEST END (ERROR) =====")
455
+
456
  return json.dumps({
457
  "success": False,
458
+ "error": str(e),
459
+ "error_type": type(e).__name__,
460
+ "processing_time_ms": round(processing_time * 1000, 2),
461
+ "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
462
  }, indent=2)
463
 
464
def get_cache_stats() -> str:
    """Return backend cache/ledger statistics as a pretty-printed JSON string.

    Aggregates the module-level ``prompt_cache``, ``response_cache`` and
    ``token_ledger`` dicts into sizes, memory usage and per-user averages.

    Returns:
        JSON string with ``"success": True`` and the statistics on success,
        or ``"success": False`` plus ``error``/``error_type`` details if
        aggregation fails (e.g. a malformed cache or ledger entry).
    """
    # Start timing before any work so processing_time_ms covers the whole
    # call (the original started the clock only after several log lines).
    start_time = time.time()
    logger.info(f"[CACHE-STATS] ===== CACHE STATS REQUEST START =====")

    try:
        # Compute each aggregate exactly once, inside the try block: the
        # original recomputed these sums for logging and again for the
        # payload, and a malformed entry (e.g. a response_cache value
        # missing its 'response' key) raised *before* the try, escaping
        # the JSON error envelope below.
        total_prompt_memory = sum(len(str(v)) for v in prompt_cache.values())
        total_response_memory = sum(len(v['response']) for v in response_cache.values())
        total_requests = sum(u['requests'] for u in token_ledger.values())
        total_tokens = sum(u['total_cost'] for u in token_ledger.values())
        total_duration = sum(u['total_duration_ms'] for u in token_ledger.values())

        logger.info(f"[CACHE-STATS] Current prompt cache size: {len(prompt_cache)} entries")
        logger.info(f"[CACHE-STATS] Current response cache size: {len(response_cache)} entries")
        logger.info(f"[CACHE-STATS] Current users tracked: {len(token_ledger)}")
        logger.info(f"[CACHE-STATS] Prompt cache memory usage: {total_prompt_memory} characters")
        logger.info(f"[CACHE-STATS] Response cache memory usage: {total_response_memory} characters")
        logger.info(f"[CACHE-STATS] Total requests processed: {total_requests}")

        # Per-user statistics; hoist the clock read so every ledger entry is
        # compared against the same "now" for the 1-hour activity window.
        now = time.time()
        active_users = len([u for u in token_ledger.values()
                            if now - u.get('last_seen', u.get('first_seen', 0)) < 3600])
        user_count = len(token_ledger)
        avg_requests_per_user = total_requests / user_count if user_count > 0 else 0
        avg_tokens_per_user = total_tokens / user_count if user_count > 0 else 0

        processing_time = time.time() - start_time

        result = {
            "success": True,
            "prompt_cache_size": len(prompt_cache),
            "response_cache_size": len(response_cache),
            "users_tracked": user_count,
            "active_users_last_hour": active_users,
            "total_requests": total_requests,
            "total_tokens_spent": round(total_tokens, 4),
            "total_duration_ms": round(total_duration, 2),
            "avg_requests_per_user": round(avg_requests_per_user, 2),
            "avg_tokens_per_user": round(avg_tokens_per_user, 4),
            # NOTE(review): the "_bytes" fields actually hold character
            # counts; key names kept unchanged for API compatibility.
            "prompt_cache_memory_bytes": total_prompt_memory,
            "response_cache_memory_bytes": total_response_memory,
            "total_cache_memory_bytes": total_prompt_memory + total_response_memory,
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
            "request_id": hashlib.md5(f"stats{time.time()}".encode()).hexdigest()[:8]
        }

        logger.info(f"[CACHE-STATS] ✅ Cache statistics retrieved successfully")
        logger.info(f"[CACHE-STATS] Prompt cache: {len(prompt_cache)} entries ({total_prompt_memory} chars)")
        logger.info(f"[CACHE-STATS] Response cache: {len(response_cache)} entries ({total_response_memory} chars)")
        logger.info(f"[CACHE-STATS] Users tracked: {user_count} ({active_users} active last hour)")
        logger.info(f"[CACHE-STATS] Total requests: {total_requests}")
        logger.info(f"[CACHE-STATS] Total tokens spent: {total_tokens}")
        logger.info(f"[CACHE-STATS] Total duration: {total_duration}ms")
        logger.info(f"[CACHE-STATS] Avg requests per user: {avg_requests_per_user}")
        logger.info(f"[CACHE-STATS] Avg tokens per user: {avg_tokens_per_user}")
        logger.info(f"[CACHE-STATS] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
        logger.info(f"[CACHE-STATS] Request ID: {result['request_id']}")
        logger.info(f"[CACHE-STATS] ===== CACHE STATS REQUEST END =====")

        return json.dumps(result, indent=2)

    except Exception as e:
        processing_time = time.time() - start_time
        logger.error(f"[CACHE-STATS] ❌ Cache statistics retrieval failed after {processing_time:.4f}s: {e}")
        logger.error(f"[CACHE-STATS] Error type: {type(e).__name__}")
        logger.error(f"[CACHE-STATS] Error details: {str(e)}")
        logger.error(f"[CACHE-STATS] ===== CACHE STATS REQUEST END (ERROR) =====")

        return json.dumps({
            "success": False,
            "error": str(e),
            "error_type": type(e).__name__,
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
        }, indent=2)
539
 
540
def get_backend_health() -> str:
    """Return a JSON health report for the backend.

    Derives a coarse status ("healthy" or "degraded") from the sizes of the
    module-level ``prompt_cache``/``response_cache`` and the ``token_ledger``,
    listing every threshold that was exceeded in ``issues``.

    Returns:
        JSON string with ``"success": True`` and the health metrics, or
        ``"success": False`` with ``"status": "error"`` and error details if
        inspection fails (e.g. a malformed ledger entry).
    """
    # Start the clock before any work so processing_time_ms is accurate.
    start_time = time.time()
    logger.info(f"[BACKEND-HEALTH] ===== BACKEND HEALTH REQUEST START =====")
    logger.info(f"[BACKEND-HEALTH] Checking backend health status...")

    try:
        # All aggregation happens inside the try block (the original summed
        # ledger entries in its pre-try logging, so a malformed entry could
        # escape the JSON error envelope), and each sum is computed once
        # instead of once for logging and once for the payload.
        total_cache_size = len(prompt_cache) + len(response_cache)
        total_requests = sum(u['requests'] for u in token_ledger.values())
        total_memory_usage = (sum(len(str(v)) for v in prompt_cache.values())
                              + sum(len(v['response']) for v in response_cache.values()))

        logger.info(f"[BACKEND-HEALTH] Current prompt cache size: {len(prompt_cache)} entries")
        logger.info(f"[BACKEND-HEALTH] Current response cache size: {len(response_cache)} entries")
        logger.info(f"[BACKEND-HEALTH] Current users tracked: {len(token_ledger)}")
        logger.info(f"[BACKEND-HEALTH] Total requests processed: {total_requests}")

        # Threshold checks: any hit downgrades the status and records why.
        health_status = "healthy"
        issues = []
        if total_cache_size > 200:
            health_status = "degraded"
            issues.append("High cache usage")
        if len(token_ledger) > 1000:
            health_status = "degraded"
            issues.append("High user count")
        if total_memory_usage > 10000000:  # ~10MB worth of cached characters
            health_status = "degraded"
            issues.append("High memory usage")

        processing_time = time.time() - start_time

        result = {
            "success": True,
            "status": health_status,
            "issues": issues,
            "prompt_cache_size": len(prompt_cache),
            "response_cache_size": len(response_cache),
            "total_cache_size": total_cache_size,
            "users_tracked": len(token_ledger),
            "total_requests": total_requests,
            "total_memory_usage_bytes": total_memory_usage,
            # backend_start_time is set at process startup elsewhere in this
            # file; the globals() guard keeps early calls from raising.
            "uptime_seconds": round(time.time() - backend_start_time, 2) if 'backend_start_time' in globals() else 0,
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat(),
            "request_id": hashlib.md5(f"health{time.time()}".encode()).hexdigest()[:8]
        }

        logger.info(f"[BACKEND-HEALTH] ✅ Backend health check completed successfully")
        logger.info(f"[BACKEND-HEALTH] Health status: {health_status}")
        if issues:
            logger.warning(f"[BACKEND-HEALTH] Issues detected: {', '.join(issues)}")
        logger.info(f"[BACKEND-HEALTH] Total cache size: {total_cache_size} entries")
        logger.info(f"[BACKEND-HEALTH] Users tracked: {len(token_ledger)}")
        logger.info(f"[BACKEND-HEALTH] Total requests: {total_requests}")
        logger.info(f"[BACKEND-HEALTH] Memory usage: {total_memory_usage} bytes")
        logger.info(f"[BACKEND-HEALTH] Processing time: {processing_time:.4f}s ({processing_time*1000:.2f}ms)")
        logger.info(f"[BACKEND-HEALTH] Request ID: {result['request_id']}")
        logger.info(f"[BACKEND-HEALTH] ===== BACKEND HEALTH REQUEST END =====")

        return json.dumps(result, indent=2)

    except Exception as e:
        processing_time = time.time() - start_time
        logger.error(f"[BACKEND-HEALTH] ❌ Backend health check failed after {processing_time:.4f}s: {e}")
        logger.error(f"[BACKEND-HEALTH] Error type: {type(e).__name__}")
        logger.error(f"[BACKEND-HEALTH] Error details: {str(e)}")
        logger.error(f"[BACKEND-HEALTH] ===== BACKEND HEALTH REQUEST END (ERROR) =====")

        return json.dumps({
            "success": False,
            "status": "error",
            "error": str(e),
            "error_type": type(e).__name__,
            "processing_time_ms": round(processing_time * 1000, 2),
            "timestamp": datetime.datetime.now(pytz.UTC).isoformat()
        }, indent=2)
620
 
621
  # ============================================================================
 
738
# Register graceful-shutdown handlers: SIGTERM (container/platform stop) and
# SIGINT (Ctrl-C) are both routed to signal_handler, which is defined
# elsewhere in this file.
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)

# Startup banner: record process start time, interpreter/framework versions
# and the initial cache/ledger state so the first log lines identify this run.
logger.info("[INIT] ===== BACKEND APPLICATION STARTUP =====")
logger.info(f"[INIT] ZeroEngine-Backend starting up...")
logger.info(f"[INIT] Backend start time: {datetime.datetime.fromtimestamp(backend_start_time, pytz.UTC).isoformat()}")
logger.info(f"[INIT] Python version: {sys.version}")
logger.info(f"[INIT] Gradio version: {gr.__version__}")
logger.info(f"[INIT] Cache sizes - Prompt: {len(prompt_cache)}, Response: {len(response_cache)}")
logger.info(f"[INIT] Users tracked: {len(token_ledger)}")
logger.info(f"[INIT] Server will launch on port 7861")
logger.info(f"[INIT] ===== BACKEND APPLICATION STARTUP END =====")

# Blocking call: serve the Gradio app on all interfaces, port 7861, with
# server-side rendering disabled.
demo.launch(server_name="0.0.0.0", server_port=7861, ssr_mode=False)