turtle170 committed on
Commit
e324254
·
verified ·
1 Parent(s): f47aeda

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +297 -0
app.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import time
4
+ import hashlib
5
+ from typing import Dict, Optional
6
+
7
# ============================================================================
# ZEROENGINE-BACKEND: Background Processing Service
# ============================================================================
# This space handles:
# - Tokenization pre-processing
# - Prompt caching
# - Token accounting calculations
# - Response caching
# ============================================================================

# In-memory caches — not persisted; contents are lost on every space restart.
prompt_cache: Dict[str, dict] = dict()
response_cache: Dict[str, dict] = dict()
token_ledger: Dict[str, dict] = dict()
def tokenize_text(text: str) -> str:
    """Estimate token statistics for *text* without loading a model.

    Uses the rough heuristic of 4 characters per token (reasonable for
    English). The result is also recorded in ``prompt_cache``, keyed by a
    truncated MD5 of the text, so repeated inputs can be recognised.

    Args:
        text: The raw text to estimate.

    Returns:
        JSON string with ``success``, ``text_hash``, ``estimated_tokens``,
        ``word_count``, ``char_count`` and ``timestamp``, or
        ``{"success": False, "error": ...}`` on failure.
    """
    try:
        # Cheap estimation (4 chars ≈ 1 token for English) — fast and good
        # enough for pre-processing; no tokenizer model required.
        estimated_tokens = len(text) // 4
        word_count = len(text.split())

        # MD5 is used purely as a cache key, not for anything security-related.
        text_hash = hashlib.md5(text.encode()).hexdigest()[:16]

        result = {
            "success": True,
            "text_hash": text_hash,
            "estimated_tokens": estimated_tokens,
            "word_count": word_count,
            "char_count": len(text),
            "timestamp": time.time(),
        }

        # Cache this tokenization. Store a "timestamp" key so entries are
        # compatible with the eviction logic in cache_prompt, which orders
        # entries by prompt_cache[k]["timestamp"]; "cached_at" is kept for
        # backward compatibility with any reader of the old key name.
        now = time.time()
        prompt_cache[text_hash] = {
            "text": text[:100] + "..." if len(text) > 100 else text,
            "tokens": estimated_tokens,
            "timestamp": now,
            "cached_at": now,
        }

        # Enforce the same 100-entry bound as cache_prompt so repeated
        # tokenization alone cannot grow the cache without limit.
        if len(prompt_cache) > 100:
            oldest_key = min(
                prompt_cache,
                key=lambda k: prompt_cache[k].get("timestamp", 0.0),
            )
            del prompt_cache[oldest_key]

        return json.dumps(result, indent=2)

    except Exception as e:
        return json.dumps({
            "success": False,
            "error": str(e)
        }, indent=2)
def cache_prompt(key: str, value: str) -> str:
    """Store *value* in the in-memory prompt cache under *key*.

    The cache is bounded at 100 entries; on overflow the entry with the
    oldest timestamp is evicted.

    Args:
        key: Cache key chosen by the caller.
        value: Prompt text to store.

    Returns:
        JSON string reporting success and the current cache size, or an
        error payload on failure.
    """
    try:
        prompt_cache[key] = {
            "value": value,
            "timestamp": time.time(),
        }

        # Limit cache size to 100 entries by dropping the oldest one.
        # .get() with a "cached_at" fallback is deliberate: entries written
        # by tokenize_text may carry "cached_at" instead of "timestamp", and
        # a plain ["timestamp"] lookup would raise KeyError and turn every
        # overflow into an error response.
        if len(prompt_cache) > 100:
            oldest_key = min(
                prompt_cache,
                key=lambda k: prompt_cache[k].get(
                    "timestamp", prompt_cache[k].get("cached_at", 0.0)
                ),
            )
            del prompt_cache[oldest_key]

        return json.dumps({
            "success": True,
            "cached": key,
            "cache_size": len(prompt_cache)
        }, indent=2)

    except Exception as e:
        return json.dumps({
            "success": False,
            "error": str(e)
        }, indent=2)
def get_cached_prompt(key: str) -> str:
    """Retrieve a prompt previously stored via ``cache_prompt``.

    Args:
        key: Cache key to look up.

    Returns:
        JSON string with the cached value and its age in seconds, or an
        error payload when the key is absent or holds no prompt value.
    """
    try:
        data = prompt_cache.get(key)
        # Require a "value" field: entries written by tokenize_text store
        # "text"/"tokens" instead, and the original data["value"] lookup
        # surfaced those as the cryptic error "'value'".
        if data is not None and "value" in data:
            # Entries from tokenize_text use "cached_at" rather than
            # "timestamp"; fall back so age reporting never raises.
            stored_at = data.get("timestamp", data.get("cached_at", time.time()))
            return json.dumps({
                "success": True,
                "value": data["value"],
                "age_seconds": round(time.time() - stored_at, 2)
            }, indent=2)

        return json.dumps({
            "success": False,
            "error": "Cache key not found"
        }, indent=2)

    except Exception as e:
        return json.dumps({
            "success": False,
            "error": str(e)
        }, indent=2)
def cache_response(prompt_hash: str, response: str) -> str:
    """Store a complete *response* under *prompt_hash* for instant reuse.

    The cache is capped at 50 entries; the stalest entry is dropped when it
    overflows.

    Args:
        prompt_hash: Hash identifying the prompt this response answers.
        response: Response text to cache.

    Returns:
        JSON string reporting success and the resulting cache size, or an
        error payload on failure.
    """
    try:
        entry = {"response": response, "timestamp": time.time()}
        response_cache[prompt_hash] = entry

        # Keep at most 50 responses: evict the least recently stored one.
        while len(response_cache) > 50:
            stalest = min(response_cache, key=lambda h: response_cache[h]["timestamp"])
            del response_cache[stalest]

        payload = {
            "success": True,
            "cached": prompt_hash,
            "cache_size": len(response_cache),
        }
        return json.dumps(payload, indent=2)

    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)}, indent=2)
def get_cached_response(prompt_hash: str) -> str:
    """Look up a previously cached response by its prompt hash.

    Args:
        prompt_hash: Hash used when the response was cached.

    Returns:
        JSON string carrying the response and its age in seconds, or an
        error payload when nothing is cached under *prompt_hash*.
    """
    try:
        entry = response_cache.get(prompt_hash)
        if entry is None:
            return json.dumps({
                "success": False,
                "error": "Response not cached"
            }, indent=2)

        age = round(time.time() - entry["timestamp"], 2)
        return json.dumps({
            "success": True,
            "response": entry["response"],
            "age_seconds": age
        }, indent=2)

    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)}, indent=2)
def calculate_token_cost(username: str, duration_ms: float) -> str:
    """Compute the token cost of a request that ran for *duration_ms*.

    Stateless with respect to the result (the cost is a pure function of
    the duration), but per-user running totals are accumulated in the
    module-level ``token_ledger`` for analytics.

    Args:
        username: User the request is billed to.
        duration_ms: Wall-clock duration of the request in milliseconds.

    Returns:
        JSON string with this request's cost plus the user's running
        totals, or an error payload on failure.
    """
    try:
        # Rate: 0.001 tokens for every 100 ms of processing time.
        cost = (duration_ms / 100.0) * 0.001

        # Record the request against the user's ledger entry, creating the
        # entry on first sight.
        ledger = token_ledger.setdefault(username, {
            "total_cost": 0.0,
            "total_duration_ms": 0.0,
            "requests": 0,
        })
        ledger["total_cost"] += cost
        ledger["total_duration_ms"] += duration_ms
        ledger["requests"] += 1

        return json.dumps({
            "success": True,
            "username": username,
            "duration_ms": duration_ms,
            "cost": round(cost, 6),
            "total_cost": round(ledger["total_cost"], 4),
            "total_requests": ledger["requests"]
        }, indent=2)

    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)}, indent=2)
def get_cache_stats() -> str:
    """Report sizes of the in-memory caches and ledger activity.

    Returns:
        JSON string with both cache sizes, the number of users tracked in
        the ledger, the total request count across all users, and the
        current timestamp; an error payload on failure.
    """
    try:
        total_requests = sum(entry["requests"] for entry in token_ledger.values())
        stats = {
            "success": True,
            "prompt_cache_size": len(prompt_cache),
            "response_cache_size": len(response_cache),
            "users_tracked": len(token_ledger),
            "total_requests": total_requests,
            "timestamp": time.time(),
        }
        return json.dumps(stats, indent=2)

    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)}, indent=2)
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
# One tab per backend capability; each tab wires its inputs straight into the
# corresponding function above, which returns a pretty-printed JSON string.

with gr.Blocks(title="ZeroEngine-Backend", theme=gr.themes.Monochrome()) as demo:
    # Static header banner.
    gr.HTML("""
    <div style='text-align: center; padding: 20px;'>
        <h1>🔧 ZeroEngine-Backend</h1>
        <p style='color: #888;'>Background Processing Service for ZeroEngine</p>
    </div>
    """)

    # --- Tokenize tab: fast token-count estimation (tokenize_text) ---------
    with gr.Tab("🔢 Tokenize"):
        gr.Markdown("### Fast Tokenization Pre-Processing")
        with gr.Row():
            with gr.Column():
                tokenize_input = gr.Textbox(
                    label="Text to Tokenize",
                    placeholder="Enter text here...",
                    lines=5
                )
                tokenize_btn = gr.Button("Tokenize", variant="primary")
            with gr.Column():
                tokenize_output = gr.Code(label="Result (JSON)", language="json")

        tokenize_btn.click(tokenize_text, [tokenize_input], [tokenize_output])

    # --- Prompt cache tab: store / retrieve raw prompts --------------------
    with gr.Tab("💾 Prompt Cache"):
        gr.Markdown("### Store and Retrieve Prompts")
        with gr.Row():
            with gr.Column():
                cache_key_input = gr.Textbox(label="Cache Key")
                cache_value_input = gr.Textbox(label="Value to Cache", lines=3)
                cache_store_btn = gr.Button("Store", variant="primary")
                cache_store_output = gr.Code(label="Result", language="json")

            with gr.Column():
                cache_get_input = gr.Textbox(label="Key to Retrieve")
                cache_get_btn = gr.Button("Retrieve", variant="secondary")
                cache_get_output = gr.Code(label="Result", language="json")

        cache_store_btn.click(cache_prompt, [cache_key_input, cache_value_input], [cache_store_output])
        cache_get_btn.click(get_cached_prompt, [cache_get_input], [cache_get_output])

    # --- Response cache tab: cache / fetch whole responses by hash ---------
    with gr.Tab("⚡ Response Cache"):
        gr.Markdown("### Cache Complete Responses")
        with gr.Row():
            with gr.Column():
                resp_hash_input = gr.Textbox(label="Prompt Hash")
                resp_value_input = gr.Textbox(label="Response to Cache", lines=5)
                resp_cache_btn = gr.Button("Cache Response", variant="primary")
                resp_cache_output = gr.Code(label="Result", language="json")

            with gr.Column():
                resp_get_input = gr.Textbox(label="Hash to Retrieve")
                resp_get_btn = gr.Button("Get Response", variant="secondary")
                resp_get_output = gr.Code(label="Result", language="json")

        resp_cache_btn.click(cache_response, [resp_hash_input, resp_value_input], [resp_cache_output])
        resp_get_btn.click(get_cached_response, [resp_get_input], [resp_get_output])

    # --- Token accounting tab: per-user cost calculation -------------------
    with gr.Tab("💰 Token Accounting"):
        gr.Markdown("### Calculate Token Costs")
        with gr.Row():
            username_input = gr.Textbox(label="Username", value="turtle170")
            duration_input = gr.Number(label="Duration (ms)", value=5000)

        calc_btn = gr.Button("Calculate Cost", variant="primary")
        calc_output = gr.Code(label="Result (JSON)", language="json")

        calc_btn.click(calculate_token_cost, [username_input, duration_input], [calc_output])

    # --- Stats tab: snapshot of cache/ledger sizes -------------------------
    with gr.Tab("📊 Stats"):
        gr.Markdown("### Cache Statistics")
        stats_btn = gr.Button("Get Stats", variant="primary")
        stats_output = gr.Code(label="Statistics (JSON)", language="json")

        # get_cache_stats takes no inputs, hence inputs=None.
        stats_btn.click(get_cache_stats, None, [stats_output])

if __name__ == "__main__":
    # Bind on all interfaces on the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)