turtle170 committed on
Commit
a5d38cb
·
verified ·
1 Parent(s): 13f35fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +239 -50
app.py CHANGED
@@ -14,109 +14,240 @@ from gradio_client import Client
14
  import hashlib
15
 
16
  # Backend processor connection
17
- BACKEND_URL = "turtle170/ZeroEngine-Backend"
 
 
 
 
18
 
19
  class BackendProcessor:
20
- """Client for ZeroEngine-Backend processing"""
21
 
22
  def __init__(self):
23
  self.client = None
24
  self.connected = False
25
  self.last_connect_attempt = 0
26
  self.connect_cooldown = 30 # seconds
 
 
 
 
 
 
 
 
27
 
28
- def connect(self):
29
- """Lazy connection with cooldown"""
30
  current_time = time.time()
31
 
32
- if self.connected:
33
  return True
34
 
35
- if current_time - self.last_connect_attempt < self.connect_cooldown:
36
  return False
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  try:
39
- self.last_connect_attempt = current_time
40
- self.client = Client(BACKEND_URL)
41
- self.connected = True
42
- logger.info("[BACKEND] ✅ Connected to ZeroEngine-Backend")
43
- return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  except Exception as e:
45
- logger.error(f"[BACKEND] ❌ Connection failed: {e}")
46
- self.connected = False
47
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def tokenize_async(self, text: str):
50
- """Background tokenization"""
51
  if not text or len(text) < 5:
52
  return
53
 
54
  def _background():
55
- try:
56
- if self.connect():
57
- result = self.client.predict(text, api_name="/predict")
58
  data = json.loads(result)
59
  if data.get("success"):
60
  logger.info(f"[BACKEND] Tokenized: ~{data['estimated_tokens']} tokens")
61
- except Exception as e:
62
- logger.warning(f"[BACKEND] Tokenize failed: {e}")
63
 
64
  threading.Thread(target=_background, daemon=True).start()
65
 
66
  def cache_response(self, prompt: str, response: str):
67
- """Cache a response for instant retrieval"""
68
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
69
 
70
  def _background():
71
- try:
72
- if self.connect():
73
- result = self.client.predict(
74
- prompt_hash,
75
- response,
76
- api_name="/predict_3"
77
- )
78
  data = json.loads(result)
79
  if data.get("success"):
80
  logger.info(f"[BACKEND] Cached response: {prompt_hash}")
81
- except Exception as e:
82
- logger.warning(f"[BACKEND] Cache failed: {e}")
83
 
84
  threading.Thread(target=_background, daemon=True).start()
85
 
86
  def get_cached_response(self, prompt: str) -> Optional[str]:
87
- """Try to get cached response (synchronous)"""
88
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
89
 
90
- try:
91
- if self.connect():
92
- result = self.client.predict(
93
- prompt_hash,
94
- api_name="/predict_4"
95
- )
96
  data = json.loads(result)
97
  if data.get("success"):
98
  logger.info(f"[BACKEND] ⚑ CACHE HIT: {prompt_hash}")
99
  return data["response"]
100
- except Exception as e:
101
- logger.warning(f"[BACKEND] Cache retrieval failed: {e}")
102
 
103
  return None
104
 
105
  def charge_tokens_async(self, username: str, duration_ms: float):
106
- """Calculate token cost asynchronously"""
107
  def _background():
108
- try:
109
- if self.connect():
110
- result = self.client.predict(
111
- username,
112
- duration_ms,
113
- api_name="/predict_5"
114
- )
115
  data = json.loads(result)
116
  if data.get("success"):
117
  logger.info(f"[BACKEND] Charged {username}: {data['cost']} tokens")
118
- except Exception as e:
119
- logger.warning(f"[BACKEND] Charge failed: {e}")
120
 
121
  threading.Thread(target=_background, daemon=True).start()
122
 
@@ -1471,6 +1602,22 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
1471
  # Owner-only Clear RAM button (hidden by default, shown only to owner)
1472
  clear_ram_btn = gr.Button("🌋 CLEAR RAM", variant="stop", size="sm", visible=False)
1473
  session_status = gr.Markdown("", visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1474
 
1475
  with gr.Row():
1476
  with gr.Column(scale=8):
@@ -1608,6 +1755,41 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
1608
  return gr.update(visible=True) # Show Clear RAM button
1609
  return gr.update(visible=False) # Hide Clear RAM button
1610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1611
  def update_custom_params(temp, top_p, top_k, repeat_pen):
1612
  kernel.custom_params["temperature"] = temp
1613
  kernel.custom_params["top_p"] = top_p
@@ -1615,10 +1797,14 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
1615
  kernel.custom_params["repeat_penalty"] = repeat_pen
1616
  return "✅ Parameters updated!"
1617
 
1618
- # Timer for periodic stats updates (includes token balance)
1619
  timer = gr.Timer(value=2)
1620
  timer.tick(update_stats, None, [ram_metric, cpu_metric, token_balance])
1621
 
 
 
 
 
1622
  # Event handlers
1623
  scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
1624
  boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status])
@@ -1629,6 +1815,9 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
1629
  end_session_btn.click(on_end_session, None, [session_status])
1630
  clear_ram_btn.click(on_clear_ram, None, [session_status])
1631
 
 
 
 
1632
  # Custom parameter updates
1633
  temperature_slider.change(update_custom_params,
1634
  [temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],
 
14
  import hashlib
15
 
16
  # Backend processor connection
17
+ BACKEND_URL = "https://turtle170-ZeroEngine-Backend.hf.space"
18
+ BACKEND_FALLBACK_URL = "turtle170/ZeroEngine-Backend" # Fallback to repo ID
19
+ CONNECTION_TIMEOUT = 30 # seconds
20
+ MAX_RETRIES = 3
21
+ RETRY_DELAY = 2 # seconds
22
 
23
  class BackendProcessor:
24
+ """Enhanced client for ZeroEngine-Backend processing with retry logic and health checks"""
25
 
26
  def __init__(self):
27
  self.client = None
28
  self.connected = False
29
  self.last_connect_attempt = 0
30
  self.connect_cooldown = 30 # seconds
31
+ self.connection_url = BACKEND_URL
32
+ self.health_status = "unknown"
33
+ self.last_health_check = 0
34
+ self.connection_attempts = 0
35
+ self.total_requests = 0
36
+ self.failed_requests = 0
37
+ self.response_times = []
38
+ self.backend_metrics = {"cache_size": 0, "users_tracked": 0, "total_requests": 0}
39
 
40
+ def connect(self, force_reconnect=False):
41
+ """Enhanced connection with retry logic and multiple URL fallbacks"""
42
  current_time = time.time()
43
 
44
+ if self.connected and not force_reconnect:
45
  return True
46
 
47
+ if current_time - self.last_connect_attempt < self.connect_cooldown and not force_reconnect:
48
  return False
49
 
50
+ # Try multiple connection strategies
51
+ urls_to_try = [BACKEND_URL, BACKEND_FALLBACK_URL] if BACKEND_URL != BACKEND_FALLBACK_URL else [BACKEND_URL]
52
+
53
+ for attempt in range(MAX_RETRIES):
54
+ for url in urls_to_try:
55
+ try:
56
+ self.last_connect_attempt = current_time
57
+ self.connection_attempts += 1
58
+
59
+ # Try with authentication if available
60
+ client_kwargs = {}
61
+ if HF_TOKEN:
62
+ client_kwargs["hf_token"] = HF_TOKEN
63
+
64
+ start_time = time.time()
65
+ self.client = Client(url, **client_kwargs)
66
+
67
+ # Test connection with a simple API call
68
+ test_result = self.client.predict("test", api_name="/predict")
69
+ response_time = time.time() - start_time
70
+
71
+ if test_result:
72
+ self.connected = True
73
+ self.connection_url = url
74
+ self.response_times.append(response_time)
75
+
76
+ # Keep only last 10 response times
77
+ if len(self.response_times) > 10:
78
+ self.response_times.pop(0)
79
+
80
+ logger.info(f"[BACKEND] ✅ Connected to {url} (attempt {attempt+1}, {response_time:.2f}s)")
81
+ return True
82
+
83
+ except Exception as e:
84
+ logger.warning(f"[BACKEND] ❌ Connection failed to {url} (attempt {attempt+1}): {e}")
85
+ self.connected = False
86
+
87
+ if attempt < MAX_RETRIES - 1:
88
+ time.sleep(RETRY_DELAY)
89
+
90
+ logger.error(f"[BACKEND] ❌ All connection attempts failed after {MAX_RETRIES} tries")
91
+ self.connected = False
92
+ return False
93
+
94
+ def health_check(self) -> dict:
95
+ """Perform comprehensive health check of backend service"""
96
+ current_time = time.time()
97
+
98
+ # Rate limit health checks to once per minute
99
+ if current_time - self.last_health_check < 60:
100
+ return self._get_health_status()
101
+
102
+ self.last_health_check = current_time
103
+
104
  try:
105
+ if not self.connect():
106
+ self.health_status = "unreachable"
107
+ return self._get_health_status()
108
+
109
+ # Test each API endpoint
110
+ endpoints_status = {}
111
+
112
+ # Test tokenization
113
+ try:
114
+ start_time = time.time()
115
+ result = self.client.predict("health check test", api_name="/predict")
116
+ response_time = time.time() - start_time
117
+ endpoints_status["tokenize"] = {"status": "ok", "response_time": response_time}
118
+ except Exception as e:
119
+ endpoints_status["tokenize"] = {"status": "error", "error": str(e)}
120
+
121
+ # Test cache stats
122
+ try:
123
+ start_time = time.time()
124
+ result = self.client.predict(api_name="/predict_6")
125
+ response_time = time.time() - start_time
126
+ data = json.loads(result)
127
+ if data.get("success"):
128
+ self.backend_metrics = {
129
+ "cache_size": data.get("cache_size", 0),
130
+ "users_tracked": data.get("users_tracked", 0),
131
+ "total_requests": data.get("total_requests", 0)
132
+ }
133
+ endpoints_status["stats"] = {"status": "ok", "response_time": response_time}
134
+ else:
135
+ endpoints_status["stats"] = {"status": "error", "error": "Invalid response"}
136
+ except Exception as e:
137
+ endpoints_status["stats"] = {"status": "error", "error": str(e)}
138
+
139
+ # Determine overall health
140
+ all_ok = all(status["status"] == "ok" for status in endpoints_status.values())
141
+ self.health_status = "healthy" if all_ok else "degraded"
142
+
143
+ return self._get_health_status()
144
+
145
  except Exception as e:
146
+ logger.error(f"[BACKEND] Health check failed: {e}")
147
+ self.health_status = "error"
148
+ return self._get_health_status()
149
+
150
+ def _get_health_status(self) -> dict:
151
+ """Get current health status as dict"""
152
+ avg_response_time = sum(self.response_times) / len(self.response_times) if self.response_times else 0
153
+ success_rate = ((self.total_requests - self.failed_requests) / self.total_requests * 100) if self.total_requests > 0 else 0
154
+
155
+ return {
156
+ "status": self.health_status,
157
+ "connected": self.connected,
158
+ "connection_url": self.connection_url,
159
+ "connection_attempts": self.connection_attempts,
160
+ "total_requests": self.total_requests,
161
+ "failed_requests": self.failed_requests,
162
+ "success_rate": round(success_rate, 2),
163
+ "avg_response_time": round(avg_response_time, 3),
164
+ "backend_metrics": self.backend_metrics
165
+ }
166
+
167
+ def _make_request(self, api_name: str, *args, **kwargs) -> Optional[str]:
168
+ """Make a request with error handling and metrics"""
169
+ self.total_requests += 1
170
+
171
+ try:
172
+ if not self.connect():
173
+ self.failed_requests += 1
174
+ return None
175
+
176
+ start_time = time.time()
177
+ result = self.client.predict(*args, api_name=api_name, **kwargs)
178
+ response_time = time.time() - start_time
179
+
180
+ self.response_times.append(response_time)
181
+ if len(self.response_times) > 10:
182
+ self.response_times.pop(0)
183
+
184
+ return result
185
+
186
+ except Exception as e:
187
+ self.failed_requests += 1
188
+ logger.warning(f"[BACKEND] Request failed to {api_name}: {e}")
189
+ return None
190
 
191
  def tokenize_async(self, text: str):
192
+ """Background tokenization with enhanced error handling"""
193
  if not text or len(text) < 5:
194
  return
195
 
196
  def _background():
197
+ result = self._make_request("/predict", text)
198
+ if result:
199
+ try:
200
  data = json.loads(result)
201
  if data.get("success"):
202
  logger.info(f"[BACKEND] Tokenized: ~{data['estimated_tokens']} tokens")
203
+ except Exception as e:
204
+ logger.warning(f"[BACKEND] Tokenize response parsing failed: {e}")
205
 
206
  threading.Thread(target=_background, daemon=True).start()
207
 
208
  def cache_response(self, prompt: str, response: str):
209
+ """Cache a response for instant retrieval with enhanced error handling"""
210
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
211
 
212
  def _background():
213
+ result = self._make_request("/predict_3", prompt_hash, response)
214
+ if result:
215
+ try:
 
 
 
 
216
  data = json.loads(result)
217
  if data.get("success"):
218
  logger.info(f"[BACKEND] Cached response: {prompt_hash}")
219
+ except Exception as e:
220
+ logger.warning(f"[BACKEND] Cache response parsing failed: {e}")
221
 
222
  threading.Thread(target=_background, daemon=True).start()
223
 
224
  def get_cached_response(self, prompt: str) -> Optional[str]:
225
+ """Try to get cached response (synchronous) with enhanced error handling"""
226
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
227
 
228
+ result = self._make_request("/predict_4", prompt_hash)
229
+ if result:
230
+ try:
 
 
 
231
  data = json.loads(result)
232
  if data.get("success"):
233
  logger.info(f"[BACKEND] ⚑ CACHE HIT: {prompt_hash}")
234
  return data["response"]
235
+ except Exception as e:
236
+ logger.warning(f"[BACKEND] Cache retrieval parsing failed: {e}")
237
 
238
  return None
239
 
240
  def charge_tokens_async(self, username: str, duration_ms: float):
241
+ """Calculate token cost asynchronously with enhanced error handling"""
242
  def _background():
243
+ result = self._make_request("/predict_5", username, duration_ms)
244
+ if result:
245
+ try:
 
 
 
 
246
  data = json.loads(result)
247
  if data.get("success"):
248
  logger.info(f"[BACKEND] Charged {username}: {data['cost']} tokens")
249
+ except Exception as e:
250
+ logger.warning(f"[BACKEND] Token charge parsing failed: {e}")
251
 
252
  threading.Thread(target=_background, daemon=True).start()
253
 
 
1602
  # Owner-only Clear RAM button (hidden by default, shown only to owner)
1603
  clear_ram_btn = gr.Button("🌋 CLEAR RAM", variant="stop", size="sm", visible=False)
1604
  session_status = gr.Markdown("", visible=False)
1605
+
1606
+ # Backend Connection Status
1607
+ gr.HTML("""
1608
+ <div style='text-align: center; padding: 15px; border-radius: 15px;
1609
+ background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
1610
+ margin-bottom: 20px; box-shadow: 0 6px 15px rgba(40,167,69,0.3);'>
1611
+ <div style='font-size: 1.5em; margin-bottom: 3px;'>🔗</div>
1612
+ <div id='backend-status' style='font-size: 1.2em; font-weight: bold; color: white; font-family: Consolas;'>
1613
+ CONNECTED
1614
+ </div>
1615
+ <div style='font-size: 0.8em; color: #ddd; font-family: Consolas;'>BACKEND</div>
1616
+ </div>
1617
+ """)
1618
+ backend_status_label = gr.Label(value="Connected", label="Backend Status")
1619
+ backend_health_btn = gr.Button("🏥 Check Health", size="sm", variant="secondary")
1620
+ backend_health_output = gr.Code(label="Backend Health", language="json", visible=False)
1621
 
1622
  with gr.Row():
1623
  with gr.Column(scale=8):
 
1755
  return gr.update(visible=True) # Show Clear RAM button
1756
  return gr.update(visible=False) # Hide Clear RAM button
1757
 
1758
+ def update_backend_status():
1759
+ """Update backend connection status display"""
1760
+ try:
1761
+ health = backend.health_check()
1762
+ status = health.get("status", "unknown")
1763
+ connected = health.get("connected", False)
1764
+ success_rate = health.get("success_rate", 0)
1765
+ avg_response_time = health.get("avg_response_time", 0)
1766
+
1767
+ # Create status label with metrics
1768
+ if connected and status == "healthy":
1769
+ status_text = f"✅ Connected ({success_rate}% success, {avg_response_time:.3f}s avg)"
1770
+ status_label = "Connected"
1771
+ elif connected:
1772
+ status_text = f"⚠️ Degraded ({success_rate}% success, {avg_response_time:.3f}s avg)"
1773
+ status_label = "Degraded"
1774
+ else:
1775
+ status_text = "❌ Disconnected"
1776
+ status_label = "Disconnected"
1777
+
1778
+ return status_label, status_text
1779
+ except Exception as e:
1780
+ logger.error(f"Backend status update error: {e}")
1781
+ return "Error", "❌ Status Error"
1782
+
1783
+ def on_backend_health_check():
1784
+ """Perform detailed backend health check"""
1785
+ try:
1786
+ health = backend.health_check()
1787
+ health_json = json.dumps(health, indent=2)
1788
+ return gr.update(visible=True), health_json
1789
+ except Exception as e:
1790
+ error_msg = {"error": str(e), "status": "error"}
1791
+ return gr.update(visible=True), json.dumps(error_msg, indent=2)
1792
+
1793
  def update_custom_params(temp, top_p, top_k, repeat_pen):
1794
  kernel.custom_params["temperature"] = temp
1795
  kernel.custom_params["top_p"] = top_p
 
1797
  kernel.custom_params["repeat_penalty"] = repeat_pen
1798
  return "✅ Parameters updated!"
1799
 
1800
+ # Timer for periodic stats updates (includes token balance and backend status)
1801
  timer = gr.Timer(value=2)
1802
  timer.tick(update_stats, None, [ram_metric, cpu_metric, token_balance])
1803
 
1804
+ # Backend status timer (updates every 10 seconds)
1805
+ backend_timer = gr.Timer(value=10)
1806
+ backend_timer.tick(lambda: update_backend_status(), None, [backend_status_label])
1807
+
1808
  # Event handlers
1809
  scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
1810
  boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status])
 
1815
  end_session_btn.click(on_end_session, None, [session_status])
1816
  clear_ram_btn.click(on_clear_ram, None, [session_status])
1817
 
1818
+ # Backend health check
1819
+ backend_health_btn.click(on_backend_health_check, None, [backend_health_output, backend_health_output])
1820
+
1821
  # Custom parameter updates
1822
  temperature_slider.change(update_custom_params,
1823
  [temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],