Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,109 +14,240 @@ from gradio_client import Client
|
|
| 14 |
import hashlib
|
| 15 |
|
| 16 |
# Backend processor connection
|
| 17 |
-
BACKEND_URL = "turtle170
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
class BackendProcessor:
|
| 20 |
-
"""
|
| 21 |
|
| 22 |
def __init__(self):
|
| 23 |
self.client = None
|
| 24 |
self.connected = False
|
| 25 |
self.last_connect_attempt = 0
|
| 26 |
self.connect_cooldown = 30 # seconds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
def connect(self):
|
| 29 |
-
"""
|
| 30 |
current_time = time.time()
|
| 31 |
|
| 32 |
-
if self.connected:
|
| 33 |
return True
|
| 34 |
|
| 35 |
-
if current_time - self.last_connect_attempt < self.connect_cooldown:
|
| 36 |
return False
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
try:
|
| 39 |
-
self.
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
except Exception as e:
|
| 45 |
-
logger.error(f"[BACKEND]
|
| 46 |
-
self.
|
| 47 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
def tokenize_async(self, text: str):
|
| 50 |
-
"""Background tokenization"""
|
| 51 |
if not text or len(text) < 5:
|
| 52 |
return
|
| 53 |
|
| 54 |
def _background():
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
data = json.loads(result)
|
| 59 |
if data.get("success"):
|
| 60 |
logger.info(f"[BACKEND] Tokenized: ~{data['estimated_tokens']} tokens")
|
| 61 |
-
|
| 62 |
-
|
| 63 |
|
| 64 |
threading.Thread(target=_background, daemon=True).start()
|
| 65 |
|
| 66 |
def cache_response(self, prompt: str, response: str):
|
| 67 |
-
"""Cache a response for instant retrieval"""
|
| 68 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 69 |
|
| 70 |
def _background():
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
prompt_hash,
|
| 75 |
-
response,
|
| 76 |
-
api_name="/predict_3"
|
| 77 |
-
)
|
| 78 |
data = json.loads(result)
|
| 79 |
if data.get("success"):
|
| 80 |
logger.info(f"[BACKEND] Cached response: {prompt_hash}")
|
| 81 |
-
|
| 82 |
-
|
| 83 |
|
| 84 |
threading.Thread(target=_background, daemon=True).start()
|
| 85 |
|
| 86 |
def get_cached_response(self, prompt: str) -> Optional[str]:
|
| 87 |
-
"""Try to get cached response (synchronous)"""
|
| 88 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
prompt_hash,
|
| 94 |
-
api_name="/predict_4"
|
| 95 |
-
)
|
| 96 |
data = json.loads(result)
|
| 97 |
if data.get("success"):
|
| 98 |
logger.info(f"[BACKEND] β‘ CACHE HIT: {prompt_hash}")
|
| 99 |
return data["response"]
|
| 100 |
-
|
| 101 |
-
|
| 102 |
|
| 103 |
return None
|
| 104 |
|
| 105 |
def charge_tokens_async(self, username: str, duration_ms: float):
|
| 106 |
-
"""Calculate token cost asynchronously"""
|
| 107 |
def _background():
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
username,
|
| 112 |
-
duration_ms,
|
| 113 |
-
api_name="/predict_5"
|
| 114 |
-
)
|
| 115 |
data = json.loads(result)
|
| 116 |
if data.get("success"):
|
| 117 |
logger.info(f"[BACKEND] Charged {username}: {data['cost']} tokens")
|
| 118 |
-
|
| 119 |
-
|
| 120 |
|
| 121 |
threading.Thread(target=_background, daemon=True).start()
|
| 122 |
|
|
@@ -1471,6 +1602,22 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
|
|
| 1471 |
# Owner-only Clear RAM button (hidden by default, shown only to owner)
|
| 1472 |
clear_ram_btn = gr.Button("π CLEAR RAM", variant="stop", size="sm", visible=False)
|
| 1473 |
session_status = gr.Markdown("", visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1474 |
|
| 1475 |
with gr.Row():
|
| 1476 |
with gr.Column(scale=8):
|
|
@@ -1608,6 +1755,41 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
|
|
| 1608 |
return gr.update(visible=True) # Show Clear RAM button
|
| 1609 |
return gr.update(visible=False) # Hide Clear RAM button
|
| 1610 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1611 |
def update_custom_params(temp, top_p, top_k, repeat_pen):
|
| 1612 |
kernel.custom_params["temperature"] = temp
|
| 1613 |
kernel.custom_params["top_p"] = top_p
|
|
@@ -1615,10 +1797,14 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
|
|
| 1615 |
kernel.custom_params["repeat_penalty"] = repeat_pen
|
| 1616 |
return "β
Parameters updated!"
|
| 1617 |
|
| 1618 |
-
# Timer for periodic stats updates (includes token balance)
|
| 1619 |
timer = gr.Timer(value=2)
|
| 1620 |
timer.tick(update_stats, None, [ram_metric, cpu_metric, token_balance])
|
| 1621 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1622 |
# Event handlers
|
| 1623 |
scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
|
| 1624 |
boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status])
|
|
@@ -1629,6 +1815,9 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
|
|
| 1629 |
end_session_btn.click(on_end_session, None, [session_status])
|
| 1630 |
clear_ram_btn.click(on_clear_ram, None, [session_status])
|
| 1631 |
|
|
|
|
|
|
|
|
|
|
| 1632 |
# Custom parameter updates
|
| 1633 |
temperature_slider.change(update_custom_params,
|
| 1634 |
[temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],
|
|
|
|
| 14 |
import hashlib
|
| 15 |
|
| 16 |
# Backend processor connection
# Primary endpoint: direct URL of the backend Space (tried first by connect()).
BACKEND_URL = "https://turtle170-ZeroEngine-Backend.hf.space"
BACKEND_FALLBACK_URL = "turtle170/ZeroEngine-Backend" # Fallback to repo ID
# NOTE(review): not referenced by any code visible in this diff -- confirm it is
# actually applied to the client somewhere, or remove it.
CONNECTION_TIMEOUT = 30 # seconds
# Number of rounds connect() makes over the URL list before giving up.
MAX_RETRIES = 3
# Pause between those rounds.
RETRY_DELAY = 2 # seconds
|
| 22 |
|
| 23 |
class BackendProcessor:
|
| 24 |
+
"""Enhanced client for ZeroEngine-Backend processing with retry logic and health checks"""
|
| 25 |
|
| 26 |
def __init__(self):
    """Set up an unconnected processor with zeroed counters.

    No network activity happens here; the client is created by connect().
    """
    # --- connection state ---
    self.client = None
    self.connected = False
    self.connection_url = BACKEND_URL
    self.connection_attempts = 0
    self.last_connect_attempt = 0
    self.connect_cooldown = 30  # seconds

    # --- health bookkeeping ---
    self.health_status = "unknown"
    self.last_health_check = 0

    # --- request metrics ---
    self.total_requests = 0
    self.failed_requests = 0
    self.response_times = list()
    self.backend_metrics = dict.fromkeys(
        ("cache_size", "users_tracked", "total_requests"), 0
    )
|
| 39 |
|
| 40 |
+
def connect(self, force_reconnect=False):
    """Connect to the backend, retrying over the primary and fallback URLs.

    Makes up to MAX_RETRIES rounds over the URL list, sleeping RETRY_DELAY
    seconds between rounds, so this call can block for a while.

    Args:
        force_reconnect: When true, skip both the "already connected"
            shortcut and the cooldown check and build a new client.

    Returns:
        True if a client was created and its probe call to ``/predict``
        returned a truthy value. False if the call was rejected by the
        cooldown or every attempt failed.
    """
    if self.connected and not force_reconnect:
        return True

    attempt_started = time.time()
    in_cooldown = attempt_started - self.last_connect_attempt < self.connect_cooldown
    if in_cooldown and not force_reconnect:
        return False

    # Record the attempt before any network call is made.
    self.last_connect_attempt = attempt_started

    # Try multiple connection strategies: direct URL first, then repo ID.
    urls_to_try = [BACKEND_URL]
    if BACKEND_FALLBACK_URL != BACKEND_URL:
        urls_to_try.append(BACKEND_FALLBACK_URL)

    # Authenticate only when a token is configured (loop-invariant).
    client_kwargs = {"hf_token": HF_TOKEN} if HF_TOKEN else {}

    for attempt in range(MAX_RETRIES):
        for url in urls_to_try:
            self.connection_attempts += 1
            try:
                start_time = time.time()
                candidate = Client(url, **client_kwargs)
                # Test connection with a simple API call
                test_result = candidate.predict("test", api_name="/predict")
                response_time = time.time() - start_time
            except Exception as e:
                logger.warning(f"[BACKEND] ❌ Connection failed to {url} (attempt {attempt+1}): {e}")
                self.connected = False
                continue

            if not test_result:
                # Previously this case was skipped silently, which made a
                # backend that answers with an empty result hard to diagnose.
                logger.warning(f"[BACKEND] Probe to {url} returned an empty result (attempt {attempt+1})")
                continue

            # Publish the client only once the probe has succeeded, so
            # self.client never points at an unverified connection.
            self.client = candidate
            self.connected = True
            self.connection_url = url
            self.response_times.append(response_time)
            # Keep only last 10 response times
            del self.response_times[:-10]

            logger.info(f"[BACKEND] ✅ Connected to {url} (attempt {attempt+1}, {response_time:.2f}s)")
            return True

        if attempt < MAX_RETRIES - 1:
            time.sleep(RETRY_DELAY)

    logger.error(f"[BACKEND] ❌ All connection attempts failed after {MAX_RETRIES} tries")
    self.connected = False
    # Restart the cooldown from the moment of failure. The retry loop itself
    # can take longer than connect_cooldown, and stamping only the start time
    # would let the next caller re-enter the blocking loop immediately.
    self.last_connect_attempt = time.time()
    return False
|
| 93 |
+
|
| 94 |
+
def health_check(self) -> dict:
    """Probe the backend and return the current health snapshot.

    A full probe runs at most once every 60 seconds; calls made sooner
    return the snapshot built from the state already stored on the instance.

    Returns:
        The dict produced by ``_get_health_status()``. ``health_status`` is
        set to "unreachable", "healthy", "degraded" or "error" beforehand.
    """
    now = time.time()

    # Rate limit health checks to once per minute
    if now - self.last_health_check < 60:
        return self._get_health_status()
    self.last_health_check = now

    try:
        if not self.connect():
            self.health_status = "unreachable"
            return self._get_health_status()

        # Per-endpoint results feed only the aggregate verdict below;
        # they are not part of the returned snapshot.
        endpoints_status = {}

        # Probe 1: tokenization endpoint. Only success/failure matters.
        try:
            began = time.time()
            self.client.predict("health check test", api_name="/predict")
            endpoints_status["tokenize"] = {
                "status": "ok",
                "response_time": time.time() - began,
            }
        except Exception as e:
            endpoints_status["tokenize"] = {"status": "error", "error": str(e)}

        # Probe 2: stats endpoint. Also refreshes the cached backend metrics.
        try:
            began = time.time()
            raw = self.client.predict(api_name="/predict_6")
            elapsed = time.time() - began
            data = json.loads(raw)
            if not data.get("success"):
                endpoints_status["stats"] = {"status": "error", "error": "Invalid response"}
            else:
                self.backend_metrics = {
                    key: data.get(key, 0)
                    for key in ("cache_size", "users_tracked", "total_requests")
                }
                endpoints_status["stats"] = {"status": "ok", "response_time": elapsed}
        except Exception as e:
            endpoints_status["stats"] = {"status": "error", "error": str(e)}

        # Determine overall health: any failing endpoint means "degraded".
        any_failed = any(entry["status"] != "ok" for entry in endpoints_status.values())
        self.health_status = "degraded" if any_failed else "healthy"
        return self._get_health_status()

    except Exception as e:
        logger.error(f"[BACKEND] Health check failed: {e}")
        self.health_status = "error"
        return self._get_health_status()
|
| 149 |
+
|
| 150 |
+
def _get_health_status(self) -> dict:
    """Build the health snapshot from the values stored on the instance.

    The average response time is taken over the retained samples and the
    success rate is a percentage of ``total_requests``; both are 0 when
    there is nothing to average.
    """
    samples = self.response_times
    avg_response_time = sum(samples) / len(samples) if samples else 0

    if self.total_requests > 0:
        succeeded = self.total_requests - self.failed_requests
        success_rate = succeeded / self.total_requests * 100
    else:
        success_rate = 0

    snapshot = {}
    snapshot["status"] = self.health_status
    snapshot["connected"] = self.connected
    snapshot["connection_url"] = self.connection_url
    snapshot["connection_attempts"] = self.connection_attempts
    snapshot["total_requests"] = self.total_requests
    snapshot["failed_requests"] = self.failed_requests
    snapshot["success_rate"] = round(success_rate, 2)
    snapshot["avg_response_time"] = round(avg_response_time, 3)
    snapshot["backend_metrics"] = self.backend_metrics
    return snapshot
|
| 166 |
+
|
| 167 |
+
def _make_request(self, api_name: str, *args, **kwargs) -> Optional[str]:
    """Call one backend endpoint, recording request metrics.

    Args:
        api_name: Endpoint name passed to the client's ``predict``.
        *args: Positional arguments forwarded to ``predict``.
        **kwargs: Keyword arguments forwarded to ``predict``.

    Returns:
        Whatever ``predict`` returned, or None when no connection could be
        established or the call raised. Exceptions are logged, not raised.
    """
    self.total_requests += 1

    try:
        if not self.connect():
            self.failed_requests += 1
            return None

        start_time = time.time()
        result = self.client.predict(*args, api_name=api_name, **kwargs)
        response_time = time.time() - start_time
    except Exception as e:
        self.failed_requests += 1
        # Drop the connected flag so the next connect() builds a fresh client.
        # Without this, connect() keeps short-circuiting on the stale flag and
        # a client whose backend went away is reused for every later request.
        self.connected = False
        logger.warning(f"[BACKEND] Request failed to {api_name}: {e}")
        return None

    # Keep only the 10 most recent latency samples.
    self.response_times.append(response_time)
    del self.response_times[:-10]

    return result
|
| 190 |
|
| 191 |
def tokenize_async(self, text: str):
    """Send ``text`` to the ``/predict`` endpoint on a daemon thread.

    Empty text and text shorter than 5 characters are ignored. The call
    returns immediately; the outcome is only logged.
    """
    too_short = not text or len(text) < 5
    if too_short:
        return

    def _worker():
        raw = self._make_request("/predict", text)
        if not raw:
            return
        try:
            payload = json.loads(raw)
            if payload.get("success"):
                logger.info(f"[BACKEND] Tokenized: ~{payload['estimated_tokens']} tokens")
        except Exception as e:
            logger.warning(f"[BACKEND] Tokenize response parsing failed: {e}")

    worker = threading.Thread(target=_worker, daemon=True)
    worker.start()
|
| 207 |
|
| 208 |
def cache_response(self, prompt: str, response: str):
    """Send a prompt-hash/response pair to ``/predict_3`` on a daemon thread.

    The key is the first 16 hex characters of the prompt's MD5 digest.
    The call returns immediately; the outcome is only logged.
    """
    digest = hashlib.md5(prompt.encode()).hexdigest()
    prompt_hash = digest[:16]

    def _worker():
        raw = self._make_request("/predict_3", prompt_hash, response)
        if not raw:
            return
        try:
            payload = json.loads(raw)
            if payload.get("success"):
                logger.info(f"[BACKEND] Cached response: {prompt_hash}")
        except Exception as e:
            logger.warning(f"[BACKEND] Cache response parsing failed: {e}")

    worker = threading.Thread(target=_worker, daemon=True)
    worker.start()
|
| 223 |
|
| 224 |
def get_cached_response(self, prompt: str) -> Optional[str]:
    """Look up a response for ``prompt`` via ``/predict_4`` (synchronous).

    The key is the first 16 hex characters of the prompt's MD5 digest.

    Returns:
        The ``"response"`` field of the reply when it reports success,
        otherwise None (no reply, unsuccessful reply, or unparsable reply).
    """
    key = hashlib.md5(prompt.encode()).hexdigest()[:16]

    raw = self._make_request("/predict_4", key)
    if not raw:
        return None

    try:
        payload = json.loads(raw)
        if payload.get("success"):
            logger.info(f"[BACKEND] ⚡ CACHE HIT: {key}")
            return payload["response"]
    except Exception as e:
        logger.warning(f"[BACKEND] Cache retrieval parsing failed: {e}")

    return None
|
| 239 |
|
| 240 |
def charge_tokens_async(self, username: str, duration_ms: float):
    """Send ``username`` and ``duration_ms`` to ``/predict_5`` on a daemon thread.

    The call returns immediately; the cost reported by the backend is only
    logged.
    """
    def _worker():
        raw = self._make_request("/predict_5", username, duration_ms)
        if not raw:
            return
        try:
            payload = json.loads(raw)
            if payload.get("success"):
                logger.info(f"[BACKEND] Charged {username}: {payload['cost']} tokens")
        except Exception as e:
            logger.warning(f"[BACKEND] Token charge parsing failed: {e}")

    worker = threading.Thread(target=_worker, daemon=True)
    worker.start()
|
| 253 |
|
|
|
|
| 1602 |
# Owner-only Clear RAM button (hidden by default, shown only to owner)
|
| 1603 |
clear_ram_btn = gr.Button("π CLEAR RAM", variant="stop", size="sm", visible=False)
|
| 1604 |
session_status = gr.Markdown("", visible=False)
|
| 1605 |
+
|
| 1606 |
+
# Backend Connection Status
|
| 1607 |
+
gr.HTML("""
|
| 1608 |
+
<div style='text-align: center; padding: 15px; border-radius: 15px;
|
| 1609 |
+
background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
|
| 1610 |
+
margin-bottom: 20px; box-shadow: 0 6px 15px rgba(40,167,69,0.3);'>
|
| 1611 |
+
<div style='font-size: 1.5em; margin-bottom: 3px;'>π</div>
|
| 1612 |
+
<div id='backend-status' style='font-size: 1.2em; font-weight: bold; color: white; font-family: Consolas;'>
|
| 1613 |
+
CONNECTED
|
| 1614 |
+
</div>
|
| 1615 |
+
<div style='font-size: 0.8em; color: #ddd; font-family: Consolas;'>BACKEND</div>
|
| 1616 |
+
</div>
|
| 1617 |
+
""")
|
| 1618 |
+
backend_status_label = gr.Label(value="Connected", label="Backend Status")
|
| 1619 |
+
backend_health_btn = gr.Button("π₯ Check Health", size="sm", variant="secondary")
|
| 1620 |
+
backend_health_output = gr.Code(label="Backend Health", language="json", visible=False)
|
| 1621 |
|
| 1622 |
with gr.Row():
|
| 1623 |
with gr.Column(scale=8):
|
|
|
|
| 1755 |
return gr.update(visible=True) # Show Clear RAM button
|
| 1756 |
return gr.update(visible=False) # Hide Clear RAM button
|
| 1757 |
|
| 1758 |
+
def update_backend_status():
    """Summarise backend health as a ``(short_label, detailed_text)`` pair.

    The short label is "Connected", "Degraded", "Disconnected" or "Error".
    The detailed text adds the success rate and average response time
    whenever the backend reports being connected.
    """
    try:
        health = backend.health_check()

        if not health.get("connected", False):
            return "Disconnected", "❌ Disconnected"

        success_rate = health.get("success_rate", 0)
        avg_response_time = health.get("avg_response_time", 0)
        metrics = f"({success_rate}% success, {avg_response_time:.3f}s avg)"

        if health.get("status", "unknown") == "healthy":
            return "Connected", f"✅ Connected {metrics}"
        return "Degraded", f"⚠️ Degraded {metrics}"
    except Exception as e:
        logger.error(f"Backend status update error: {e}")
        return "Error", "❌ Status Error"
|
| 1782 |
+
|
| 1783 |
+
def on_backend_health_check():
    """Run a backend health check and return it as pretty-printed JSON.

    Returns:
        A pair: an update that makes the output component visible, and the
        JSON text. If the check or its serialisation raises, the JSON is an
        ``{"error": ..., "status": "error"}`` object instead.
    """
    try:
        report = backend.health_check()
        payload = json.dumps(report, indent=2)
    except Exception as e:
        failure = {"error": str(e), "status": "error"}
        payload = json.dumps(failure, indent=2)
    return gr.update(visible=True), payload
|
| 1792 |
+
|
| 1793 |
def update_custom_params(temp, top_p, top_k, repeat_pen):
|
| 1794 |
kernel.custom_params["temperature"] = temp
|
| 1795 |
kernel.custom_params["top_p"] = top_p
|
|
|
|
| 1797 |
kernel.custom_params["repeat_penalty"] = repeat_pen
|
| 1798 |
return "β
Parameters updated!"
|
| 1799 |
|
| 1800 |
+
# Timer for periodic stats updates (includes token balance and backend status)
|
| 1801 |
timer = gr.Timer(value=2)
|
| 1802 |
timer.tick(update_stats, None, [ram_metric, cpu_metric, token_balance])
|
| 1803 |
|
| 1804 |
+
# Backend status timer (updates every 10 seconds)
backend_timer = gr.Timer(value=10)
# update_backend_status() returns a (short_label, detailed_text) pair, but only
# one output component is wired here. With a single output the whole return
# value is handed to the Label, which does not accept a tuple, so pass just the
# detailed text (index 1). Use index 0 if only the short label is wanted.
backend_timer.tick(lambda: update_backend_status()[1], None, [backend_status_label])
|
| 1807 |
+
|
| 1808 |
# Event handlers
|
| 1809 |
scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
|
| 1810 |
boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status])
|
|
|
|
| 1815 |
end_session_btn.click(on_end_session, None, [session_status])
|
| 1816 |
clear_ram_btn.click(on_clear_ram, None, [session_status])
|
| 1817 |
|
| 1818 |
+
# Backend health check
|
| 1819 |
+
backend_health_btn.click(on_backend_health_check, None, [backend_health_output, backend_health_output])
|
| 1820 |
+
|
| 1821 |
# Custom parameter updates
|
| 1822 |
temperature_slider.change(update_custom_params,
|
| 1823 |
[temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],
|