Spaces:

turtle170
/

ZeroEngine

Running

App Files Files Community

turtle170 committed 3 days ago

Commit

a5d38cb

verified ·

1 Parent(s): 13f35fa

Update app.py

Browse files

Files changed (1) hide show

app.py +239 -50

app.py CHANGED Viewed

@@ -14,109 +14,240 @@ from gradio_client import Client
 import hashlib
 # Backend processor connection
-BACKEND_URL = "turtle170/ZeroEngine-Backend"
 class BackendProcessor:
-    """Client for ZeroEngine-Backend processing"""
     def __init__(self):
         self.client = None
         self.connected = False
         self.last_connect_attempt = 0
         self.connect_cooldown = 30  # seconds
-    def connect(self):
-        """Lazy connection with cooldown"""
         current_time = time.time()
-        if self.connected:
             return True
-        if current_time - self.last_connect_attempt < self.connect_cooldown:
             return False
         try:
-            self.last_connect_attempt = current_time
-            self.client = Client(BACKEND_URL)
-            self.connected = True
-            logger.info("[BACKEND] ✅ Connected to ZeroEngine-Backend")
-            return True
         except Exception as e:
-            logger.error(f"[BACKEND] ❌ Connection failed: {e}")
-            self.connected = False
-            return False
     def tokenize_async(self, text: str):
-        """Background tokenization"""
         if not text or len(text) < 5:
             return
         def _background():
-            try:
-                if self.connect():
-                    result = self.client.predict(text, api_name="/predict")
                     data = json.loads(result)
                     if data.get("success"):
                         logger.info(f"[BACKEND] Tokenized: ~{data['estimated_tokens']} tokens")
-            except Exception as e:
-                logger.warning(f"[BACKEND] Tokenize failed: {e}")
         threading.Thread(target=_background, daemon=True).start()
     def cache_response(self, prompt: str, response: str):
-        """Cache a response for instant retrieval"""
         prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
         def _background():
-            try:
-                if self.connect():
-                    result = self.client.predict(
-                        prompt_hash,
-                        response,
-                        api_name="/predict_3"
-                    )
                     data = json.loads(result)
                     if data.get("success"):
                         logger.info(f"[BACKEND] Cached response: {prompt_hash}")
-            except Exception as e:
-                logger.warning(f"[BACKEND] Cache failed: {e}")
         threading.Thread(target=_background, daemon=True).start()
     def get_cached_response(self, prompt: str) -> Optional[str]:
-        """Try to get cached response (synchronous)"""
         prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
-        try:
-            if self.connect():
-                result = self.client.predict(
-                    prompt_hash,
-                    api_name="/predict_4"
-                )
                 data = json.loads(result)
                 if data.get("success"):
                     logger.info(f"[BACKEND] ⚡ CACHE HIT: {prompt_hash}")
                     return data["response"]
-        except Exception as e:
-            logger.warning(f"[BACKEND] Cache retrieval failed: {e}")
         return None
     def charge_tokens_async(self, username: str, duration_ms: float):
-        """Calculate token cost asynchronously"""
         def _background():
-            try:
-                if self.connect():
-                    result = self.client.predict(
-                        username,
-                        duration_ms,
-                        api_name="/predict_5"
-                    )
                     data = json.loads(result)
                     if data.get("success"):
                         logger.info(f"[BACKEND] Charged {username}: {data['cost']} tokens")
-            except Exception as e:
-                logger.warning(f"[BACKEND] Charge failed: {e}")
         threading.Thread(target=_background, daemon=True).start()
@@ -1471,6 +1602,22 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
             # Owner-only Clear RAM button (hidden by default, shown only to owner)
             clear_ram_btn = gr.Button("🌋 CLEAR RAM", variant="stop", size="sm", visible=False)
             session_status = gr.Markdown("", visible=False)
     with gr.Row():
         with gr.Column(scale=8):
@@ -1608,6 +1755,41 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
             return gr.update(visible=True)  # Show Clear RAM button
         return gr.update(visible=False)  # Hide Clear RAM button
     def update_custom_params(temp, top_p, top_k, repeat_pen):
         kernel.custom_params["temperature"] = temp
         kernel.custom_params["top_p"] = top_p
@@ -1615,10 +1797,14 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
         kernel.custom_params["repeat_penalty"] = repeat_pen
         return "✅ Parameters updated!"
-    # Timer for periodic stats updates (includes token balance)
     timer = gr.Timer(value=2)
     timer.tick(update_stats, None, [ram_metric, cpu_metric, token_balance])
     # Event handlers
     scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
     boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status])
@@ -1629,6 +1815,9 @@ with gr.Blocks(title="ZeroEngine V0.2") as demo:
     end_session_btn.click(on_end_session, None, [session_status])
     clear_ram_btn.click(on_clear_ram, None, [session_status])
     # Custom parameter updates
     temperature_slider.change(update_custom_params,
                              [temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],

 import hashlib
 # Backend processor connection
+BACKEND_URL = "https://turtle170-ZeroEngine-Backend.hf.space"
+BACKEND_FALLBACK_URL = "turtle170/ZeroEngine-Backend"  # Fallback to repo ID
+CONNECTION_TIMEOUT = 30  # seconds
+MAX_RETRIES = 3
+RETRY_DELAY = 2  # seconds
 class BackendProcessor:
+    """Enhanced client for ZeroEngine-Backend processing with retry logic and health checks"""
     def __init__(self):
         self.client = None
         self.connected = False
         self.last_connect_attempt = 0
         self.connect_cooldown = 30  # seconds
+        self.connection_url = BACKEND_URL
+        self.health_status = "unknown"
+        self.last_health_check = 0
+        self.connection_attempts = 0
+        self.total_requests = 0
+        self.failed_requests = 0
+        self.response_times = []
+        self.backend_metrics = {"cache_size": 0, "users_tracked": 0, "total_requests": 0}
+    def connect(self, force_reconnect=False):
+        """Enhanced connection with retry logic and multiple URL fallbacks"""
         current_time = time.time()
+        if self.connected and not force_reconnect:
             return True
+        if current_time - self.last_connect_attempt < self.connect_cooldown and not force_reconnect:
             return False
+        # Try multiple connection strategies
+        urls_to_try = [BACKEND_URL, BACKEND_FALLBACK_URL] if BACKEND_URL != BACKEND_FALLBACK_URL else [BACKEND_URL]
+        for attempt in range(MAX_RETRIES):
+            for url in urls_to_try:
+                try:
+                    self.last_connect_attempt = current_time
+                    self.connection_attempts += 1
+                    # Try with authentication if available
+                    client_kwargs = {}
+                    if HF_TOKEN:
+                        client_kwargs["hf_token"] = HF_TOKEN
+                    start_time = time.time()
+                    self.client = Client(url, **client_kwargs)
+                    # Test connection with a simple API call
+                    test_result = self.client.predict("test", api_name="/predict")
+                    response_time = time.time() - start_time
+                    if test_result:
+                        self.connected = True
+                        self.connection_url = url
+                        self.response_times.append(response_time)
+                        # Keep only last 10 response times
+                        if len(self.response_times) > 10:
+                            self.response_times.pop(0)
+                        logger.info(f"[BACKEND] ✅ Connected to {url} (attempt {attempt+1}, {response_time:.2f}s)")
+                        return True
+                except Exception as e:
+                    logger.warning(f"[BACKEND] ❌ Connection failed to {url} (attempt {attempt+1}): {e}")
+                    self.connected = False
+                    if attempt < MAX_RETRIES - 1:
+                        time.sleep(RETRY_DELAY)
+        logger.error(f"[BACKEND] ❌ All connection attempts failed after {MAX_RETRIES} tries")
+        self.connected = False
+        return False
+    def health_check(self) -> dict:
+        """Perform comprehensive health check of backend service"""
+        current_time = time.time()
+        # Rate limit health checks to once per minute
+        if current_time - self.last_health_check < 60:
+            return self._get_health_status()
+        self.last_health_check = current_time
         try:
+            if not self.connect():
+                self.health_status = "unreachable"
+                return self._get_health_status()
+            # Test each API endpoint
+            endpoints_status = {}
+            # Test tokenization
+            try:
+                start_time = time.time()
+                result = self.client.predict("health check test", api_name="/predict")
+                response_time = time.time() - start_time
+                endpoints_status["tokenize"] = {"status": "ok", "response_time": response_time}
+            except Exception as e:
+                endpoints_status["tokenize"] = {"status": "error", "error": str(e)}
+            # Test cache stats
+            try:
+                start_time = time.time()
+                result = self.client.predict(api_name="/predict_6")
+                response_time = time.time() - start_time
+                data = json.loads(result)
+                if data.get("success"):
+                    self.backend_metrics = {
+                        "cache_size": data.get("cache_size", 0),
+                        "users_tracked": data.get("users_tracked", 0),
+                        "total_requests": data.get("total_requests", 0)
+                    }
+                    endpoints_status["stats"] = {"status": "ok", "response_time": response_time}
+                else:
+                    endpoints_status["stats"] = {"status": "error", "error": "Invalid response"}
+            except Exception as e:
+                endpoints_status["stats"] = {"status": "error", "error": str(e)}
+            # Determine overall health
+            all_ok = all(status["status"] == "ok" for status in endpoints_status.values())
+            self.health_status = "healthy" if all_ok else "degraded"
+            return self._get_health_status()
         except Exception as e:
+            logger.error(f"[BACKEND] Health check failed: {e}")
+            self.health_status = "error"
+            return self._get_health_status()
+    def _get_health_status(self) -> dict:
+        """Get current health status as dict"""
+        avg_response_time = sum(self.response_times) / len(self.response_times) if self.response_times else 0
+        success_rate = ((self.total_requests - self.failed_requests) / self.total_requests * 100) if self.total_requests > 0 else 0
+        return {
+            "status": self.health_status,
+            "connected": self.connected,
+            "connection_url": self.connection_url,
+            "connection_attempts": self.connection_attempts,
+            "total_requests": self.total_requests,
+            "failed_requests": self.failed_requests,
+            "success_rate": round(success_rate, 2),
+            "avg_response_time": round(avg_response_time, 3),
+            "backend_metrics": self.backend_metrics
+        }
+    def _make_request(self, api_name: str, *args, **kwargs) -> Optional[str]:
+        """Make a request with error handling and metrics"""
+        self.total_requests += 1
+        try:
+            if not self.connect():
+                self.failed_requests += 1
+                return None
+            start_time = time.time()
+            result = self.client.predict(*args, api_name=api_name, **kwargs)
+            response_time = time.time() - start_time
+            self.response_times.append(response_time)
+            if len(self.response_times) > 10:
+                self.response_times.pop(0)
+            return result
+        except Exception as e:
+            self.failed_requests += 1
+            logger.warning(f"[BACKEND] Request failed to {api_name}: {e}")
+            return None
     def tokenize_async(self, text: str):
+        """Background tokenization with enhanced error handling"""
         if not text or len(text) < 5:
             return
         def _background():
+            result = self._make_request("/predict", text)
+            if result:
+                try:
                     data = json.loads(result)
                     if data.get("success"):
                         logger.info(f"[BACKEND] Tokenized: ~{data['estimated_tokens']} tokens")
+                except Exception as e:
+                    logger.warning(f"[BACKEND] Tokenize response parsing failed: {e}")
         threading.Thread(target=_background, daemon=True).start()
     def cache_response(self, prompt: str, response: str):
+        """Cache a response for instant retrieval with enhanced error handling"""
         prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
         def _background():
+            result = self._make_request("/predict_3", prompt_hash, response)
+            if result:
+                try:
                     data = json.loads(result)
                     if data.get("success"):
                         logger.info(f"[BACKEND] Cached response: {prompt_hash}")
+                except Exception as e:
+                    logger.warning(f"[BACKEND] Cache response parsing failed: {e}")
         threading.Thread(target=_background, daemon=True).start()
     def get_cached_response(self, prompt: str) -> Optional[str]:
+        """Try to get cached response (synchronous) with enhanced error handling"""
         prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
+        result = self._make_request("/predict_4", prompt_hash)
+        if result:
+            try:
                 data = json.loads(result)
                 if data.get("success"):
                     logger.info(f"[BACKEND] ⚡ CACHE HIT: {prompt_hash}")
                     return data["response"]
+            except Exception as e:
+                logger.warning(f"[BACKEND] Cache retrieval parsing failed: {e}")
         return None
     def charge_tokens_async(self, username: str, duration_ms: float):
+        """Calculate token cost asynchronously with enhanced error handling"""
         def _background():
+            result = self._make_request("/predict_5", username, duration_ms)
+            if result:
+                try:
                     data = json.loads(result)
                     if data.get("success"):
                         logger.info(f"[BACKEND] Charged {username}: {data['cost']} tokens")
+                except Exception as e:
+                    logger.warning(f"[BACKEND] Token charge parsing failed: {e}")
         threading.Thread(target=_background, daemon=True).start()
             # Owner-only Clear RAM button (hidden by default, shown only to owner)
             clear_ram_btn = gr.Button("🌋 CLEAR RAM", variant="stop", size="sm", visible=False)
             session_status = gr.Markdown("", visible=False)
+            # Backend Connection Status
+            gr.HTML("""
+                <div style='text-align: center; padding: 15px; border-radius: 15px;
+                            background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
+                            margin-bottom: 20px; box-shadow: 0 6px 15px rgba(40,167,69,0.3);'>
+                    <div style='font-size: 1.5em; margin-bottom: 3px;'>🔗</div>
+                    <div id='backend-status' style='font-size: 1.2em; font-weight: bold; color: white; font-family: Consolas;'>
+                        CONNECTED
+                    </div>
+                    <div style='font-size: 0.8em; color: #ddd; font-family: Consolas;'>BACKEND</div>
+                </div>
+            """)
+            backend_status_label = gr.Label(value="Connected", label="Backend Status")
+            backend_health_btn = gr.Button("🏥 Check Health", size="sm", variant="secondary")
+            backend_health_output = gr.Code(label="Backend Health", language="json", visible=False)
     with gr.Row():
         with gr.Column(scale=8):
             return gr.update(visible=True)  # Show Clear RAM button
         return gr.update(visible=False)  # Hide Clear RAM button
+    def update_backend_status():
+        """Update backend connection status display"""
+        try:
+            health = backend.health_check()
+            status = health.get("status", "unknown")
+            connected = health.get("connected", False)
+            success_rate = health.get("success_rate", 0)
+            avg_response_time = health.get("avg_response_time", 0)
+            # Create status label with metrics
+            if connected and status == "healthy":
+                status_text = f"✅ Connected ({success_rate}% success, {avg_response_time:.3f}s avg)"
+                status_label = "Connected"
+            elif connected:
+                status_text = f"⚠️ Degraded ({success_rate}% success, {avg_response_time:.3f}s avg)"
+                status_label = "Degraded"
+            else:
+                status_text = "❌ Disconnected"
+                status_label = "Disconnected"
+            return status_label, status_text
+        except Exception as e:
+            logger.error(f"Backend status update error: {e}")
+            return "Error", "❌ Status Error"
+    def on_backend_health_check():
+        """Perform detailed backend health check"""
+        try:
+            health = backend.health_check()
+            health_json = json.dumps(health, indent=2)
+            return gr.update(visible=True), health_json
+        except Exception as e:
+            error_msg = {"error": str(e), "status": "error"}
+            return gr.update(visible=True), json.dumps(error_msg, indent=2)
     def update_custom_params(temp, top_p, top_k, repeat_pen):
         kernel.custom_params["temperature"] = temp
         kernel.custom_params["top_p"] = top_p
         kernel.custom_params["repeat_penalty"] = repeat_pen
         return "✅ Parameters updated!"
+    # Timer for periodic stats updates (includes token balance and backend status)
     timer = gr.Timer(value=2)
     timer.tick(update_stats, None, [ram_metric, cpu_metric, token_balance])
+    # Backend status timer (updates every 10 seconds)
+    backend_timer = gr.Timer(value=10)
+    backend_timer.tick(lambda: update_backend_status(), None, [backend_status_label])
     # Event handlers
     scan_btn.click(on_scan, [repo_input], [quant_dropdown, log_output])
     boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status])
     end_session_btn.click(on_end_session, None, [session_status])
     clear_ram_btn.click(on_clear_ram, None, [session_status])
+    # Backend health check
+    backend_health_btn.click(on_backend_health_check, None, [backend_health_output, backend_health_output])
     # Custom parameter updates
     temperature_slider.change(update_custom_params,
                              [temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],