Spaces:

turtle170
/

ZeroEngine

Running

App Files Files Community

turtle170 committed 2 days ago

Commit

8806e23

verified ·

1 Parent(s): 4166b44

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -18

app.py CHANGED Viewed

@@ -14,8 +14,8 @@ from gradio_client import Client
 import hashlib
 # Backend processor connection
-BACKEND_URL = "https://turtle170-ZeroEngine-Backend.hf.space"
-BACKEND_FALLBACK_URL = "turtle170/ZeroEngine-Backend"  # Fallback to repo ID
 CONNECTION_TIMEOUT = 60  # seconds
 MAX_RETRIES = 3
 RETRY_DELAY = 5  # seconds
@@ -47,8 +47,8 @@ class BackendProcessor:
         if current_time - self.last_connect_attempt < self.connect_cooldown and not force_reconnect:
             return False
-        # Try multiple connection strategies
-        urls_to_try = [BACKEND_URL, BACKEND_FALLBACK_URL] if BACKEND_URL != BACKEND_FALLBACK_URL else [BACKEND_URL]
         for attempt in range(MAX_RETRIES):
             for url in urls_to_try:
@@ -65,9 +65,11 @@ class BackendProcessor:
                     self.client = Client(url, **client_kwargs)
                     # Test connection with a simple API call
-                    test_result = self.client.predict(api_name="//predict")
                     response_time = time.time() - start_time
                     if test_result:
                         self.connected = True
                         self.connection_url = url
@@ -214,7 +216,7 @@ class BackendProcessor:
         prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
         def _background():
-            result = self._make_request("/predict_2", prompt_hash, response)
             if result:
                 try:
                     data = json.loads(result)
@@ -224,16 +226,17 @@ class BackendProcessor:
                     logger.warning(f"[BACKEND] Cache response parsing failed: {e}")
         threading.Thread(target=_background, daemon=True).start()
     def get_cached_response(self, prompt: str) -> Optional[str]:
         """Try to get cached response (synchronous) with enhanced error handling"""
         prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
-        result = self._make_request("/predict_3", prompt_hash)
         if result:
             try:
                 data = json.loads(result)
-                if data.get("success"):
                     logger.info(f"[BACKEND] ⚡ CACHE HIT: {prompt_hash}")
                     return data["response"]
             except Exception as e:
@@ -244,7 +247,7 @@ class BackendProcessor:
     def charge_tokens_async(self, username: str, duration_ms: float):
         """Calculate token cost asynchronously with enhanced error handling"""
         def _background():
-            result = self._make_request("/predict_4", username, duration_ms)
             if result:
                 try:
                     data = json.loads(result)
@@ -1442,7 +1445,11 @@ class ZeroEngine:
             yield history
             time.sleep(0.5)  # Brief pause for user to see the message
-        # Check prompt cache for exact matches (instant response)
         cached_response = backend.get_cached_response(full_input)
         if cached_response:
             logger.info("⚡ BACKEND CACHE HIT - Instant response!")
@@ -1451,15 +1458,13 @@ class ZeroEngine:
             yield history
             return
-        # Prepare input with optimized formatting
-        full_input = f"{ghost_context}\n{prompt}" if ghost_context else prompt
-        formatted_prompt = f"User: {full_input}\nAssistant: "
         # Add User Message & Empty Assistant Message for Streaming
         history.append({"role": "user", "content": prompt})
         history.append({"role": "assistant", "content": "..."})
         yield history
         response_text = ""
         start_time = time.time()
         tokens_count = 0
@@ -1652,10 +1657,8 @@ kernel = ZeroEngine()
 # Session ID for token tracking
 session_id = "turtle170"
-with gr.Blocks(title="ZeroEngine V0.2") as demo:
-    demo_css = CUSTOM_CSS
-    if hasattr(demo, 'css'):
-        demo.css = demo_css
     gr.LoginButton()
     # Header with Token Display
     with gr.Row():

 import hashlib
 # Backend processor connection
+BACKEND_URL = "turtle170/ZeroEngine-Backend"
+BACKEND_FALLBACK_URL = None  # Not needed
 CONNECTION_TIMEOUT = 60  # seconds
 MAX_RETRIES = 3
 RETRY_DELAY = 5  # seconds
         if current_time - self.last_connect_attempt < self.connect_cooldown and not force_reconnect:
             return False
+        # Try simple repo ID connection
+        urls_to_try = [BACKEND_URL]
         for attempt in range(MAX_RETRIES):
             for url in urls_to_try:
                     self.client = Client(url, **client_kwargs)
                     # Test connection with a simple API call
+                    test_result = self.client.predict("test connection", api_name="//predict")
                     response_time = time.time() - start_time
+                    logger.info(f"[BACKEND] Connection test response: {test_result}")
                     if test_result:
                         self.connected = True
                         self.connection_url = url
         prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
         def _background():
+            result = self._make_request("/predict_4", prompt_hash, response)  # ✅ CORRECT - /predict_4 for cache_response
             if result:
                 try:
                     data = json.loads(result)
                     logger.warning(f"[BACKEND] Cache response parsing failed: {e}")
         threading.Thread(target=_background, daemon=True).start()
+        return None
     def get_cached_response(self, prompt: str) -> Optional[str]:
         """Try to get cached response (synchronous) with enhanced error handling"""
         prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
+        result = self._make_request("/predict_5", prompt_hash)
         if result:
             try:
                 data = json.loads(result)
+                if data.get("success") and data.get("found"):
                     logger.info(f"[BACKEND] ⚡ CACHE HIT: {prompt_hash}")
                     return data["response"]
             except Exception as e:
     def charge_tokens_async(self, username: str, duration_ms: float):
         """Calculate token cost asynchronously with enhanced error handling"""
         def _background():
+            result = self._make_request("/predict_6", username, duration_ms)
             if result:
                 try:
                     data = json.loads(result)
             yield history
             time.sleep(0.5)  # Brief pause for user to see the message
+        # Prepare input with optimized formatting
+        full_input = f"{ghost_context}\n{prompt}" if ghost_context else prompt
+        formatted_prompt = f"User: {full_input}\nAssistant: "
+        # Try backend cache first
         cached_response = backend.get_cached_response(full_input)
         if cached_response:
             logger.info("⚡ BACKEND CACHE HIT - Instant response!")
             yield history
             return
         # Add User Message & Empty Assistant Message for Streaming
         history.append({"role": "user", "content": prompt})
         history.append({"role": "assistant", "content": "..."})
         yield history
+        cache_key = f"{ghost_context}:{prompt}"  # ← ADD THIS LINE
         response_text = ""
         start_time = time.time()
         tokens_count = 0
 # Session ID for token tracking
 session_id = "turtle170"
+with gr.Blocks(title="ZeroEngine V0.2", css=CUSTOM_CSS) as demo:
+    # CSS applied in Blocks constructor for Gradio 6.5.0
     gr.LoginButton()
     # Header with Token Display
     with gr.Row():