Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,8 +14,8 @@ from gradio_client import Client
|
|
| 14 |
import hashlib
|
| 15 |
|
| 16 |
# Backend processor connection
|
| 17 |
-
BACKEND_URL = "
|
| 18 |
-
BACKEND_FALLBACK_URL =
|
| 19 |
CONNECTION_TIMEOUT = 60 # seconds
|
| 20 |
MAX_RETRIES = 3
|
| 21 |
RETRY_DELAY = 5 # seconds
|
|
@@ -47,8 +47,8 @@ class BackendProcessor:
|
|
| 47 |
if current_time - self.last_connect_attempt < self.connect_cooldown and not force_reconnect:
|
| 48 |
return False
|
| 49 |
|
| 50 |
-
# Try
|
| 51 |
-
urls_to_try = [BACKEND_URL
|
| 52 |
|
| 53 |
for attempt in range(MAX_RETRIES):
|
| 54 |
for url in urls_to_try:
|
|
@@ -65,9 +65,11 @@ class BackendProcessor:
|
|
| 65 |
self.client = Client(url, **client_kwargs)
|
| 66 |
|
| 67 |
# Test connection with a simple API call
|
| 68 |
-
test_result = self.client.predict(api_name="//predict")
|
| 69 |
response_time = time.time() - start_time
|
| 70 |
|
|
|
|
|
|
|
| 71 |
if test_result:
|
| 72 |
self.connected = True
|
| 73 |
self.connection_url = url
|
|
@@ -214,7 +216,7 @@ class BackendProcessor:
|
|
| 214 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 215 |
|
| 216 |
def _background():
|
| 217 |
-
result = self._make_request("/
|
| 218 |
if result:
|
| 219 |
try:
|
| 220 |
data = json.loads(result)
|
|
@@ -224,16 +226,17 @@ class BackendProcessor:
|
|
| 224 |
logger.warning(f"[BACKEND] Cache response parsing failed: {e}")
|
| 225 |
|
| 226 |
threading.Thread(target=_background, daemon=True).start()
|
|
|
|
| 227 |
|
| 228 |
def get_cached_response(self, prompt: str) -> Optional[str]:
|
| 229 |
"""Try to get cached response (synchronous) with enhanced error handling"""
|
| 230 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 231 |
|
| 232 |
-
result = self._make_request("/
|
| 233 |
if result:
|
| 234 |
try:
|
| 235 |
data = json.loads(result)
|
| 236 |
-
if data.get("success"):
|
| 237 |
logger.info(f"[BACKEND] ⚡ CACHE HIT: {prompt_hash}")
|
| 238 |
return data["response"]
|
| 239 |
except Exception as e:
|
|
@@ -244,7 +247,7 @@ class BackendProcessor:
|
|
| 244 |
def charge_tokens_async(self, username: str, duration_ms: float):
|
| 245 |
"""Calculate token cost asynchronously with enhanced error handling"""
|
| 246 |
def _background():
|
| 247 |
-
result = self._make_request("/
|
| 248 |
if result:
|
| 249 |
try:
|
| 250 |
data = json.loads(result)
|
|
@@ -1442,7 +1445,11 @@ class ZeroEngine:
|
|
| 1442 |
yield history
|
| 1443 |
time.sleep(0.5) # Brief pause for user to see the message
|
| 1444 |
|
| 1445 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1446 |
cached_response = backend.get_cached_response(full_input)
|
| 1447 |
if cached_response:
|
| 1448 |
logger.info("⚡ BACKEND CACHE HIT - Instant response!")
|
|
@@ -1451,15 +1458,13 @@ class ZeroEngine:
|
|
| 1451 |
yield history
|
| 1452 |
return
|
| 1453 |
|
| 1454 |
-
# Prepare input with optimized formatting
|
| 1455 |
-
full_input = f"{ghost_context}\n{prompt}" if ghost_context else prompt
|
| 1456 |
-
formatted_prompt = f"User: {full_input}\nAssistant: "
|
| 1457 |
-
|
| 1458 |
# Add User Message & Empty Assistant Message for Streaming
|
| 1459 |
history.append({"role": "user", "content": prompt})
|
| 1460 |
history.append({"role": "assistant", "content": "..."})
|
| 1461 |
yield history
|
| 1462 |
|
|
|
|
|
|
|
| 1463 |
response_text = ""
|
| 1464 |
start_time = time.time()
|
| 1465 |
tokens_count = 0
|
|
@@ -1652,10 +1657,8 @@ kernel = ZeroEngine()
|
|
| 1652 |
# Session ID for token tracking
|
| 1653 |
session_id = "turtle170"
|
| 1654 |
|
| 1655 |
-
with gr.Blocks(title="ZeroEngine V0.2") as demo:
|
| 1656 |
-
|
| 1657 |
-
if hasattr(demo, 'css'):
|
| 1658 |
-
demo.css = demo_css
|
| 1659 |
gr.LoginButton()
|
| 1660 |
# Header with Token Display
|
| 1661 |
with gr.Row():
|
|
|
|
| 14 |
import hashlib
|
| 15 |
|
| 16 |
# Backend processor connection
|
| 17 |
+
BACKEND_URL = "turtle170/ZeroEngine-Backend"
|
| 18 |
+
BACKEND_FALLBACK_URL = None # Not needed
|
| 19 |
CONNECTION_TIMEOUT = 60 # seconds
|
| 20 |
MAX_RETRIES = 3
|
| 21 |
RETRY_DELAY = 5 # seconds
|
|
|
|
| 47 |
if current_time - self.last_connect_attempt < self.connect_cooldown and not force_reconnect:
|
| 48 |
return False
|
| 49 |
|
| 50 |
+
# Try simple repo ID connection
|
| 51 |
+
urls_to_try = [BACKEND_URL]
|
| 52 |
|
| 53 |
for attempt in range(MAX_RETRIES):
|
| 54 |
for url in urls_to_try:
|
|
|
|
| 65 |
self.client = Client(url, **client_kwargs)
|
| 66 |
|
| 67 |
# Test connection with a simple API call
|
| 68 |
+
test_result = self.client.predict("test connection", api_name="//predict")
|
| 69 |
response_time = time.time() - start_time
|
| 70 |
|
| 71 |
+
logger.info(f"[BACKEND] Connection test response: {test_result}")
|
| 72 |
+
|
| 73 |
if test_result:
|
| 74 |
self.connected = True
|
| 75 |
self.connection_url = url
|
|
|
|
| 216 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 217 |
|
| 218 |
def _background():
|
| 219 |
+
result = self._make_request("/predict_4", prompt_hash, response) # /predict_4 maps to the backend's cache_response endpoint
|
| 220 |
if result:
|
| 221 |
try:
|
| 222 |
data = json.loads(result)
|
|
|
|
| 226 |
logger.warning(f"[BACKEND] Cache response parsing failed: {e}")
|
| 227 |
|
| 228 |
threading.Thread(target=_background, daemon=True).start()
|
| 229 |
+
return None
|
| 230 |
|
| 231 |
def get_cached_response(self, prompt: str) -> Optional[str]:
|
| 232 |
"""Try to get cached response (synchronous) with enhanced error handling"""
|
| 233 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 234 |
|
| 235 |
+
result = self._make_request("/predict_5", prompt_hash)
|
| 236 |
if result:
|
| 237 |
try:
|
| 238 |
data = json.loads(result)
|
| 239 |
+
if data.get("success") and data.get("found"):
|
| 240 |
logger.info(f"[BACKEND] ⚡ CACHE HIT: {prompt_hash}")
|
| 241 |
return data["response"]
|
| 242 |
except Exception as e:
|
|
|
|
| 247 |
def charge_tokens_async(self, username: str, duration_ms: float):
|
| 248 |
"""Calculate token cost asynchronously with enhanced error handling"""
|
| 249 |
def _background():
|
| 250 |
+
result = self._make_request("/predict_6", username, duration_ms)
|
| 251 |
if result:
|
| 252 |
try:
|
| 253 |
data = json.loads(result)
|
|
|
|
| 1445 |
yield history
|
| 1446 |
time.sleep(0.5) # Brief pause for user to see the message
|
| 1447 |
|
| 1448 |
+
# Prepare input with optimized formatting
|
| 1449 |
+
full_input = f"{ghost_context}\n{prompt}" if ghost_context else prompt
|
| 1450 |
+
formatted_prompt = f"User: {full_input}\nAssistant: "
|
| 1451 |
+
|
| 1452 |
+
# Try backend cache first
|
| 1453 |
cached_response = backend.get_cached_response(full_input)
|
| 1454 |
if cached_response:
|
| 1455 |
logger.info("⚡ BACKEND CACHE HIT - Instant response!")
|
|
|
|
| 1458 |
yield history
|
| 1459 |
return
|
| 1460 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1461 |
# Add User Message & Empty Assistant Message for Streaming
|
| 1462 |
history.append({"role": "user", "content": prompt})
|
| 1463 |
history.append({"role": "assistant", "content": "..."})
|
| 1464 |
yield history
|
| 1465 |
|
| 1466 |
+
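cache_key = f"{ghost_context}:{prompt}" # NOTE(review): the cache lookup hashes full_input (newline-joined, or the bare prompt) - confirm this colon-joined key is intended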
|
| 1467 |
+
|
| 1468 |
response_text = ""
|
| 1469 |
start_time = time.time()
|
| 1470 |
tokens_count = 0
|
|
|
|
| 1657 |
# Session ID for token tracking
|
| 1658 |
session_id = "turtle170"
|
| 1659 |
|
| 1660 |
+
with gr.Blocks(title="ZeroEngine V0.2", css=CUSTOM_CSS) as demo:
|
| 1661 |
+
# CSS applied in Blocks constructor for Gradio 6.5.0
|
|
|
|
|
|
|
| 1662 |
gr.LoginButton()
|
| 1663 |
# Header with Token Display
|
| 1664 |
with gr.Row():
|