turtle170 committed on
Commit
8806e23
·
verified ·
1 Parent(s): 4166b44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -14,8 +14,8 @@ from gradio_client import Client
14
  import hashlib
15
 
16
  # Backend processor connection
17
- BACKEND_URL = "https://turtle170-ZeroEngine-Backend.hf.space"
18
- BACKEND_FALLBACK_URL = "turtle170/ZeroEngine-Backend" # Fallback to repo ID
19
  CONNECTION_TIMEOUT = 60 # seconds
20
  MAX_RETRIES = 3
21
  RETRY_DELAY = 5 # seconds
@@ -47,8 +47,8 @@ class BackendProcessor:
47
  if current_time - self.last_connect_attempt < self.connect_cooldown and not force_reconnect:
48
  return False
49
 
50
- # Try multiple connection strategies
51
- urls_to_try = [BACKEND_URL, BACKEND_FALLBACK_URL] if BACKEND_URL != BACKEND_FALLBACK_URL else [BACKEND_URL]
52
 
53
  for attempt in range(MAX_RETRIES):
54
  for url in urls_to_try:
@@ -65,9 +65,11 @@ class BackendProcessor:
65
  self.client = Client(url, **client_kwargs)
66
 
67
  # Test connection with a simple API call
68
- test_result = self.client.predict(api_name="//predict")
69
  response_time = time.time() - start_time
70
 
 
 
71
  if test_result:
72
  self.connected = True
73
  self.connection_url = url
@@ -214,7 +216,7 @@ class BackendProcessor:
214
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
215
 
216
  def _background():
217
- result = self._make_request("/predict_2", prompt_hash, response)
218
  if result:
219
  try:
220
  data = json.loads(result)
@@ -224,16 +226,17 @@ class BackendProcessor:
224
  logger.warning(f"[BACKEND] Cache response parsing failed: {e}")
225
 
226
  threading.Thread(target=_background, daemon=True).start()
 
227
 
228
  def get_cached_response(self, prompt: str) -> Optional[str]:
229
  """Try to get cached response (synchronous) with enhanced error handling"""
230
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
231
 
232
- result = self._make_request("/predict_3", prompt_hash)
233
  if result:
234
  try:
235
  data = json.loads(result)
236
- if data.get("success"):
237
  logger.info(f"[BACKEND] ⚡ CACHE HIT: {prompt_hash}")
238
  return data["response"]
239
  except Exception as e:
@@ -244,7 +247,7 @@ class BackendProcessor:
244
  def charge_tokens_async(self, username: str, duration_ms: float):
245
  """Calculate token cost asynchronously with enhanced error handling"""
246
  def _background():
247
- result = self._make_request("/predict_4", username, duration_ms)
248
  if result:
249
  try:
250
  data = json.loads(result)
@@ -1442,7 +1445,11 @@ class ZeroEngine:
1442
  yield history
1443
  time.sleep(0.5) # Brief pause for user to see the message
1444
 
1445
- # Check prompt cache for exact matches (instant response)
 
 
 
 
1446
  cached_response = backend.get_cached_response(full_input)
1447
  if cached_response:
1448
  logger.info("⚡ BACKEND CACHE HIT - Instant response!")
@@ -1451,15 +1458,13 @@ class ZeroEngine:
1451
  yield history
1452
  return
1453
 
1454
- # Prepare input with optimized formatting
1455
- full_input = f"{ghost_context}\n{prompt}" if ghost_context else prompt
1456
- formatted_prompt = f"User: {full_input}\nAssistant: "
1457
-
1458
  # Add User Message & Empty Assistant Message for Streaming
1459
  history.append({"role": "user", "content": prompt})
1460
  history.append({"role": "assistant", "content": "..."})
1461
  yield history
1462
 
 
 
1463
  response_text = ""
1464
  start_time = time.time()
1465
  tokens_count = 0
@@ -1652,10 +1657,8 @@ kernel = ZeroEngine()
1652
  # Session ID for token tracking
1653
  session_id = "turtle170"
1654
 
1655
- with gr.Blocks(title="ZeroEngine V0.2") as demo:
1656
- demo_css = CUSTOM_CSS
1657
- if hasattr(demo, 'css'):
1658
- demo.css = demo_css
1659
  gr.LoginButton()
1660
  # Header with Token Display
1661
  with gr.Row():
 
14
  import hashlib
15
 
16
  # Backend processor connection
17
+ BACKEND_URL = "turtle170/ZeroEngine-Backend"
18
+ BACKEND_FALLBACK_URL = None # Not needed
19
  CONNECTION_TIMEOUT = 60 # seconds
20
  MAX_RETRIES = 3
21
  RETRY_DELAY = 5 # seconds
 
47
  if current_time - self.last_connect_attempt < self.connect_cooldown and not force_reconnect:
48
  return False
49
 
50
+ # Try simple repo ID connection
51
+ urls_to_try = [BACKEND_URL]
52
 
53
  for attempt in range(MAX_RETRIES):
54
  for url in urls_to_try:
 
65
  self.client = Client(url, **client_kwargs)
66
 
67
  # Test connection with a simple API call
68
+ test_result = self.client.predict("test connection", api_name="//predict")
69
  response_time = time.time() - start_time
70
 
71
+ logger.info(f"[BACKEND] Connection test response: {test_result}")
72
+
73
  if test_result:
74
  self.connected = True
75
  self.connection_url = url
 
216
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
217
 
218
  def _background():
219
+ result = self._make_request("/predict_4", prompt_hash, response) # ✅ CORRECT - /predict_4 for cache_response
220
  if result:
221
  try:
222
  data = json.loads(result)
 
226
  logger.warning(f"[BACKEND] Cache response parsing failed: {e}")
227
 
228
  threading.Thread(target=_background, daemon=True).start()
229
+ return None
230
 
231
  def get_cached_response(self, prompt: str) -> Optional[str]:
232
  """Try to get cached response (synchronous) with enhanced error handling"""
233
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
234
 
235
+ result = self._make_request("/predict_5", prompt_hash)
236
  if result:
237
  try:
238
  data = json.loads(result)
239
+ if data.get("success") and data.get("found"):
240
  logger.info(f"[BACKEND] ⚡ CACHE HIT: {prompt_hash}")
241
  return data["response"]
242
  except Exception as e:
 
247
  def charge_tokens_async(self, username: str, duration_ms: float):
248
  """Calculate token cost asynchronously with enhanced error handling"""
249
  def _background():
250
+ result = self._make_request("/predict_6", username, duration_ms)
251
  if result:
252
  try:
253
  data = json.loads(result)
 
1445
  yield history
1446
  time.sleep(0.5) # Brief pause for user to see the message
1447
 
1448
+ # Prepare input with optimized formatting
1449
+ full_input = f"{ghost_context}\n{prompt}" if ghost_context else prompt
1450
+ formatted_prompt = f"User: {full_input}\nAssistant: "
1451
+
1452
+ # Try backend cache first
1453
  cached_response = backend.get_cached_response(full_input)
1454
  if cached_response:
1455
  logger.info("⚡ BACKEND CACHE HIT - Instant response!")
 
1458
  yield history
1459
  return
1460
 
 
 
 
 
1461
  # Add User Message & Empty Assistant Message for Streaming
1462
  history.append({"role": "user", "content": prompt})
1463
  history.append({"role": "assistant", "content": "..."})
1464
  yield history
1465
 
1466
+ cache_key = f"{ghost_context}:{prompt}" # ← ADD THIS LINE
1467
+
1468
  response_text = ""
1469
  start_time = time.time()
1470
  tokens_count = 0
 
1657
  # Session ID for token tracking
1658
  session_id = "turtle170"
1659
 
1660
+ with gr.Blocks(title="ZeroEngine V0.2", css=CUSTOM_CSS) as demo:
1661
+ # CSS applied in Blocks constructor for Gradio 6.5.0
 
 
1662
  gr.LoginButton()
1663
  # Header with Token Display
1664
  with gr.Row():