Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -65,7 +65,7 @@ class BackendProcessor:
|
|
| 65 |
self.client = Client(url, **client_kwargs)
|
| 66 |
|
| 67 |
# Test connection with a simple API call
|
| 68 |
-
test_result = self.client.predict(
|
| 69 |
response_time = time.time() - start_time
|
| 70 |
|
| 71 |
if test_result:
|
|
@@ -77,17 +77,17 @@ class BackendProcessor:
|
|
| 77 |
if len(self.response_times) > 10:
|
| 78 |
self.response_times.pop(0)
|
| 79 |
|
| 80 |
-
logger.info(f"[BACKEND]
|
| 81 |
return True
|
| 82 |
|
| 83 |
except Exception as e:
|
| 84 |
-
logger.warning(f"[BACKEND]
|
| 85 |
self.connected = False
|
| 86 |
|
| 87 |
if attempt < MAX_RETRIES - 1:
|
| 88 |
time.sleep(RETRY_DELAY)
|
| 89 |
|
| 90 |
-
logger.error(f"[BACKEND]
|
| 91 |
self.connected = False
|
| 92 |
return False
|
| 93 |
|
|
@@ -112,7 +112,7 @@ class BackendProcessor:
|
|
| 112 |
# Test tokenization
|
| 113 |
try:
|
| 114 |
start_time = time.time()
|
| 115 |
-
result = self.client.predict(
|
| 116 |
response_time = time.time() - start_time
|
| 117 |
endpoints_status["tokenize"] = {"status": "ok", "response_time": response_time}
|
| 118 |
except Exception as e:
|
|
@@ -121,7 +121,7 @@ class BackendProcessor:
|
|
| 121 |
# Test cache stats
|
| 122 |
try:
|
| 123 |
start_time = time.time()
|
| 124 |
-
result = self.client.predict(api_name="
|
| 125 |
response_time = time.time() - start_time
|
| 126 |
data = json.loads(result)
|
| 127 |
if data.get("success"):
|
|
@@ -194,7 +194,7 @@ class BackendProcessor:
|
|
| 194 |
return
|
| 195 |
|
| 196 |
def _background():
|
| 197 |
-
result = self._make_request("
|
| 198 |
if result:
|
| 199 |
try:
|
| 200 |
data = json.loads(result)
|
|
@@ -210,7 +210,7 @@ class BackendProcessor:
|
|
| 210 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 211 |
|
| 212 |
def _background():
|
| 213 |
-
result = self._make_request("
|
| 214 |
if result:
|
| 215 |
try:
|
| 216 |
data = json.loads(result)
|
|
@@ -225,7 +225,7 @@ class BackendProcessor:
|
|
| 225 |
"""Try to get cached response (synchronous) with enhanced error handling"""
|
| 226 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 227 |
|
| 228 |
-
result = self._make_request("
|
| 229 |
if result:
|
| 230 |
try:
|
| 231 |
data = json.loads(result)
|
|
@@ -240,7 +240,7 @@ class BackendProcessor:
|
|
| 240 |
def charge_tokens_async(self, username: str, duration_ms: float):
|
| 241 |
"""Calculate token cost asynchronously with enhanced error handling"""
|
| 242 |
def _background():
|
| 243 |
-
result = self._make_request("
|
| 244 |
if result:
|
| 245 |
try:
|
| 246 |
data = json.loads(result)
|
|
|
|
| 65 |
self.client = Client(url, **client_kwargs)
|
| 66 |
|
| 67 |
# Test connection with a simple API call
|
| 68 |
+
test_result = self.client.predict(api_name="predict")
|
| 69 |
response_time = time.time() - start_time
|
| 70 |
|
| 71 |
if test_result:
|
|
|
|
| 77 |
if len(self.response_times) > 10:
|
| 78 |
self.response_times.pop(0)
|
| 79 |
|
| 80 |
+
logger.info(f"[BACKEND] Connected to {url} (attempt {attempt+1}, {response_time:.2f}s)")
|
| 81 |
return True
|
| 82 |
|
| 83 |
except Exception as e:
|
| 84 |
+
logger.warning(f"[BACKEND] Connection failed to {url} (attempt {attempt+1}): {e}")
|
| 85 |
self.connected = False
|
| 86 |
|
| 87 |
if attempt < MAX_RETRIES - 1:
|
| 88 |
time.sleep(RETRY_DELAY)
|
| 89 |
|
| 90 |
+
logger.error(f"[BACKEND] All connection attempts failed after {MAX_RETRIES} tries")
|
| 91 |
self.connected = False
|
| 92 |
return False
|
| 93 |
|
|
|
|
| 112 |
# Test tokenization
|
| 113 |
try:
|
| 114 |
start_time = time.time()
|
| 115 |
+
result = self.client.predict(api_name="predict")
|
| 116 |
response_time = time.time() - start_time
|
| 117 |
endpoints_status["tokenize"] = {"status": "ok", "response_time": response_time}
|
| 118 |
except Exception as e:
|
|
|
|
| 121 |
# Test cache stats
|
| 122 |
try:
|
| 123 |
start_time = time.time()
|
| 124 |
+
result = self.client.predict(api_name="predict_6")
|
| 125 |
response_time = time.time() - start_time
|
| 126 |
data = json.loads(result)
|
| 127 |
if data.get("success"):
|
|
|
|
| 194 |
return
|
| 195 |
|
| 196 |
def _background():
|
| 197 |
+
result = self._make_request("predict", text)
|
| 198 |
if result:
|
| 199 |
try:
|
| 200 |
data = json.loads(result)
|
|
|
|
| 210 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 211 |
|
| 212 |
def _background():
|
| 213 |
+
result = self._make_request("predict_2", prompt_hash, response)
|
| 214 |
if result:
|
| 215 |
try:
|
| 216 |
data = json.loads(result)
|
|
|
|
| 225 |
"""Try to get cached response (synchronous) with enhanced error handling"""
|
| 226 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:16]
|
| 227 |
|
| 228 |
+
result = self._make_request("predict_3", prompt_hash)
|
| 229 |
if result:
|
| 230 |
try:
|
| 231 |
data = json.loads(result)
|
|
|
|
| 240 |
def charge_tokens_async(self, username: str, duration_ms: float):
|
| 241 |
"""Calculate token cost asynchronously with enhanced error handling"""
|
| 242 |
def _background():
|
| 243 |
+
result = self._make_request("predict_4", username, duration_ms)
|
| 244 |
if result:
|
| 245 |
try:
|
| 246 |
data = json.loads(result)
|