Spaces:
Sleeping
Sleeping
initial
Browse files- src/main.py +141 -83
src/main.py
CHANGED
|
@@ -16,6 +16,18 @@ from fastapi.security import APIKeyHeader
|
|
| 16 |
|
| 17 |
import httpx
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
# Custom UUIDv7 implementation (using correct Unix epoch)
|
| 20 |
def uuid7():
|
| 21 |
"""
|
|
@@ -206,6 +218,23 @@ async def get_initial_data():
|
|
| 206 |
except Exception as e:
|
| 207 |
print(f"β An error occurred during initial data retrieval: {e}")
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
@app.on_event("startup")
|
| 210 |
async def startup_event():
|
| 211 |
# Ensure config and models files exist
|
|
@@ -213,7 +242,10 @@ async def startup_event():
|
|
| 213 |
save_models(get_models())
|
| 214 |
# Load usage stats from config
|
| 215 |
load_usage_stats()
|
|
|
|
| 216 |
asyncio.create_task(get_initial_data())
|
|
|
|
|
|
|
| 217 |
|
| 218 |
# --- UI Endpoints (Login/Dashboard) ---
|
| 219 |
|
|
@@ -1004,30 +1036,30 @@ async def list_models(api_key: dict = Depends(rate_limit_api_key)):
|
|
| 1004 |
|
| 1005 |
@app.post("/api/v1/chat/completions")
|
| 1006 |
async def api_chat_completions(request: Request, api_key: dict = Depends(rate_limit_api_key)):
|
| 1007 |
-
|
| 1008 |
-
|
| 1009 |
-
|
| 1010 |
|
| 1011 |
try:
|
| 1012 |
body = await request.json()
|
| 1013 |
-
|
| 1014 |
|
| 1015 |
model_public_name = body.get("model")
|
| 1016 |
messages = body.get("messages", [])
|
| 1017 |
stream = body.get("stream", False)
|
| 1018 |
|
| 1019 |
-
|
| 1020 |
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
|
| 1024 |
if not model_public_name or not messages:
|
| 1025 |
-
|
| 1026 |
raise HTTPException(status_code=400, detail="Missing 'model' or 'messages' in request body.")
|
| 1027 |
|
| 1028 |
# Find model ID from public name
|
| 1029 |
models = get_models()
|
| 1030 |
-
|
| 1031 |
|
| 1032 |
model_id = None
|
| 1033 |
for m in models:
|
|
@@ -1036,13 +1068,13 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1036 |
break
|
| 1037 |
|
| 1038 |
if not model_id:
|
| 1039 |
-
|
| 1040 |
raise HTTPException(
|
| 1041 |
status_code=404,
|
| 1042 |
detail=f"Model '{model_public_name}' not found. Use /api/v1/models to see available models."
|
| 1043 |
)
|
| 1044 |
|
| 1045 |
-
|
| 1046 |
|
| 1047 |
# Log usage
|
| 1048 |
model_usage_stats[model_public_name] += 1
|
|
@@ -1056,48 +1088,48 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1056 |
|
| 1057 |
# Validate prompt is a string and not too large
|
| 1058 |
if not isinstance(prompt, str):
|
| 1059 |
-
|
| 1060 |
raise HTTPException(status_code=400, detail="Message content must be a string.")
|
| 1061 |
|
| 1062 |
if not prompt:
|
| 1063 |
-
|
| 1064 |
raise HTTPException(status_code=400, detail="Last message must have content.")
|
| 1065 |
|
| 1066 |
# Log prompt length for debugging character limit issues
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
|
| 1070 |
# Check for reasonable character limit (LMArena appears to have limits)
|
| 1071 |
# Typical limit seems to be around 32K-64K characters based on testing
|
| 1072 |
MAX_PROMPT_LENGTH = 50000 # Conservative estimate
|
| 1073 |
if len(prompt) > MAX_PROMPT_LENGTH:
|
| 1074 |
error_msg = f"Prompt too long ({len(prompt)} characters). LMArena has a character limit of approximately {MAX_PROMPT_LENGTH} characters. Please reduce the message size."
|
| 1075 |
-
|
| 1076 |
raise HTTPException(status_code=400, detail=error_msg)
|
| 1077 |
|
| 1078 |
# Use API key + conversation tracking
|
| 1079 |
api_key_str = api_key["key"]
|
| 1080 |
conversation_id = body.get("conversation_id", f"conv-{uuid.uuid4()}")
|
| 1081 |
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
|
| 1085 |
headers = get_request_headers()
|
| 1086 |
-
|
| 1087 |
|
| 1088 |
# Check if conversation exists for this API key
|
| 1089 |
session = chat_sessions[api_key_str].get(conversation_id)
|
| 1090 |
|
| 1091 |
if not session:
|
| 1092 |
-
|
| 1093 |
# New conversation - Generate all IDs at once (like the browser does)
|
| 1094 |
session_id = str(uuid7())
|
| 1095 |
user_msg_id = str(uuid7())
|
| 1096 |
model_msg_id = str(uuid7())
|
| 1097 |
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
| 1101 |
|
| 1102 |
payload = {
|
| 1103 |
"id": session_id,
|
|
@@ -1135,15 +1167,15 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1135 |
"modality": "chat"
|
| 1136 |
}
|
| 1137 |
url = "https://lmarena.ai/nextjs-api/stream/create-evaluation"
|
| 1138 |
-
|
| 1139 |
-
|
| 1140 |
else:
|
| 1141 |
-
|
| 1142 |
# Follow-up message - Generate new message IDs
|
| 1143 |
user_msg_id = str(uuid7())
|
| 1144 |
-
|
| 1145 |
model_msg_id = str(uuid7())
|
| 1146 |
-
|
| 1147 |
|
| 1148 |
# Build full conversation history using stored messages with their original IDs
|
| 1149 |
conversation_messages = []
|
|
@@ -1205,11 +1237,11 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1205 |
"modality": "chat"
|
| 1206 |
}
|
| 1207 |
url = f"https://lmarena.ai/nextjs-api/stream/post-to-evaluation/{session['conversation_id']}"
|
| 1208 |
-
|
| 1209 |
-
|
| 1210 |
|
| 1211 |
-
|
| 1212 |
-
|
| 1213 |
|
| 1214 |
# Handle streaming mode
|
| 1215 |
if stream:
|
|
@@ -1219,9 +1251,9 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1219 |
|
| 1220 |
async with httpx.AsyncClient() as client:
|
| 1221 |
try:
|
| 1222 |
-
|
| 1223 |
async with client.stream('POST', url, json=payload, headers=headers, timeout=120) as response:
|
| 1224 |
-
|
| 1225 |
response.raise_for_status()
|
| 1226 |
|
| 1227 |
async for line in response.aiter_lines():
|
|
@@ -1298,7 +1330,7 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1298 |
chat_sessions[api_key_str][conversation_id]["last_message_id"] = model_msg_id
|
| 1299 |
|
| 1300 |
yield "data: [DONE]\n\n"
|
| 1301 |
-
|
| 1302 |
|
| 1303 |
except httpx.HTTPStatusError as e:
|
| 1304 |
error_msg = f"LMArena API error: {e.response.status_code}"
|
|
@@ -1326,17 +1358,17 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1326 |
# Handle non-streaming mode (original code)
|
| 1327 |
async with httpx.AsyncClient() as client:
|
| 1328 |
try:
|
| 1329 |
-
|
| 1330 |
response = await client.post(url, json=payload, headers=headers, timeout=120)
|
| 1331 |
|
| 1332 |
-
|
| 1333 |
-
|
| 1334 |
-
|
| 1335 |
|
| 1336 |
response.raise_for_status()
|
| 1337 |
|
| 1338 |
-
|
| 1339 |
-
|
| 1340 |
|
| 1341 |
# Process response in lmarena format
|
| 1342 |
# Format: a0:"text chunk" for content, ad:{...} for metadata
|
|
@@ -1346,7 +1378,7 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1346 |
text_chunks_found = 0
|
| 1347 |
metadata_found = 0
|
| 1348 |
|
| 1349 |
-
|
| 1350 |
|
| 1351 |
error_message = None
|
| 1352 |
for line in response.text.splitlines():
|
|
@@ -1364,9 +1396,9 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1364 |
text_chunk = json.loads(chunk_data)
|
| 1365 |
response_text += text_chunk
|
| 1366 |
if text_chunks_found <= 3: # Log first 3 chunks
|
| 1367 |
-
|
| 1368 |
except json.JSONDecodeError as e:
|
| 1369 |
-
|
| 1370 |
continue
|
| 1371 |
|
| 1372 |
# Parse error messages: a3:"An error occurred"
|
|
@@ -1374,9 +1406,9 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1374 |
error_data = line[3:] # Remove "a3:" prefix
|
| 1375 |
try:
|
| 1376 |
error_message = json.loads(error_data)
|
| 1377 |
-
|
| 1378 |
except json.JSONDecodeError as e:
|
| 1379 |
-
|
| 1380 |
error_message = error_data
|
| 1381 |
|
| 1382 |
# Parse metadata: ad:{"finishReason":"stop"}
|
|
@@ -1386,34 +1418,48 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1386 |
try:
|
| 1387 |
metadata = json.loads(metadata_data)
|
| 1388 |
finish_reason = metadata.get("finishReason")
|
| 1389 |
-
|
| 1390 |
except json.JSONDecodeError as e:
|
| 1391 |
-
|
| 1392 |
continue
|
| 1393 |
elif line.strip(): # Non-empty line that doesn't match expected format
|
| 1394 |
if line_count <= 5: # Log first 5 unexpected lines
|
| 1395 |
-
|
| 1396 |
-
|
| 1397 |
-
|
| 1398 |
-
|
| 1399 |
-
|
| 1400 |
-
|
| 1401 |
-
|
| 1402 |
-
|
| 1403 |
|
| 1404 |
if not response_text:
|
| 1405 |
-
|
| 1406 |
-
|
| 1407 |
if error_message:
|
| 1408 |
-
error_detail = f"LMArena API
|
| 1409 |
-
print(f"β
|
| 1410 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1411 |
else:
|
| 1412 |
error_detail = "LMArena API returned empty response. This could be due to: invalid auth token, expired cf_clearance, model unavailable, or API rate limiting."
|
| 1413 |
-
|
| 1414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1415 |
else:
|
| 1416 |
-
|
| 1417 |
|
| 1418 |
# Update session - Store message history with IDs
|
| 1419 |
if not session:
|
|
@@ -1425,7 +1471,7 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1425 |
{"id": model_msg_id, "role": "assistant", "content": response_text.strip()}
|
| 1426 |
]
|
| 1427 |
}
|
| 1428 |
-
|
| 1429 |
else:
|
| 1430 |
# Append new messages to history
|
| 1431 |
chat_sessions[api_key_str][conversation_id]["messages"].append(
|
|
@@ -1434,7 +1480,7 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1434 |
chat_sessions[api_key_str][conversation_id]["messages"].append(
|
| 1435 |
{"id": model_msg_id, "role": "assistant", "content": response_text.strip()}
|
| 1436 |
)
|
| 1437 |
-
|
| 1438 |
|
| 1439 |
final_response = {
|
| 1440 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
|
@@ -1457,8 +1503,8 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1457 |
}
|
| 1458 |
}
|
| 1459 |
|
| 1460 |
-
|
| 1461 |
-
|
| 1462 |
|
| 1463 |
return final_response
|
| 1464 |
|
|
@@ -1472,28 +1518,33 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1472 |
print(f"\nβ HTTP STATUS ERROR")
|
| 1473 |
print(f"π Error detail: {error_detail}")
|
| 1474 |
print(f"π€ Request URL: {url}")
|
| 1475 |
-
|
| 1476 |
-
|
| 1477 |
print("="*80 + "\n")
|
| 1478 |
|
| 1479 |
-
#
|
| 1480 |
-
if e.response.status_code == 429
|
| 1481 |
-
|
| 1482 |
-
|
| 1483 |
-
|
| 1484 |
-
|
| 1485 |
-
|
| 1486 |
-
|
| 1487 |
-
|
| 1488 |
-
|
| 1489 |
-
raise HTTPException(status_code=502, detail=error_detail)
|
| 1490 |
|
| 1491 |
except httpx.TimeoutException as e:
|
| 1492 |
print(f"\n⏱️ TIMEOUT ERROR")
|
| 1493 |
print(f"π Request timed out after 120 seconds")
|
| 1494 |
print(f"π€ Request URL: {url}")
|
| 1495 |
print("="*80 + "\n")
|
| 1496 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1497 |
|
| 1498 |
except Exception as e:
|
| 1499 |
print(f"\nβ UNEXPECTED ERROR IN HTTP CLIENT")
|
|
@@ -1501,7 +1552,14 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
|
|
| 1501 |
print(f"π Error message: {str(e)}")
|
| 1502 |
print(f"π€ Request URL: {url}")
|
| 1503 |
print("="*80 + "\n")
|
| 1504 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1505 |
|
| 1506 |
except HTTPException:
|
| 1507 |
raise
|
|
|
|
| 16 |
|
| 17 |
import httpx
|
| 18 |
|
| 19 |
+
# ============================================================
|
| 20 |
+
# DEBUG CONFIGURATION
|
| 21 |
+
# ============================================================
|
| 22 |
+
# Set to True for detailed logging, False for minimal logging
|
| 23 |
+
DEBUG = False
|
| 24 |
+
# ============================================================
|
| 25 |
+
|
| 26 |
+
def debug_print(*args, **kwargs):
|
| 27 |
+
"""Print debug messages only if DEBUG is True"""
|
| 28 |
+
if DEBUG:
|
| 29 |
+
print(*args, **kwargs)
|
| 30 |
+
|
| 31 |
# Custom UUIDv7 implementation (using correct Unix epoch)
|
| 32 |
def uuid7():
|
| 33 |
"""
|
|
|
|
| 218 |
except Exception as e:
|
| 219 |
print(f"β An error occurred during initial data retrieval: {e}")
|
| 220 |
|
| 221 |
+
async def periodic_refresh_task():
|
| 222 |
+
"""Background task to refresh cf_clearance and models every 30 minutes"""
|
| 223 |
+
while True:
|
| 224 |
+
try:
|
| 225 |
+
# Wait 30 minutes (1800 seconds)
|
| 226 |
+
await asyncio.sleep(1800)
|
| 227 |
+
print("\n" + "="*60)
|
| 228 |
+
print("π Starting scheduled 30-minute refresh...")
|
| 229 |
+
print("="*60)
|
| 230 |
+
await get_initial_data()
|
| 231 |
+
print("✅ Scheduled refresh completed")
|
| 232 |
+
print("="*60 + "\n")
|
| 233 |
+
except Exception as e:
|
| 234 |
+
print(f"β Error in periodic refresh task: {e}")
|
| 235 |
+
# Continue the loop even if there's an error
|
| 236 |
+
continue
|
| 237 |
+
|
| 238 |
@app.on_event("startup")
|
| 239 |
async def startup_event():
|
| 240 |
# Ensure config and models files exist
|
|
|
|
| 242 |
save_models(get_models())
|
| 243 |
# Load usage stats from config
|
| 244 |
load_usage_stats()
|
| 245 |
+
# Start initial data fetch
|
| 246 |
asyncio.create_task(get_initial_data())
|
| 247 |
+
# Start periodic refresh task (every 30 minutes)
|
| 248 |
+
asyncio.create_task(periodic_refresh_task())
|
| 249 |
|
| 250 |
# --- UI Endpoints (Login/Dashboard) ---
|
| 251 |
|
|
|
|
| 1036 |
|
| 1037 |
@app.post("/api/v1/chat/completions")
|
| 1038 |
async def api_chat_completions(request: Request, api_key: dict = Depends(rate_limit_api_key)):
|
| 1039 |
+
debug_print("\n" + "="*80)
|
| 1040 |
+
debug_print("π΅ NEW API REQUEST RECEIVED")
|
| 1041 |
+
debug_print("="*80)
|
| 1042 |
|
| 1043 |
try:
|
| 1044 |
body = await request.json()
|
| 1045 |
+
debug_print(f"π₯ Request body keys: {list(body.keys())}")
|
| 1046 |
|
| 1047 |
model_public_name = body.get("model")
|
| 1048 |
messages = body.get("messages", [])
|
| 1049 |
stream = body.get("stream", False)
|
| 1050 |
|
| 1051 |
+
debug_print(f"π Stream mode: {stream}")
|
| 1052 |
|
| 1053 |
+
debug_print(f"π€ Requested model: {model_public_name}")
|
| 1054 |
+
debug_print(f"π¬ Number of messages: {len(messages)}")
|
| 1055 |
|
| 1056 |
if not model_public_name or not messages:
|
| 1057 |
+
debug_print("β Missing model or messages in request")
|
| 1058 |
raise HTTPException(status_code=400, detail="Missing 'model' or 'messages' in request body.")
|
| 1059 |
|
| 1060 |
# Find model ID from public name
|
| 1061 |
models = get_models()
|
| 1062 |
+
debug_print(f"π Total models loaded: {len(models)}")
|
| 1063 |
|
| 1064 |
model_id = None
|
| 1065 |
for m in models:
|
|
|
|
| 1068 |
break
|
| 1069 |
|
| 1070 |
if not model_id:
|
| 1071 |
+
debug_print(f"β Model '{model_public_name}' not found in model list")
|
| 1072 |
raise HTTPException(
|
| 1073 |
status_code=404,
|
| 1074 |
detail=f"Model '{model_public_name}' not found. Use /api/v1/models to see available models."
|
| 1075 |
)
|
| 1076 |
|
| 1077 |
+
debug_print(f"✅ Found model ID: {model_id}")
|
| 1078 |
|
| 1079 |
# Log usage
|
| 1080 |
model_usage_stats[model_public_name] += 1
|
|
|
|
| 1088 |
|
| 1089 |
# Validate prompt is a string and not too large
|
| 1090 |
if not isinstance(prompt, str):
|
| 1091 |
+
debug_print("β Prompt content must be a string")
|
| 1092 |
raise HTTPException(status_code=400, detail="Message content must be a string.")
|
| 1093 |
|
| 1094 |
if not prompt:
|
| 1095 |
+
debug_print("β Last message has no content")
|
| 1096 |
raise HTTPException(status_code=400, detail="Last message must have content.")
|
| 1097 |
|
| 1098 |
# Log prompt length for debugging character limit issues
|
| 1099 |
+
debug_print(f"π User prompt length: {len(prompt)} characters")
|
| 1100 |
+
debug_print(f"π User prompt preview: {prompt[:100]}..." if len(prompt) > 100 else f"π User prompt: {prompt}")
|
| 1101 |
|
| 1102 |
# Check for reasonable character limit (LMArena appears to have limits)
|
| 1103 |
# Typical limit seems to be around 32K-64K characters based on testing
|
| 1104 |
MAX_PROMPT_LENGTH = 50000 # Conservative estimate
|
| 1105 |
if len(prompt) > MAX_PROMPT_LENGTH:
|
| 1106 |
error_msg = f"Prompt too long ({len(prompt)} characters). LMArena has a character limit of approximately {MAX_PROMPT_LENGTH} characters. Please reduce the message size."
|
| 1107 |
+
debug_print(f"β {error_msg}")
|
| 1108 |
raise HTTPException(status_code=400, detail=error_msg)
|
| 1109 |
|
| 1110 |
# Use API key + conversation tracking
|
| 1111 |
api_key_str = api_key["key"]
|
| 1112 |
conversation_id = body.get("conversation_id", f"conv-{uuid.uuid4()}")
|
| 1113 |
|
| 1114 |
+
debug_print(f"π API Key: {api_key_str[:20]}...")
|
| 1115 |
+
debug_print(f"π Conversation ID: {conversation_id}")
|
| 1116 |
|
| 1117 |
headers = get_request_headers()
|
| 1118 |
+
debug_print(f"π Headers prepared (auth token length: {len(headers.get('Cookie', '').split('arena-auth-prod-v1=')[-1].split(';')[0])} chars)")
|
| 1119 |
|
| 1120 |
# Check if conversation exists for this API key
|
| 1121 |
session = chat_sessions[api_key_str].get(conversation_id)
|
| 1122 |
|
| 1123 |
if not session:
|
| 1124 |
+
debug_print("π Creating NEW conversation session")
|
| 1125 |
# New conversation - Generate all IDs at once (like the browser does)
|
| 1126 |
session_id = str(uuid7())
|
| 1127 |
user_msg_id = str(uuid7())
|
| 1128 |
model_msg_id = str(uuid7())
|
| 1129 |
|
| 1130 |
+
debug_print(f"π Generated session_id: {session_id}")
|
| 1131 |
+
debug_print(f"π€ Generated user_msg_id: {user_msg_id}")
|
| 1132 |
+
debug_print(f"π€ Generated model_msg_id: {model_msg_id}")
|
| 1133 |
|
| 1134 |
payload = {
|
| 1135 |
"id": session_id,
|
|
|
|
| 1167 |
"modality": "chat"
|
| 1168 |
}
|
| 1169 |
url = "https://lmarena.ai/nextjs-api/stream/create-evaluation"
|
| 1170 |
+
debug_print(f"π€ Target URL: {url}")
|
| 1171 |
+
debug_print(f"π¦ Payload structure: {len(payload['messages'])} messages")
|
| 1172 |
else:
|
| 1173 |
+
debug_print("π Using EXISTING conversation session")
|
| 1174 |
# Follow-up message - Generate new message IDs
|
| 1175 |
user_msg_id = str(uuid7())
|
| 1176 |
+
debug_print(f"π€ Generated followup user_msg_id: {user_msg_id}")
|
| 1177 |
model_msg_id = str(uuid7())
|
| 1178 |
+
debug_print(f"π€ Generated followup model_msg_id: {model_msg_id}")
|
| 1179 |
|
| 1180 |
# Build full conversation history using stored messages with their original IDs
|
| 1181 |
conversation_messages = []
|
|
|
|
| 1237 |
"modality": "chat"
|
| 1238 |
}
|
| 1239 |
url = f"https://lmarena.ai/nextjs-api/stream/post-to-evaluation/{session['conversation_id']}"
|
| 1240 |
+
debug_print(f"π€ Target URL: {url}")
|
| 1241 |
+
debug_print(f"π¦ Payload structure: {len(payload['messages'])} messages")
|
| 1242 |
|
| 1243 |
+
debug_print(f"\nπ Making API request to LMArena...")
|
| 1244 |
+
debug_print(f"⏱️ Timeout set to: 120 seconds")
|
| 1245 |
|
| 1246 |
# Handle streaming mode
|
| 1247 |
if stream:
|
|
|
|
| 1251 |
|
| 1252 |
async with httpx.AsyncClient() as client:
|
| 1253 |
try:
|
| 1254 |
+
debug_print("π‘ Sending POST request for streaming...")
|
| 1255 |
async with client.stream('POST', url, json=payload, headers=headers, timeout=120) as response:
|
| 1256 |
+
debug_print(f"✅ Stream opened - Status: {response.status_code}")
|
| 1257 |
response.raise_for_status()
|
| 1258 |
|
| 1259 |
async for line in response.aiter_lines():
|
|
|
|
| 1330 |
chat_sessions[api_key_str][conversation_id]["last_message_id"] = model_msg_id
|
| 1331 |
|
| 1332 |
yield "data: [DONE]\n\n"
|
| 1333 |
+
debug_print(f"✅ Stream completed - {len(response_text)} chars sent")
|
| 1334 |
|
| 1335 |
except httpx.HTTPStatusError as e:
|
| 1336 |
error_msg = f"LMArena API error: {e.response.status_code}"
|
|
|
|
| 1358 |
# Handle non-streaming mode (original code)
|
| 1359 |
async with httpx.AsyncClient() as client:
|
| 1360 |
try:
|
| 1361 |
+
debug_print("π‘ Sending POST request...")
|
| 1362 |
response = await client.post(url, json=payload, headers=headers, timeout=120)
|
| 1363 |
|
| 1364 |
+
debug_print(f"✅ Response received - Status: {response.status_code}")
|
| 1365 |
+
debug_print(f"π Response length: {len(response.text)} characters")
|
| 1366 |
+
debug_print(f"π Response headers: {dict(response.headers)}")
|
| 1367 |
|
| 1368 |
response.raise_for_status()
|
| 1369 |
|
| 1370 |
+
debug_print(f"π Processing response...")
|
| 1371 |
+
debug_print(f"π First 500 chars of response:\n{response.text[:500]}")
|
| 1372 |
|
| 1373 |
# Process response in lmarena format
|
| 1374 |
# Format: a0:"text chunk" for content, ad:{...} for metadata
|
|
|
|
| 1378 |
text_chunks_found = 0
|
| 1379 |
metadata_found = 0
|
| 1380 |
|
| 1381 |
+
debug_print(f"π Parsing response lines...")
|
| 1382 |
|
| 1383 |
error_message = None
|
| 1384 |
for line in response.text.splitlines():
|
|
|
|
| 1396 |
text_chunk = json.loads(chunk_data)
|
| 1397 |
response_text += text_chunk
|
| 1398 |
if text_chunks_found <= 3: # Log first 3 chunks
|
| 1399 |
+
debug_print(f" ✅ Chunk {text_chunks_found}: {repr(text_chunk[:50])}")
|
| 1400 |
except json.JSONDecodeError as e:
|
| 1401 |
+
debug_print(f" ⚠️ Failed to parse text chunk on line {line_count}: {chunk_data[:100]} - {e}")
|
| 1402 |
continue
|
| 1403 |
|
| 1404 |
# Parse error messages: a3:"An error occurred"
|
|
|
|
| 1406 |
error_data = line[3:] # Remove "a3:" prefix
|
| 1407 |
try:
|
| 1408 |
error_message = json.loads(error_data)
|
| 1409 |
+
debug_print(f" β Error message received: {error_message}")
|
| 1410 |
except json.JSONDecodeError as e:
|
| 1411 |
+
debug_print(f" ⚠️ Failed to parse error message on line {line_count}: {error_data[:100]} - {e}")
|
| 1412 |
error_message = error_data
|
| 1413 |
|
| 1414 |
# Parse metadata: ad:{"finishReason":"stop"}
|
|
|
|
| 1418 |
try:
|
| 1419 |
metadata = json.loads(metadata_data)
|
| 1420 |
finish_reason = metadata.get("finishReason")
|
| 1421 |
+
debug_print(f" π Metadata found: finishReason={finish_reason}")
|
| 1422 |
except json.JSONDecodeError as e:
|
| 1423 |
+
debug_print(f" ⚠️ Failed to parse metadata on line {line_count}: {metadata_data[:100]} - {e}")
|
| 1424 |
continue
|
| 1425 |
elif line.strip(): # Non-empty line that doesn't match expected format
|
| 1426 |
if line_count <= 5: # Log first 5 unexpected lines
|
| 1427 |
+
debug_print(f" β Unexpected line format {line_count}: {line[:100]}")
|
| 1428 |
+
|
| 1429 |
+
debug_print(f"\nπ Parsing Summary:")
|
| 1430 |
+
debug_print(f" - Total lines: {line_count}")
|
| 1431 |
+
debug_print(f" - Text chunks found: {text_chunks_found}")
|
| 1432 |
+
debug_print(f" - Metadata entries: {metadata_found}")
|
| 1433 |
+
debug_print(f" - Final response length: {len(response_text)} chars")
|
| 1434 |
+
debug_print(f" - Finish reason: {finish_reason}")
|
| 1435 |
|
| 1436 |
if not response_text:
|
| 1437 |
+
debug_print(f"\n⚠️ WARNING: Empty response text!")
|
| 1438 |
+
debug_print(f"π Full raw response:\n{response.text}")
|
| 1439 |
if error_message:
|
| 1440 |
+
error_detail = f"LMArena API error: {error_message}"
|
| 1441 |
+
print(f"β {error_detail}")
|
| 1442 |
+
# Return OpenAI-compatible error response
|
| 1443 |
+
return {
|
| 1444 |
+
"error": {
|
| 1445 |
+
"message": error_detail,
|
| 1446 |
+
"type": "upstream_error",
|
| 1447 |
+
"code": "lmarena_error"
|
| 1448 |
+
}
|
| 1449 |
+
}
|
| 1450 |
else:
|
| 1451 |
error_detail = "LMArena API returned empty response. This could be due to: invalid auth token, expired cf_clearance, model unavailable, or API rate limiting."
|
| 1452 |
+
debug_print(f"β {error_detail}")
|
| 1453 |
+
# Return OpenAI-compatible error response
|
| 1454 |
+
return {
|
| 1455 |
+
"error": {
|
| 1456 |
+
"message": error_detail,
|
| 1457 |
+
"type": "upstream_error",
|
| 1458 |
+
"code": "empty_response"
|
| 1459 |
+
}
|
| 1460 |
+
}
|
| 1461 |
else:
|
| 1462 |
+
debug_print(f"✅ Response text preview: {response_text[:200]}...")
|
| 1463 |
|
| 1464 |
# Update session - Store message history with IDs
|
| 1465 |
if not session:
|
|
|
|
| 1471 |
{"id": model_msg_id, "role": "assistant", "content": response_text.strip()}
|
| 1472 |
]
|
| 1473 |
}
|
| 1474 |
+
debug_print(f"πΎ Saved new session for conversation {conversation_id}")
|
| 1475 |
else:
|
| 1476 |
# Append new messages to history
|
| 1477 |
chat_sessions[api_key_str][conversation_id]["messages"].append(
|
|
|
|
| 1480 |
chat_sessions[api_key_str][conversation_id]["messages"].append(
|
| 1481 |
{"id": model_msg_id, "role": "assistant", "content": response_text.strip()}
|
| 1482 |
)
|
| 1483 |
+
debug_print(f"πΎ Updated existing session for conversation {conversation_id}")
|
| 1484 |
|
| 1485 |
final_response = {
|
| 1486 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
|
|
|
| 1503 |
}
|
| 1504 |
}
|
| 1505 |
|
| 1506 |
+
debug_print(f"\n✅ REQUEST COMPLETED SUCCESSFULLY")
|
| 1507 |
+
debug_print("="*80 + "\n")
|
| 1508 |
|
| 1509 |
return final_response
|
| 1510 |
|
|
|
|
| 1518 |
print(f"\nβ HTTP STATUS ERROR")
|
| 1519 |
print(f"π Error detail: {error_detail}")
|
| 1520 |
print(f"π€ Request URL: {url}")
|
| 1521 |
+
debug_print(f"π€ Request payload (truncated): {json.dumps(payload, indent=2)[:500]}")
|
| 1522 |
+
debug_print(f"π₯ Response text: {e.response.text[:500]}")
|
| 1523 |
print("="*80 + "\n")
|
| 1524 |
|
| 1525 |
+
# Return OpenAI-compatible error response
|
| 1526 |
+
error_type = "rate_limit_error" if e.response.status_code == 429 else "upstream_error"
|
| 1527 |
+
return {
|
| 1528 |
+
"error": {
|
| 1529 |
+
"message": error_detail,
|
| 1530 |
+
"type": error_type,
|
| 1531 |
+
"code": f"http_{e.response.status_code}"
|
| 1532 |
+
}
|
| 1533 |
+
}
|
|
|
|
|
|
|
| 1534 |
|
| 1535 |
except httpx.TimeoutException as e:
|
| 1536 |
print(f"\n⏱️ TIMEOUT ERROR")
|
| 1537 |
print(f"π Request timed out after 120 seconds")
|
| 1538 |
print(f"π€ Request URL: {url}")
|
| 1539 |
print("="*80 + "\n")
|
| 1540 |
+
# Return OpenAI-compatible error response
|
| 1541 |
+
return {
|
| 1542 |
+
"error": {
|
| 1543 |
+
"message": "Request to LMArena API timed out after 120 seconds",
|
| 1544 |
+
"type": "timeout_error",
|
| 1545 |
+
"code": "request_timeout"
|
| 1546 |
+
}
|
| 1547 |
+
}
|
| 1548 |
|
| 1549 |
except Exception as e:
|
| 1550 |
print(f"\nβ UNEXPECTED ERROR IN HTTP CLIENT")
|
|
|
|
| 1552 |
print(f"π Error message: {str(e)}")
|
| 1553 |
print(f"π€ Request URL: {url}")
|
| 1554 |
print("="*80 + "\n")
|
| 1555 |
+
# Return OpenAI-compatible error response
|
| 1556 |
+
return {
|
| 1557 |
+
"error": {
|
| 1558 |
+
"message": f"Unexpected error: {str(e)}",
|
| 1559 |
+
"type": "internal_error",
|
| 1560 |
+
"code": type(e).__name__.lower()
|
| 1561 |
+
}
|
| 1562 |
+
}
|
| 1563 |
|
| 1564 |
except HTTPException:
|
| 1565 |
raise
|