waddie committed on
Commit
7b69e96
Β·
1 Parent(s): ddf7346
Files changed (1) hide show
  1. src/main.py +141 -83
src/main.py CHANGED
@@ -16,6 +16,18 @@ from fastapi.security import APIKeyHeader
16
 
17
  import httpx
18
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # Custom UUIDv7 implementation (using correct Unix epoch)
20
  def uuid7():
21
  """
@@ -206,6 +218,23 @@ async def get_initial_data():
206
  except Exception as e:
207
  print(f"❌ An error occurred during initial data retrieval: {e}")
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  @app.on_event("startup")
210
  async def startup_event():
211
  # Ensure config and models files exist
@@ -213,7 +242,10 @@ async def startup_event():
213
  save_models(get_models())
214
  # Load usage stats from config
215
  load_usage_stats()
 
216
  asyncio.create_task(get_initial_data())
 
 
217
 
218
  # --- UI Endpoints (Login/Dashboard) ---
219
 
@@ -1004,30 +1036,30 @@ async def list_models(api_key: dict = Depends(rate_limit_api_key)):
1004
 
1005
  @app.post("/api/v1/chat/completions")
1006
  async def api_chat_completions(request: Request, api_key: dict = Depends(rate_limit_api_key)):
1007
- print("\n" + "="*80)
1008
- print("πŸ”΅ NEW API REQUEST RECEIVED")
1009
- print("="*80)
1010
 
1011
  try:
1012
  body = await request.json()
1013
- print(f"πŸ“₯ Request body keys: {list(body.keys())}")
1014
 
1015
  model_public_name = body.get("model")
1016
  messages = body.get("messages", [])
1017
  stream = body.get("stream", False)
1018
 
1019
- print(f"🌊 Stream mode: {stream}")
1020
 
1021
- print(f"πŸ€– Requested model: {model_public_name}")
1022
- print(f"πŸ’¬ Number of messages: {len(messages)}")
1023
 
1024
  if not model_public_name or not messages:
1025
- print("❌ Missing model or messages in request")
1026
  raise HTTPException(status_code=400, detail="Missing 'model' or 'messages' in request body.")
1027
 
1028
  # Find model ID from public name
1029
  models = get_models()
1030
- print(f"πŸ“š Total models loaded: {len(models)}")
1031
 
1032
  model_id = None
1033
  for m in models:
@@ -1036,13 +1068,13 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1036
  break
1037
 
1038
  if not model_id:
1039
- print(f"❌ Model '{model_public_name}' not found in model list")
1040
  raise HTTPException(
1041
  status_code=404,
1042
  detail=f"Model '{model_public_name}' not found. Use /api/v1/models to see available models."
1043
  )
1044
 
1045
- print(f"βœ… Found model ID: {model_id}")
1046
 
1047
  # Log usage
1048
  model_usage_stats[model_public_name] += 1
@@ -1056,48 +1088,48 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1056
 
1057
  # Validate prompt is a string and not too large
1058
  if not isinstance(prompt, str):
1059
- print("❌ Prompt content must be a string")
1060
  raise HTTPException(status_code=400, detail="Message content must be a string.")
1061
 
1062
  if not prompt:
1063
- print("❌ Last message has no content")
1064
  raise HTTPException(status_code=400, detail="Last message must have content.")
1065
 
1066
  # Log prompt length for debugging character limit issues
1067
- print(f"πŸ“ User prompt length: {len(prompt)} characters")
1068
- print(f"πŸ“ User prompt preview: {prompt[:100]}..." if len(prompt) > 100 else f"πŸ“ User prompt: {prompt}")
1069
 
1070
  # Check for reasonable character limit (LMArena appears to have limits)
1071
  # Typical limit seems to be around 32K-64K characters based on testing
1072
  MAX_PROMPT_LENGTH = 50000 # Conservative estimate
1073
  if len(prompt) > MAX_PROMPT_LENGTH:
1074
  error_msg = f"Prompt too long ({len(prompt)} characters). LMArena has a character limit of approximately {MAX_PROMPT_LENGTH} characters. Please reduce the message size."
1075
- print(f"❌ {error_msg}")
1076
  raise HTTPException(status_code=400, detail=error_msg)
1077
 
1078
  # Use API key + conversation tracking
1079
  api_key_str = api_key["key"]
1080
  conversation_id = body.get("conversation_id", f"conv-{uuid.uuid4()}")
1081
 
1082
- print(f"πŸ”‘ API Key: {api_key_str[:20]}...")
1083
- print(f"πŸ’­ Conversation ID: {conversation_id}")
1084
 
1085
  headers = get_request_headers()
1086
- print(f"πŸ“‹ Headers prepared (auth token length: {len(headers.get('Cookie', '').split('arena-auth-prod-v1=')[-1].split(';')[0])} chars)")
1087
 
1088
  # Check if conversation exists for this API key
1089
  session = chat_sessions[api_key_str].get(conversation_id)
1090
 
1091
  if not session:
1092
- print("πŸ†• Creating NEW conversation session")
1093
  # New conversation - Generate all IDs at once (like the browser does)
1094
  session_id = str(uuid7())
1095
  user_msg_id = str(uuid7())
1096
  model_msg_id = str(uuid7())
1097
 
1098
- print(f"πŸ”‘ Generated session_id: {session_id}")
1099
- print(f"πŸ‘€ Generated user_msg_id: {user_msg_id}")
1100
- print(f"πŸ€– Generated model_msg_id: {model_msg_id}")
1101
 
1102
  payload = {
1103
  "id": session_id,
@@ -1135,15 +1167,15 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1135
  "modality": "chat"
1136
  }
1137
  url = "https://lmarena.ai/nextjs-api/stream/create-evaluation"
1138
- print(f"πŸ“€ Target URL: {url}")
1139
- print(f"πŸ“¦ Payload structure: {len(payload['messages'])} messages")
1140
  else:
1141
- print("πŸ”„ Using EXISTING conversation session")
1142
  # Follow-up message - Generate new message IDs
1143
  user_msg_id = str(uuid7())
1144
- print(f"πŸ‘€ Generated followup user_msg_id: {user_msg_id}")
1145
  model_msg_id = str(uuid7())
1146
- print(f"πŸ€– Generated followup model_msg_id: {model_msg_id}")
1147
 
1148
  # Build full conversation history using stored messages with their original IDs
1149
  conversation_messages = []
@@ -1205,11 +1237,11 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1205
  "modality": "chat"
1206
  }
1207
  url = f"https://lmarena.ai/nextjs-api/stream/post-to-evaluation/{session['conversation_id']}"
1208
- print(f"πŸ“€ Target URL: {url}")
1209
- print(f"πŸ“¦ Payload structure: {len(payload['messages'])} messages")
1210
 
1211
- print(f"\nπŸš€ Making API request to LMArena...")
1212
- print(f"⏱️ Timeout set to: 120 seconds")
1213
 
1214
  # Handle streaming mode
1215
  if stream:
@@ -1219,9 +1251,9 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1219
 
1220
  async with httpx.AsyncClient() as client:
1221
  try:
1222
- print("πŸ“‘ Sending POST request for streaming...")
1223
  async with client.stream('POST', url, json=payload, headers=headers, timeout=120) as response:
1224
- print(f"βœ… Stream opened - Status: {response.status_code}")
1225
  response.raise_for_status()
1226
 
1227
  async for line in response.aiter_lines():
@@ -1298,7 +1330,7 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1298
  chat_sessions[api_key_str][conversation_id]["last_message_id"] = model_msg_id
1299
 
1300
  yield "data: [DONE]\n\n"
1301
- print(f"βœ… Stream completed - {len(response_text)} chars sent")
1302
 
1303
  except httpx.HTTPStatusError as e:
1304
  error_msg = f"LMArena API error: {e.response.status_code}"
@@ -1326,17 +1358,17 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1326
  # Handle non-streaming mode (original code)
1327
  async with httpx.AsyncClient() as client:
1328
  try:
1329
- print("πŸ“‘ Sending POST request...")
1330
  response = await client.post(url, json=payload, headers=headers, timeout=120)
1331
 
1332
- print(f"βœ… Response received - Status: {response.status_code}")
1333
- print(f"πŸ“ Response length: {len(response.text)} characters")
1334
- print(f"πŸ“‹ Response headers: {dict(response.headers)}")
1335
 
1336
  response.raise_for_status()
1337
 
1338
- print(f"πŸ” Processing response...")
1339
- print(f"πŸ“„ First 500 chars of response:\n{response.text[:500]}")
1340
 
1341
  # Process response in lmarena format
1342
  # Format: a0:"text chunk" for content, ad:{...} for metadata
@@ -1346,7 +1378,7 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1346
  text_chunks_found = 0
1347
  metadata_found = 0
1348
 
1349
- print(f"πŸ“Š Parsing response lines...")
1350
 
1351
  error_message = None
1352
  for line in response.text.splitlines():
@@ -1364,9 +1396,9 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1364
  text_chunk = json.loads(chunk_data)
1365
  response_text += text_chunk
1366
  if text_chunks_found <= 3: # Log first 3 chunks
1367
- print(f" βœ… Chunk {text_chunks_found}: {repr(text_chunk[:50])}")
1368
  except json.JSONDecodeError as e:
1369
- print(f" ⚠️ Failed to parse text chunk on line {line_count}: {chunk_data[:100]} - {e}")
1370
  continue
1371
 
1372
  # Parse error messages: a3:"An error occurred"
@@ -1374,9 +1406,9 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1374
  error_data = line[3:] # Remove "a3:" prefix
1375
  try:
1376
  error_message = json.loads(error_data)
1377
- print(f" ❌ Error message received: {error_message}")
1378
  except json.JSONDecodeError as e:
1379
- print(f" ⚠️ Failed to parse error message on line {line_count}: {error_data[:100]} - {e}")
1380
  error_message = error_data
1381
 
1382
  # Parse metadata: ad:{"finishReason":"stop"}
@@ -1386,34 +1418,48 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1386
  try:
1387
  metadata = json.loads(metadata_data)
1388
  finish_reason = metadata.get("finishReason")
1389
- print(f" πŸ“‹ Metadata found: finishReason={finish_reason}")
1390
  except json.JSONDecodeError as e:
1391
- print(f" ⚠️ Failed to parse metadata on line {line_count}: {metadata_data[:100]} - {e}")
1392
  continue
1393
  elif line.strip(): # Non-empty line that doesn't match expected format
1394
  if line_count <= 5: # Log first 5 unexpected lines
1395
- print(f" ❓ Unexpected line format {line_count}: {line[:100]}")
1396
-
1397
- print(f"\nπŸ“Š Parsing Summary:")
1398
- print(f" - Total lines: {line_count}")
1399
- print(f" - Text chunks found: {text_chunks_found}")
1400
- print(f" - Metadata entries: {metadata_found}")
1401
- print(f" - Final response length: {len(response_text)} chars")
1402
- print(f" - Finish reason: {finish_reason}")
1403
 
1404
  if not response_text:
1405
- print(f"\n⚠️ WARNING: Empty response text!")
1406
- print(f"πŸ“„ Full raw response:\n{response.text}")
1407
  if error_message:
1408
- error_detail = f"LMArena API returned an error: {error_message}"
1409
- print(f"❌ Raising HTTPException with error: {error_detail}")
1410
- raise HTTPException(status_code=502, detail=error_detail)
 
 
 
 
 
 
 
1411
  else:
1412
  error_detail = "LMArena API returned empty response. This could be due to: invalid auth token, expired cf_clearance, model unavailable, or API rate limiting."
1413
- print(f"❌ Raising HTTPException: {error_detail}")
1414
- raise HTTPException(status_code=502, detail=error_detail)
 
 
 
 
 
 
 
1415
  else:
1416
- print(f"βœ… Response text preview: {response_text[:200]}...")
1417
 
1418
  # Update session - Store message history with IDs
1419
  if not session:
@@ -1425,7 +1471,7 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1425
  {"id": model_msg_id, "role": "assistant", "content": response_text.strip()}
1426
  ]
1427
  }
1428
- print(f"πŸ’Ύ Saved new session for conversation {conversation_id}")
1429
  else:
1430
  # Append new messages to history
1431
  chat_sessions[api_key_str][conversation_id]["messages"].append(
@@ -1434,7 +1480,7 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1434
  chat_sessions[api_key_str][conversation_id]["messages"].append(
1435
  {"id": model_msg_id, "role": "assistant", "content": response_text.strip()}
1436
  )
1437
- print(f"πŸ’Ύ Updated existing session for conversation {conversation_id}")
1438
 
1439
  final_response = {
1440
  "id": f"chatcmpl-{uuid.uuid4()}",
@@ -1457,8 +1503,8 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1457
  }
1458
  }
1459
 
1460
- print(f"\nβœ… REQUEST COMPLETED SUCCESSFULLY")
1461
- print("="*80 + "\n")
1462
 
1463
  return final_response
1464
 
@@ -1472,28 +1518,33 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1472
  print(f"\n❌ HTTP STATUS ERROR")
1473
  print(f"πŸ“› Error detail: {error_detail}")
1474
  print(f"πŸ“€ Request URL: {url}")
1475
- print(f"πŸ“€ Request payload (truncated): {json.dumps(payload, indent=2)[:500]}")
1476
- print(f"πŸ“₯ Response text: {e.response.text[:500]}")
1477
  print("="*80 + "\n")
1478
 
1479
- # Handle 429 from LMArena - propagate Retry-After if available
1480
- if e.response.status_code == 429:
1481
- retry_after = e.response.headers.get("Retry-After", "60") # Default 60s
1482
- print(f"⏱️ LMArena rate limit - Retry-After: {retry_after}s")
1483
- raise HTTPException(
1484
- status_code=429,
1485
- detail=f"LMArena rate limit exceeded: {error_detail}",
1486
- headers={"Retry-After": retry_after}
1487
- )
1488
-
1489
- raise HTTPException(status_code=502, detail=error_detail)
1490
 
1491
  except httpx.TimeoutException as e:
1492
  print(f"\n⏱️ TIMEOUT ERROR")
1493
  print(f"πŸ“› Request timed out after 120 seconds")
1494
  print(f"πŸ“€ Request URL: {url}")
1495
  print("="*80 + "\n")
1496
- raise HTTPException(status_code=504, detail="Request to LMArena API timed out")
 
 
 
 
 
 
 
1497
 
1498
  except Exception as e:
1499
  print(f"\n❌ UNEXPECTED ERROR IN HTTP CLIENT")
@@ -1501,7 +1552,14 @@ async def api_chat_completions(request: Request, api_key: dict = Depends(rate_li
1501
  print(f"πŸ“› Error message: {str(e)}")
1502
  print(f"πŸ“€ Request URL: {url}")
1503
  print("="*80 + "\n")
1504
- raise
 
 
 
 
 
 
 
1505
 
1506
  except HTTPException:
1507
  raise
 
16
 
17
  import httpx
18
 
19
# ============================================================
# DEBUG CONFIGURATION
# ============================================================
# Toggle verbose request/response logging:
# True = detailed per-request logs, False = minimal logging.
DEBUG = False
# ============================================================

def debug_print(*args, **kwargs):
    """Forward all arguments to print(), but only when DEBUG is enabled."""
    if not DEBUG:
        return
    print(*args, **kwargs)
30
+
31
  # Custom UUIDv7 implementation (using correct Unix epoch)
32
  def uuid7():
33
  """
 
218
  except Exception as e:
219
  print(f"❌ An error occurred during initial data retrieval: {e}")
220
 
221
async def periodic_refresh_task(interval_seconds: float = 1800):
    """Background task that refreshes cf_clearance and models on a schedule.

    Sleeps for ``interval_seconds`` between refreshes (default 1800 s = 30
    minutes, matching the original hard-coded behavior), then re-runs
    ``get_initial_data()``. A failed refresh is logged and the loop keeps
    running, so one bad cycle never kills the task.

    Args:
        interval_seconds: Delay between refresh attempts, in seconds.
    """
    while True:
        try:
            # Sleep first so the refresh does not duplicate the initial
            # get_initial_data() call made at startup.
            await asyncio.sleep(interval_seconds)
            print("\n" + "="*60)
            print(f"πŸ”„ Starting scheduled refresh (every {interval_seconds:.0f}s)...")
            print("="*60)
            await get_initial_data()
            print("βœ… Scheduled refresh completed")
            print("="*60 + "\n")
        except Exception as e:
            # NOTE: asyncio.CancelledError is not an Exception on modern
            # Python, so task cancellation still propagates normally.
            print(f"❌ Error in periodic refresh task: {e}")
237
+
238
  @app.on_event("startup")
239
  async def startup_event():
240
  # Ensure config and models files exist
 
242
  save_models(get_models())
243
  # Load usage stats from config
244
  load_usage_stats()
245
+ # Start initial data fetch
246
  asyncio.create_task(get_initial_data())
247
+ # Start periodic refresh task (every 30 minutes)
248
+ asyncio.create_task(periodic_refresh_task())
249
 
250
  # --- UI Endpoints (Login/Dashboard) ---
251
 
 
1036
 
1037
  @app.post("/api/v1/chat/completions")
1038
  async def api_chat_completions(request: Request, api_key: dict = Depends(rate_limit_api_key)):
1039
+ debug_print("\n" + "="*80)
1040
+ debug_print("πŸ”΅ NEW API REQUEST RECEIVED")
1041
+ debug_print("="*80)
1042
 
1043
  try:
1044
  body = await request.json()
1045
+ debug_print(f"πŸ“₯ Request body keys: {list(body.keys())}")
1046
 
1047
  model_public_name = body.get("model")
1048
  messages = body.get("messages", [])
1049
  stream = body.get("stream", False)
1050
 
1051
+ debug_print(f"🌊 Stream mode: {stream}")
1052
 
1053
+ debug_print(f"πŸ€– Requested model: {model_public_name}")
1054
+ debug_print(f"πŸ’¬ Number of messages: {len(messages)}")
1055
 
1056
  if not model_public_name or not messages:
1057
+ debug_print("❌ Missing model or messages in request")
1058
  raise HTTPException(status_code=400, detail="Missing 'model' or 'messages' in request body.")
1059
 
1060
  # Find model ID from public name
1061
  models = get_models()
1062
+ debug_print(f"πŸ“š Total models loaded: {len(models)}")
1063
 
1064
  model_id = None
1065
  for m in models:
 
1068
  break
1069
 
1070
  if not model_id:
1071
+ debug_print(f"❌ Model '{model_public_name}' not found in model list")
1072
  raise HTTPException(
1073
  status_code=404,
1074
  detail=f"Model '{model_public_name}' not found. Use /api/v1/models to see available models."
1075
  )
1076
 
1077
+ debug_print(f"βœ… Found model ID: {model_id}")
1078
 
1079
  # Log usage
1080
  model_usage_stats[model_public_name] += 1
 
1088
 
1089
  # Validate prompt is a string and not too large
1090
  if not isinstance(prompt, str):
1091
+ debug_print("❌ Prompt content must be a string")
1092
  raise HTTPException(status_code=400, detail="Message content must be a string.")
1093
 
1094
  if not prompt:
1095
+ debug_print("❌ Last message has no content")
1096
  raise HTTPException(status_code=400, detail="Last message must have content.")
1097
 
1098
  # Log prompt length for debugging character limit issues
1099
+ debug_print(f"πŸ“ User prompt length: {len(prompt)} characters")
1100
+ debug_print(f"πŸ“ User prompt preview: {prompt[:100]}..." if len(prompt) > 100 else f"πŸ“ User prompt: {prompt}")
1101
 
1102
  # Check for reasonable character limit (LMArena appears to have limits)
1103
  # Typical limit seems to be around 32K-64K characters based on testing
1104
  MAX_PROMPT_LENGTH = 50000 # Conservative estimate
1105
  if len(prompt) > MAX_PROMPT_LENGTH:
1106
  error_msg = f"Prompt too long ({len(prompt)} characters). LMArena has a character limit of approximately {MAX_PROMPT_LENGTH} characters. Please reduce the message size."
1107
+ debug_print(f"❌ {error_msg}")
1108
  raise HTTPException(status_code=400, detail=error_msg)
1109
 
1110
  # Use API key + conversation tracking
1111
  api_key_str = api_key["key"]
1112
  conversation_id = body.get("conversation_id", f"conv-{uuid.uuid4()}")
1113
 
1114
+ debug_print(f"πŸ”‘ API Key: {api_key_str[:20]}...")
1115
+ debug_print(f"πŸ’­ Conversation ID: {conversation_id}")
1116
 
1117
  headers = get_request_headers()
1118
+ debug_print(f"πŸ“‹ Headers prepared (auth token length: {len(headers.get('Cookie', '').split('arena-auth-prod-v1=')[-1].split(';')[0])} chars)")
1119
 
1120
  # Check if conversation exists for this API key
1121
  session = chat_sessions[api_key_str].get(conversation_id)
1122
 
1123
  if not session:
1124
+ debug_print("πŸ†• Creating NEW conversation session")
1125
  # New conversation - Generate all IDs at once (like the browser does)
1126
  session_id = str(uuid7())
1127
  user_msg_id = str(uuid7())
1128
  model_msg_id = str(uuid7())
1129
 
1130
+ debug_print(f"πŸ”‘ Generated session_id: {session_id}")
1131
+ debug_print(f"πŸ‘€ Generated user_msg_id: {user_msg_id}")
1132
+ debug_print(f"πŸ€– Generated model_msg_id: {model_msg_id}")
1133
 
1134
  payload = {
1135
  "id": session_id,
 
1167
  "modality": "chat"
1168
  }
1169
  url = "https://lmarena.ai/nextjs-api/stream/create-evaluation"
1170
+ debug_print(f"πŸ“€ Target URL: {url}")
1171
+ debug_print(f"πŸ“¦ Payload structure: {len(payload['messages'])} messages")
1172
  else:
1173
+ debug_print("πŸ”„ Using EXISTING conversation session")
1174
  # Follow-up message - Generate new message IDs
1175
  user_msg_id = str(uuid7())
1176
+ debug_print(f"πŸ‘€ Generated followup user_msg_id: {user_msg_id}")
1177
  model_msg_id = str(uuid7())
1178
+ debug_print(f"πŸ€– Generated followup model_msg_id: {model_msg_id}")
1179
 
1180
  # Build full conversation history using stored messages with their original IDs
1181
  conversation_messages = []
 
1237
  "modality": "chat"
1238
  }
1239
  url = f"https://lmarena.ai/nextjs-api/stream/post-to-evaluation/{session['conversation_id']}"
1240
+ debug_print(f"πŸ“€ Target URL: {url}")
1241
+ debug_print(f"πŸ“¦ Payload structure: {len(payload['messages'])} messages")
1242
 
1243
+ debug_print(f"\nπŸš€ Making API request to LMArena...")
1244
+ debug_print(f"⏱️ Timeout set to: 120 seconds")
1245
 
1246
  # Handle streaming mode
1247
  if stream:
 
1251
 
1252
  async with httpx.AsyncClient() as client:
1253
  try:
1254
+ debug_print("πŸ“‘ Sending POST request for streaming...")
1255
  async with client.stream('POST', url, json=payload, headers=headers, timeout=120) as response:
1256
+ debug_print(f"βœ… Stream opened - Status: {response.status_code}")
1257
  response.raise_for_status()
1258
 
1259
  async for line in response.aiter_lines():
 
1330
  chat_sessions[api_key_str][conversation_id]["last_message_id"] = model_msg_id
1331
 
1332
  yield "data: [DONE]\n\n"
1333
+ debug_print(f"βœ… Stream completed - {len(response_text)} chars sent")
1334
 
1335
  except httpx.HTTPStatusError as e:
1336
  error_msg = f"LMArena API error: {e.response.status_code}"
 
1358
  # Handle non-streaming mode (original code)
1359
  async with httpx.AsyncClient() as client:
1360
  try:
1361
+ debug_print("πŸ“‘ Sending POST request...")
1362
  response = await client.post(url, json=payload, headers=headers, timeout=120)
1363
 
1364
+ debug_print(f"βœ… Response received - Status: {response.status_code}")
1365
+ debug_print(f"πŸ“ Response length: {len(response.text)} characters")
1366
+ debug_print(f"πŸ“‹ Response headers: {dict(response.headers)}")
1367
 
1368
  response.raise_for_status()
1369
 
1370
+ debug_print(f"πŸ” Processing response...")
1371
+ debug_print(f"πŸ“„ First 500 chars of response:\n{response.text[:500]}")
1372
 
1373
  # Process response in lmarena format
1374
  # Format: a0:"text chunk" for content, ad:{...} for metadata
 
1378
  text_chunks_found = 0
1379
  metadata_found = 0
1380
 
1381
+ debug_print(f"πŸ“Š Parsing response lines...")
1382
 
1383
  error_message = None
1384
  for line in response.text.splitlines():
 
1396
  text_chunk = json.loads(chunk_data)
1397
  response_text += text_chunk
1398
  if text_chunks_found <= 3: # Log first 3 chunks
1399
+ debug_print(f" βœ… Chunk {text_chunks_found}: {repr(text_chunk[:50])}")
1400
  except json.JSONDecodeError as e:
1401
+ debug_print(f" ⚠️ Failed to parse text chunk on line {line_count}: {chunk_data[:100]} - {e}")
1402
  continue
1403
 
1404
  # Parse error messages: a3:"An error occurred"
 
1406
  error_data = line[3:] # Remove "a3:" prefix
1407
  try:
1408
  error_message = json.loads(error_data)
1409
+ debug_print(f" ❌ Error message received: {error_message}")
1410
  except json.JSONDecodeError as e:
1411
+ debug_print(f" ⚠️ Failed to parse error message on line {line_count}: {error_data[:100]} - {e}")
1412
  error_message = error_data
1413
 
1414
  # Parse metadata: ad:{"finishReason":"stop"}
 
1418
  try:
1419
  metadata = json.loads(metadata_data)
1420
  finish_reason = metadata.get("finishReason")
1421
+ debug_print(f" πŸ“‹ Metadata found: finishReason={finish_reason}")
1422
  except json.JSONDecodeError as e:
1423
+ debug_print(f" ⚠️ Failed to parse metadata on line {line_count}: {metadata_data[:100]} - {e}")
1424
  continue
1425
  elif line.strip(): # Non-empty line that doesn't match expected format
1426
  if line_count <= 5: # Log first 5 unexpected lines
1427
+ debug_print(f" ❓ Unexpected line format {line_count}: {line[:100]}")
1428
+
1429
+ debug_print(f"\nπŸ“Š Parsing Summary:")
1430
+ debug_print(f" - Total lines: {line_count}")
1431
+ debug_print(f" - Text chunks found: {text_chunks_found}")
1432
+ debug_print(f" - Metadata entries: {metadata_found}")
1433
+ debug_print(f" - Final response length: {len(response_text)} chars")
1434
+ debug_print(f" - Finish reason: {finish_reason}")
1435
 
1436
  if not response_text:
1437
+ debug_print(f"\n⚠️ WARNING: Empty response text!")
1438
+ debug_print(f"πŸ“„ Full raw response:\n{response.text}")
1439
  if error_message:
1440
+ error_detail = f"LMArena API error: {error_message}"
1441
+ print(f"❌ {error_detail}")
1442
+ # Return OpenAI-compatible error response
1443
+ return {
1444
+ "error": {
1445
+ "message": error_detail,
1446
+ "type": "upstream_error",
1447
+ "code": "lmarena_error"
1448
+ }
1449
+ }
1450
  else:
1451
  error_detail = "LMArena API returned empty response. This could be due to: invalid auth token, expired cf_clearance, model unavailable, or API rate limiting."
1452
+ debug_print(f"❌ {error_detail}")
1453
+ # Return OpenAI-compatible error response
1454
+ return {
1455
+ "error": {
1456
+ "message": error_detail,
1457
+ "type": "upstream_error",
1458
+ "code": "empty_response"
1459
+ }
1460
+ }
1461
  else:
1462
+ debug_print(f"βœ… Response text preview: {response_text[:200]}...")
1463
 
1464
  # Update session - Store message history with IDs
1465
  if not session:
 
1471
  {"id": model_msg_id, "role": "assistant", "content": response_text.strip()}
1472
  ]
1473
  }
1474
+ debug_print(f"πŸ’Ύ Saved new session for conversation {conversation_id}")
1475
  else:
1476
  # Append new messages to history
1477
  chat_sessions[api_key_str][conversation_id]["messages"].append(
 
1480
  chat_sessions[api_key_str][conversation_id]["messages"].append(
1481
  {"id": model_msg_id, "role": "assistant", "content": response_text.strip()}
1482
  )
1483
+ debug_print(f"πŸ’Ύ Updated existing session for conversation {conversation_id}")
1484
 
1485
  final_response = {
1486
  "id": f"chatcmpl-{uuid.uuid4()}",
 
1503
  }
1504
  }
1505
 
1506
+ debug_print(f"\nβœ… REQUEST COMPLETED SUCCESSFULLY")
1507
+ debug_print("="*80 + "\n")
1508
 
1509
  return final_response
1510
 
 
1518
  print(f"\n❌ HTTP STATUS ERROR")
1519
  print(f"πŸ“› Error detail: {error_detail}")
1520
  print(f"πŸ“€ Request URL: {url}")
1521
+ debug_print(f"πŸ“€ Request payload (truncated): {json.dumps(payload, indent=2)[:500]}")
1522
+ debug_print(f"πŸ“₯ Response text: {e.response.text[:500]}")
1523
  print("="*80 + "\n")
1524
 
1525
+ # Return OpenAI-compatible error response
1526
+ error_type = "rate_limit_error" if e.response.status_code == 429 else "upstream_error"
1527
+ return {
1528
+ "error": {
1529
+ "message": error_detail,
1530
+ "type": error_type,
1531
+ "code": f"http_{e.response.status_code}"
1532
+ }
1533
+ }
 
 
1534
 
1535
  except httpx.TimeoutException as e:
1536
  print(f"\n⏱️ TIMEOUT ERROR")
1537
  print(f"πŸ“› Request timed out after 120 seconds")
1538
  print(f"πŸ“€ Request URL: {url}")
1539
  print("="*80 + "\n")
1540
+ # Return OpenAI-compatible error response
1541
+ return {
1542
+ "error": {
1543
+ "message": "Request to LMArena API timed out after 120 seconds",
1544
+ "type": "timeout_error",
1545
+ "code": "request_timeout"
1546
+ }
1547
+ }
1548
 
1549
  except Exception as e:
1550
  print(f"\n❌ UNEXPECTED ERROR IN HTTP CLIENT")
 
1552
  print(f"πŸ“› Error message: {str(e)}")
1553
  print(f"πŸ“€ Request URL: {url}")
1554
  print("="*80 + "\n")
1555
+ # Return OpenAI-compatible error response
1556
+ return {
1557
+ "error": {
1558
+ "message": f"Unexpected error: {str(e)}",
1559
+ "type": "internal_error",
1560
+ "code": type(e).__name__.lower()
1561
+ }
1562
+ }
1563
 
1564
  except HTTPException:
1565
  raise