gabejavitt committed on
Commit
ed23d35
·
verified ·
1 Parent(s): 9329283

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -24
app.py CHANGED
@@ -1421,34 +1421,49 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
1421
  print(agent_code)
1422
 
1423
  # 2. Fetch Questions
 
 
 
1424
  print(f"Fetching questions from: {questions_url}")
 
1425
  try:
1426
  response = requests.get(questions_url, timeout=15)
1427
  response.raise_for_status()
1428
  questions_data = response.json()
1429
  if not questions_data:
1430
- print("Fetched questions list is empty.")
1431
- return "Fetched questions list is empty or invalid format.", None
1432
- print(f"Fetched {len(questions_data)} questions.")
 
1433
  except requests.exceptions.RequestException as e:
1434
- print(f"Error fetching questions: {e}")
1435
  return f"Error fetching questions: {e}", None
1436
  except requests.exceptions.JSONDecodeError as e:
1437
- print(f"Error decoding JSON response from questions endpoint: {e}")
1438
- print(f"Response text: {response.text[:500]}")
1439
- return f"Error decoding server response for questions: {e}", None
1440
  except Exception as e:
1441
- print(f"An unexpected error occurred fetching questions: {e}")
1442
  return f"An unexpected error occurred fetching questions: {e}", None
1443
 
1444
  # 3. Run your Agent
 
 
 
 
 
 
1445
  results_log = []
1446
  answers_payload = []
1447
- print(f"Running agent on {len(questions_data)} questions...")
1448
- for item in questions_data:
1449
-
 
 
 
1450
  task_id = item.get("task_id")
1451
  question_text = item.get("question")
 
1452
 
1453
  # Initialize file variables for the current question
1454
  local_file_path = None
@@ -1461,29 +1476,31 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
1461
  # Extract the original file name to preserve the extension
1462
  original_filename = file_path_from_api.split('/')[-1]
1463
 
1464
- # Set the path where the file will be saved locally
1465
  local_file_path = original_filename
1466
 
1467
  print(f"πŸ“₯ Downloading file for task {task_id}...")
1468
  print(f" URL: {file_download_url}")
 
1469
  print(f" Saving to: {local_file_path}")
1470
 
1471
  try:
1472
  file_response = requests.get(file_download_url, timeout=15)
1473
  file_response.raise_for_status()
1474
 
 
1475
  with open(local_file_path, 'wb') as f:
1476
  f.write(file_response.content)
1477
 
1478
  file_size = os.path.getsize(local_file_path)
1479
  print(f"βœ… Downloaded file: {original_filename} ({file_size} bytes)")
1480
 
1481
- # Add verification
1482
  if not os.path.exists(local_file_path):
1483
  print(f"⚠️ Warning: File saved but cannot be found at {local_file_path}")
1484
  local_file_path = None
1485
  else:
1486
- print(f"βœ“ File accessible at: {os.path.abspath(local_file_path)}") # Debug line
1487
 
1488
  except requests.exceptions.RequestException as e:
1489
  error_message = f"[FILE DOWNLOAD ERROR: Could not fetch file: {e}]"
@@ -1495,34 +1512,93 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
1495
  local_file_path = None
1496
 
1497
  if not task_id or question_text is None:
1498
- print(f"Skipping item with missing task_id or question: {item}")
1499
  continue
1500
 
1501
  try:
1502
  # Pass file_path to agent
1503
  submitted_answer = agent(question_text, local_file_path)
1504
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
1505
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1506
  except Exception as e:
1507
- print(f"Error running agent on task {task_id}: {e}")
1508
- print(traceback.format_exc())
1509
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1510
 
1511
  if not answers_payload:
1512
- print("Agent did not produce any answers to submit.")
1513
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
1514
 
1515
  # 4. Prepare Submission
1516
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
1517
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
1518
- print(status_update)
1519
 
1520
  # 5. Submit
1521
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
 
 
 
 
 
 
1522
  try:
 
1523
  response = requests.post(submit_url, json=submission_data, timeout=60)
 
 
1524
  response.raise_for_status()
1525
  result_data = response.json()
 
 
 
 
 
 
 
1526
  final_status = (
1527
  f"Submission Successful!\n"
1528
  f"User: {result_data.get('username')}\n"
@@ -1530,9 +1606,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
1530
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
1531
  f"Message: {result_data.get('message', 'No message received.')}"
1532
  )
1533
- print("Submission successful.")
 
 
 
 
1534
  results_df = pd.DataFrame(results_log)
1535
  return final_status, results_df
 
1536
  except requests.exceptions.HTTPError as e:
1537
  error_detail = f"Server responded with status {e.response.status_code}."
1538
  try:
@@ -1541,22 +1622,42 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
1541
  except requests.exceptions.JSONDecodeError:
1542
  error_detail += f" Response: {e.response.text[:500]}"
1543
  status_message = f"Submission Failed: {error_detail}"
 
 
 
1544
  print(status_message)
 
1545
  results_df = pd.DataFrame(results_log)
1546
  return status_message, results_df
 
1547
  except requests.exceptions.Timeout:
1548
  status_message = "Submission Failed: The request timed out."
 
 
 
1549
  print(status_message)
 
1550
  results_df = pd.DataFrame(results_log)
1551
  return status_message, results_df
 
1552
  except requests.exceptions.RequestException as e:
1553
  status_message = f"Submission Failed: Network error - {e}"
 
 
 
1554
  print(status_message)
 
1555
  results_df = pd.DataFrame(results_log)
1556
  return status_message, results_df
 
1557
  except Exception as e:
1558
  status_message = f"An unexpected error occurred during submission: {e}"
 
 
 
1559
  print(status_message)
 
 
1560
  results_df = pd.DataFrame(results_log)
1561
  return status_message, results_df
1562
 
 
1421
  print(agent_code)
1422
 
1423
  # 2. Fetch Questions
1424
+ print(f"\n{'='*70}")
1425
+ print(f"πŸ“₯ FETCHING QUESTIONS")
1426
+ print(f"{'='*70}")
1427
  print(f"Fetching questions from: {questions_url}")
1428
+
1429
  try:
1430
  response = requests.get(questions_url, timeout=15)
1431
  response.raise_for_status()
1432
  questions_data = response.json()
1433
  if not questions_data:
1434
+ print("Fetched questions list is empty.")
1435
+ return "Fetched questions list is empty or invalid format.", None
1436
+ print(f"βœ… Fetched {len(questions_data)} questions.")
1437
+ print(f"{'='*70}\n")
1438
  except requests.exceptions.RequestException as e:
1439
+ print(f"❌ Error fetching questions: {e}")
1440
  return f"Error fetching questions: {e}", None
1441
  except requests.exceptions.JSONDecodeError as e:
1442
+ print(f"❌ Error decoding JSON response from questions endpoint: {e}")
1443
+ print(f"Response text: {response.text[:500]}")
1444
+ return f"Error decoding server response for questions: {e}", None
1445
  except Exception as e:
1446
+ print(f"❌ An unexpected error occurred fetching questions: {e}")
1447
  return f"An unexpected error occurred fetching questions: {e}", None
1448
 
1449
  # 3. Run your Agent
1450
+ print(f"\n{'='*70}")
1451
+ print(f"πŸš€ STARTING EVALUATION")
1452
+ print(f"{'='*70}")
1453
+ print(f"Total questions to process: {len(questions_data)}")
1454
+ print(f"{'='*70}\n")
1455
+
1456
  results_log = []
1457
  answers_payload = []
1458
+
1459
+ for idx, item in enumerate(questions_data, 1):
1460
+ print(f"\n{'='*70}")
1461
+ print(f"πŸ“ PROCESSING QUESTION {idx}/{len(questions_data)}")
1462
+ print(f"{'='*70}")
1463
+
1464
  task_id = item.get("task_id")
1465
  question_text = item.get("question")
1466
+ correct_answer = item.get("answer", "N/A") # Get correct answer from API
1467
 
1468
  # Initialize file variables for the current question
1469
  local_file_path = None
 
1476
  # Extract the original file name to preserve the extension
1477
  original_filename = file_path_from_api.split('/')[-1]
1478
 
1479
+ # Save to current directory instead of /tmp
1480
  local_file_path = original_filename
1481
 
1482
  print(f"πŸ“₯ Downloading file for task {task_id}...")
1483
  print(f" URL: {file_download_url}")
1484
+ print(f" Original filename: {original_filename}")
1485
  print(f" Saving to: {local_file_path}")
1486
 
1487
  try:
1488
  file_response = requests.get(file_download_url, timeout=15)
1489
  file_response.raise_for_status()
1490
 
1491
+ # Save the raw bytes content to the local file path
1492
  with open(local_file_path, 'wb') as f:
1493
  f.write(file_response.content)
1494
 
1495
  file_size = os.path.getsize(local_file_path)
1496
  print(f"βœ… Downloaded file: {original_filename} ({file_size} bytes)")
1497
 
1498
+ # Verify file exists and is readable
1499
  if not os.path.exists(local_file_path):
1500
  print(f"⚠️ Warning: File saved but cannot be found at {local_file_path}")
1501
  local_file_path = None
1502
  else:
1503
+ print(f"βœ“ File accessible at: {os.path.abspath(local_file_path)}")
1504
 
1505
  except requests.exceptions.RequestException as e:
1506
  error_message = f"[FILE DOWNLOAD ERROR: Could not fetch file: {e}]"
 
1512
  local_file_path = None
1513
 
1514
  if not task_id or question_text is None:
1515
+ print(f"⚠️ Skipping item with missing task_id or question: {item}")
1516
  continue
1517
 
1518
  try:
1519
  # Pass file_path to agent
1520
  submitted_answer = agent(question_text, local_file_path)
1521
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
1522
+
1523
+ # Check if answer is correct
1524
+ is_correct = submitted_answer.strip().lower() == correct_answer.strip().lower()
1525
+ correctness = "βœ… CORRECT" if is_correct else "❌ WRONG"
1526
+
1527
+ # Log with correctness indicator
1528
+ print(f"\n{correctness} - Task {task_id}")
1529
+ print(f" Submitted: '{submitted_answer}'")
1530
+ print(f" Expected: '{correct_answer}'")
1531
+
1532
+ results_log.append({
1533
+ "Task ID": task_id,
1534
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1535
+ "Submitted Answer": submitted_answer,
1536
+ "Correct Answer": correct_answer,
1537
+ "Status": "βœ…" if is_correct else "❌"
1538
+ })
1539
+
1540
+ print(f"βœ… Question {idx}/{len(questions_data)} completed")
1541
+
1542
  except Exception as e:
1543
+ print(f"❌ Error running agent on task {task_id}: {e}")
1544
+ print(traceback.format_exc())
1545
+ results_log.append({
1546
+ "Task ID": task_id,
1547
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1548
+ "Submitted Answer": f"AGENT ERROR: {e}",
1549
+ "Correct Answer": correct_answer,
1550
+ "Status": "❌"
1551
+ })
1552
+ # Continue with other questions even if one fails
1553
+ answers_payload.append({"task_id": task_id, "submitted_answer": f"ERROR: {str(e)[:100]}"})
1554
+
1555
+ # Summary after all questions processed
1556
+ print(f"\n{'='*70}")
1557
+ print(f"βœ… ALL QUESTIONS PROCESSED")
1558
+ print(f"{'='*70}")
1559
+ print(f"Total answers collected: {len(answers_payload)}")
1560
+
1561
+ # Calculate pre-submission accuracy
1562
+ correct_count = sum(1 for log in results_log if log.get("Status") == "βœ…")
1563
+ total_count = len(results_log)
1564
+ accuracy = (correct_count / total_count * 100) if total_count > 0 else 0
1565
+
1566
+ print(f"\n{'='*70}")
1567
+ print(f"πŸ“Š PRE-SUBMISSION SUMMARY")
1568
+ print(f"{'='*70}")
1569
+ print(f"Correct: {correct_count}/{total_count} ({accuracy:.1f}%)")
1570
+ print(f"{'='*70}\n")
1571
 
1572
  if not answers_payload:
1573
+ print("⚠️ Agent did not produce any answers to submit.")
1574
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
1575
 
1576
  # 4. Prepare Submission
1577
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
1578
 
1579
  # 5. Submit
1580
+ print(f"\n{'='*70}")
1581
+ print(f"πŸ“€ SUBMITTING TO API")
1582
+ print(f"{'='*70}")
1583
+ print(f"URL: {submit_url}")
1584
+ print(f"Username: {username}")
1585
+ print(f"Answers to submit: {len(answers_payload)}")
1586
+ print(f"{'='*70}\n")
1587
+
1588
  try:
1589
+ print("⏳ Sending POST request...")
1590
  response = requests.post(submit_url, json=submission_data, timeout=60)
1591
+ print(f"βœ… Got response: Status {response.status_code}")
1592
+
1593
  response.raise_for_status()
1594
  result_data = response.json()
1595
+
1596
+ print(f"\n{'='*70}")
1597
+ print(f"πŸ“Š SUBMISSION RESULTS")
1598
+ print(f"{'='*70}")
1599
+ print(f"Response data: {result_data}")
1600
+ print(f"{'='*70}\n")
1601
+
1602
  final_status = (
1603
  f"Submission Successful!\n"
1604
  f"User: {result_data.get('username')}\n"
 
1606
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
1607
  f"Message: {result_data.get('message', 'No message received.')}"
1608
  )
1609
+
1610
+ print(final_status)
1611
+ print("="*70)
1612
+ print("βœ… Submission successful.")
1613
+
1614
  results_df = pd.DataFrame(results_log)
1615
  return final_status, results_df
1616
+
1617
  except requests.exceptions.HTTPError as e:
1618
  error_detail = f"Server responded with status {e.response.status_code}."
1619
  try:
 
1622
  except requests.exceptions.JSONDecodeError:
1623
  error_detail += f" Response: {e.response.text[:500]}"
1624
  status_message = f"Submission Failed: {error_detail}"
1625
+ print(f"\n{'='*70}")
1626
+ print(f"❌ SUBMISSION FAILED")
1627
+ print(f"{'='*70}")
1628
  print(status_message)
1629
+ print(f"{'='*70}\n")
1630
  results_df = pd.DataFrame(results_log)
1631
  return status_message, results_df
1632
+
1633
  except requests.exceptions.Timeout:
1634
  status_message = "Submission Failed: The request timed out."
1635
+ print(f"\n{'='*70}")
1636
+ print(f"❌ SUBMISSION FAILED")
1637
+ print(f"{'='*70}")
1638
  print(status_message)
1639
+ print(f"{'='*70}\n")
1640
  results_df = pd.DataFrame(results_log)
1641
  return status_message, results_df
1642
+
1643
  except requests.exceptions.RequestException as e:
1644
  status_message = f"Submission Failed: Network error - {e}"
1645
+ print(f"\n{'='*70}")
1646
+ print(f"❌ SUBMISSION FAILED")
1647
+ print(f"{'='*70}")
1648
  print(status_message)
1649
+ print(f"{'='*70}\n")
1650
  results_df = pd.DataFrame(results_log)
1651
  return status_message, results_df
1652
+
1653
  except Exception as e:
1654
  status_message = f"An unexpected error occurred during submission: {e}"
1655
+ print(f"\n{'='*70}")
1656
+ print(f"❌ SUBMISSION FAILED")
1657
+ print(f"{'='*70}")
1658
  print(status_message)
1659
+ print(traceback.format_exc())
1660
+ print(f"{'='*70}\n")
1661
  results_df = pd.DataFrame(results_log)
1662
  return status_message, results_df
1663