Chris committed on
Commit
6f7648f
·
1 Parent(s): f798fcd

Final 6.5.3

Browse files
Files changed (1) hide show
  1. src/app.py +207 -307
src/app.py CHANGED
@@ -1282,32 +1282,28 @@ def create_interface():
1282
  """
1283
  ### πŸ” Authentication Status: Not Logged In
1284
 
1285
- Please log in to access GAIA evaluation features with full inference access.
1286
 
1287
- **What you can do:**
1288
- - βœ… Manual question testing (limited functionality)
1289
- - ❌ Official GAIA benchmark evaluation (requires login)
 
1290
 
1291
- **πŸ”‘ OAuth Configuration**: Login now requests both `read` and `inference` scopes for optimal performance.
1292
- **πŸ“ˆ Expected Performance**: 30%+ GAIA score with full inference access.
1293
  """,
1294
  elem_classes=["oauth-login"]
1295
  )
1296
 
 
 
 
1297
  with gr.Row():
1298
- login_button = gr.LoginButton(
1299
- value="πŸ”‘ Login with Full Inference Access",
1300
- # Note: Gradio 4.44.0 may not support scopes parameter directly
1301
- # The scopes will be configured at the interface level
1302
- )
1303
  refresh_auth_button = gr.Button("πŸ”„ Refresh Auth Status", variant="secondary", scale=1)
1304
- debug_auth_button = gr.Button("πŸ” Debug OAuth", variant="secondary", scale=1)
1305
 
1306
  unit4_run_button = gr.Button(
1307
- "πŸ”’ Login Required for GAIA Evaluation",
1308
  variant="primary",
1309
- scale=2,
1310
- interactive=False # Disabled until login
1311
  )
1312
 
1313
  unit4_status_output = gr.Textbox(
@@ -1426,344 +1422,242 @@ Please log in to access GAIA evaluation features with full inference access.
1426
  elem_classes=["reasoning-box"]
1427
  )
1428
 
1429
- # Event handlers for Unit 4 API
1430
- def handle_evaluation_results(request: gr.Request):
1431
- """Handle evaluation and update download visibility"""
1432
- # Use the same OAuth profile extraction logic that works in other functions
1433
- profile = None
1434
- oauth_token = None
1435
- username = None
 
 
 
 
 
 
 
 
 
 
1436
 
 
 
 
 
 
 
 
 
 
 
 
1437
  try:
1438
- # Try to get OAuth info from request using multiple methods
1439
- if hasattr(request, 'user') and request.user:
1440
- username = getattr(request.user, 'username', None)
1441
- if username:
1442
- # Create a profile-like object if we have user info
1443
- class GradioProfile:
1444
- def __init__(self, username):
1445
- self.username = username
1446
- self.oauth_token = None
1447
- profile = GradioProfile(username)
1448
- logger.info(f"πŸ”‘ Found user via request.user: {username}")
1449
-
1450
- # Try additional Gradio OAuth attributes
1451
- if not profile:
1452
- # Check for standard Gradio OAuth profile
1453
- for attr in ['oauth_profile', 'profile', 'user_profile']:
1454
- if hasattr(request, attr):
1455
- oauth_profile = getattr(request, attr)
1456
- logger.info(f"πŸ” DEBUG: Found request.{attr} = {type(oauth_profile)}")
1457
- if oauth_profile and hasattr(oauth_profile, 'username'):
1458
- profile = oauth_profile
1459
- username = oauth_profile.username
1460
- logger.info(f"πŸ”‘ Found profile via request.{attr}: {username}")
1461
- break
1462
-
1463
- # Alternative: Check session or headers for OAuth token
1464
- if hasattr(request, 'session'):
1465
- session = request.session
1466
- logger.info(f"πŸ” DEBUG: Session available, keys: {list(session.keys()) if hasattr(session, 'keys') else 'no keys method'}")
1467
- oauth_token = session.get('oauth_token') or session.get('access_token')
1468
- if oauth_token:
1469
- logger.info("πŸ”‘ Found OAuth token in session")
1470
-
1471
- # Check request headers for authorization
1472
- if hasattr(request, 'headers'):
1473
- auth_header = request.headers.get('authorization', '')
1474
- logger.info(f"πŸ” DEBUG: Authorization header present: {bool(auth_header)}")
1475
- if auth_header.startswith('Bearer '):
1476
- oauth_token = auth_header[7:]
1477
- logger.info("πŸ”‘ Found OAuth token in headers")
1478
-
1479
- # Try to extract token from profile if we have one
1480
- if profile and not oauth_token:
1481
- profile_attrs = [attr for attr in dir(profile) if not attr.startswith('_')]
1482
- logger.info(f"πŸ” DEBUG: Profile attributes: {profile_attrs}")
1483
- for token_attr in ['oauth_token', 'token', 'access_token', 'id_token', 'bearer_token']:
1484
- if hasattr(profile, token_attr):
1485
- token = getattr(profile, token_attr)
1486
- if token:
1487
- oauth_token = token
1488
- logger.info(f"πŸ”‘ Found OAuth token via profile.{token_attr}")
1489
- break
1490
-
1491
- # If we found a token, add it to the profile
1492
- if oauth_token and profile:
1493
- profile.oauth_token = oauth_token
1494
- logger.info(f"βœ… OAuth profile created: user={username}, token=present")
1495
- elif profile and not oauth_token:
1496
- logger.info(f"βœ… OAuth profile created: user={username}, token=missing")
1497
- elif not profile and not oauth_token:
1498
- logger.warning("⚠️ No OAuth profile or token found in request")
1499
 
 
 
 
 
 
 
 
 
1500
  except Exception as e:
1501
- logger.error(f"❌ Error extracting OAuth profile: {e}")
1502
- profile = None
1503
-
1504
- results = run_and_submit_all(profile)
1505
- status, table, auth_status, csv_file, json_file, summary_file = results
1506
-
1507
- # Update download file visibility and values
1508
- csv_update = gr.update(value=csv_file, visible=csv_file is not None)
1509
- json_update = gr.update(value=json_file, visible=json_file is not None)
1510
- summary_update = gr.update(value=summary_file, visible=summary_file is not None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1511
 
1512
- return status, table, auth_status, csv_update, json_update, summary_update
1513
-
1514
- def refresh_auth_status(request: gr.Request):
1515
- """Refresh authentication status display with enhanced debugging"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1516
  try:
1517
- # Use Gradio's built-in OAuth support
1518
- # In newer Gradio versions with HF Spaces, OAuth info should be accessible
1519
- profile = None
1520
- oauth_token = None
1521
- username = None
1522
 
1523
- # Try to get OAuth info from request
1524
- if hasattr(request, 'user') and request.user:
1525
- username = getattr(request.user, 'username', None)
1526
- if username:
1527
- # Create a profile-like object if we have user info
1528
- class GradioProfile:
1529
- def __init__(self, username):
1530
- self.username = username
1531
- self.oauth_token = None
1532
- profile = GradioProfile(username)
1533
-
1534
- # Alternative: Check session or headers for OAuth token
1535
- if hasattr(request, 'session'):
1536
- session = request.session
1537
- oauth_token = session.get('oauth_token') or session.get('access_token')
1538
 
1539
- # Check request headers for authorization
1540
- if hasattr(request, 'headers'):
1541
- auth_header = request.headers.get('authorization', '')
1542
- if auth_header.startswith('Bearer '):
1543
- oauth_token = auth_header[7:]
 
 
 
 
1544
 
1545
- # If we found a token, add it to the profile
1546
- if oauth_token and profile:
1547
- profile.oauth_token = oauth_token
1548
-
1549
- logger.info(f"πŸ” OAuth Debug - Profile: {profile is not None}, Username: {username}, Token: {oauth_token is not None}")
1550
 
1551
- return format_auth_status(profile)
 
 
 
 
 
 
 
 
 
 
1552
 
1553
- except Exception as e:
1554
- logger.error(f"❌ Error in refresh_auth_status: {e}")
 
 
1555
 
1556
- # Fallback: Check environment variables and provide helpful info
1557
- oauth_scopes = os.getenv("OAUTH_SCOPES")
1558
- oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
1559
 
1560
- if oauth_client_id and oauth_scopes:
1561
- return f"""
1562
- ### πŸ” OAuth Configuration Detected
1563
-
1564
- **🏠 Space OAuth**: βœ… Configured with scopes: {oauth_scopes}
1565
-
1566
- **⚠️ Authentication Detection Issue**: {str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1567
 
1568
- **πŸ”§ Gradio OAuth Integration**: The Space has OAuth enabled, but we're having trouble accessing your authentication status through the Gradio interface.
1569
 
1570
- **πŸ’‘ This is likely a Gradio version compatibility issue**. Your login should still work for the GAIA evaluation.
 
 
 
1571
 
1572
- **🎯 Try This**: Click "πŸš€ Run GAIA Evaluation & Submit All Answers" button - it may work even if the status display has issues.
1573
  """
1574
- else:
1575
- return f"### ❌ Authentication Error\n\nError checking auth status: {str(e)}"
1576
-
1577
- def check_login_state(request: gr.Request):
1578
- """Check if user is logged in and update UI accordingly with enhanced detection"""
1579
- try:
1580
- # Simplified approach - just try to determine if user is logged in
1581
- # without accessing request.user directly
1582
-
1583
- # Check if OAuth environment is configured
1584
- oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
1585
- oauth_scopes = os.getenv("OAUTH_SCOPES")
1586
-
1587
- if oauth_client_id and oauth_scopes:
1588
- # OAuth is configured, assume user can log in
1589
- # Enable the button and let the actual authentication happen at runtime
1590
- auth_status = f"""
1591
- ### 🏠 OAuth Configured Space
1592
 
1593
- **πŸ”‘ OAuth Status**: Space is configured with OAuth scopes: {oauth_scopes}
1594
 
1595
- **🎯 Ready for GAIA Evaluation**: Click the button below to start evaluation with your HuggingFace login.
 
 
 
1596
 
1597
- **πŸ’‘ Note**: Authentication happens when you click "Run GAIA Evaluation" - you'll be prompted to login if needed.
1598
  """
1599
- button_update = gr.update(interactive=True, value="πŸš€ Run GAIA Evaluation & Submit All Answers")
1600
- logger.info("βœ… OAuth environment detected, enabling GAIA evaluation")
1601
- return auth_status, button_update
1602
- else:
1603
- # No OAuth configured
1604
- auth_status = format_auth_status(None)
1605
- button_update = gr.update(interactive=False, value="πŸ”’ OAuth Not Configured")
1606
- logger.info("ℹ️ No OAuth environment detected")
1607
- return auth_status, button_update
1608
-
1609
- except Exception as e:
1610
- logger.error(f"❌ Error in check_login_state: {e}")
1611
- # Return safe defaults
1612
- auth_status = f"### ❌ Error\n\nError checking login state: {str(e)}"
1613
- button_update = gr.update(interactive=False, value="πŸ”’ Login Error")
1614
- return auth_status, button_update
1615
 
1616
  # Set up automatic login state checking
1617
  interface.load(
1618
- fn=check_login_state,
1619
- outputs=[auth_status_display, unit4_run_button]
1620
  )
1621
 
1622
  unit4_run_button.click(
1623
- fn=handle_evaluation_results,
1624
- inputs=[], # No inputs needed - profile comes from session
1625
- outputs=[unit4_status_output, unit4_results_table, auth_status_display,
1626
- csv_download, json_download, summary_download]
1627
  )
1628
 
1629
  # Refresh authentication status manually
1630
  refresh_auth_button.click(
1631
- fn=refresh_auth_status,
1632
- outputs=[auth_status_display]
1633
- )
1634
-
1635
- # Debug OAuth information
1636
- def debug_oauth_info(request: gr.Request):
1637
- """Debug function to show OAuth information"""
1638
- try:
1639
- debug_info = []
1640
- debug_info.append("# πŸ” OAuth Debug Information\n")
1641
-
1642
- # Check HuggingFace Spaces OAuth Environment Variables
1643
- debug_info.append("## 🏠 HuggingFace Spaces OAuth Environment")
1644
- oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
1645
- oauth_client_secret = os.getenv("OAUTH_CLIENT_SECRET")
1646
- oauth_scopes = os.getenv("OAUTH_SCOPES")
1647
- openid_provider_url = os.getenv("OPENID_PROVIDER_URL")
1648
-
1649
- debug_info.append(f"**OAUTH_CLIENT_ID**: {oauth_client_id is not None}")
1650
- debug_info.append(f"**OAUTH_CLIENT_SECRET**: {oauth_client_secret is not None}")
1651
- debug_info.append(f"**OAUTH_SCOPES**: {oauth_scopes}")
1652
- debug_info.append(f"**OPENID_PROVIDER_URL**: {openid_provider_url}")
1653
-
1654
- if oauth_scopes:
1655
- scopes_list = oauth_scopes.split()
1656
- debug_info.append(f"**Available Scopes**: {', '.join(scopes_list)}")
1657
- # Check for both 'inference-api' and 'inference' as valid inference scopes
1658
- has_inference = 'inference-api' in scopes_list or 'inference' in scopes_list
1659
- debug_info.append(f"**Has inference scope**: {has_inference}")
1660
- else:
1661
- debug_info.append("**⚠️ No OAuth scopes configured**")
1662
-
1663
- # Check README.md OAuth configuration
1664
- debug_info.append("\n## πŸ“„ README.md OAuth Configuration")
1665
- try:
1666
- with open('README.md', 'r') as f:
1667
- readme_content = f.read()
1668
- has_oauth = 'hf_oauth: true' in readme_content
1669
- has_scopes = 'hf_oauth_scopes:' in readme_content
1670
- has_inference = 'inference-api' in readme_content
1671
-
1672
- debug_info.append(f"**hf_oauth: true**: {has_oauth}")
1673
- debug_info.append(f"**hf_oauth_scopes defined**: {has_scopes}")
1674
- debug_info.append(f"**inference-api scope**: {has_inference}")
1675
- except Exception as readme_error:
1676
- debug_info.append(f"**README.md check error**: {readme_error}")
1677
-
1678
- # Environment Variables
1679
- debug_info.append("\n## πŸ”§ Environment Variables")
1680
- hf_token = os.getenv("HF_TOKEN")
1681
- debug_info.append(f"**HF_TOKEN Available**: {hf_token is not None}")
1682
- if hf_token:
1683
- debug_info.append(f"**HF_TOKEN Length**: {len(hf_token)} chars")
1684
-
1685
- space_host = os.getenv("SPACE_HOST")
1686
- space_id = os.getenv("SPACE_ID")
1687
- debug_info.append(f"**SPACE_HOST**: {space_host}")
1688
- debug_info.append(f"**SPACE_ID**: {space_id}")
1689
-
1690
- # Gradio-specific OAuth checks
1691
- debug_info.append("\n## 🎨 Gradio OAuth Integration")
1692
- try:
1693
- import gradio as gr
1694
- debug_info.append(f"**Gradio Version**: {gr.__version__}")
1695
- debug_info.append(f"**OAuth Profile Support**: Gradio should handle OAuth automatically in HF Spaces")
1696
-
1697
- except Exception as gradio_error:
1698
- debug_info.append(f"**Gradio OAuth Error**: {gradio_error}")
1699
-
1700
- # Authentication Test
1701
- debug_info.append("\n## πŸ§ͺ Authentication Test")
1702
-
1703
- if oauth_client_id and oauth_scopes:
1704
- debug_info.append("**βœ… OAuth Environment**: Properly configured")
1705
-
1706
- # Check for both scope formats
1707
- has_inference_scope = "inference-api" in oauth_scopes or "inference" in oauth_scopes
1708
- if has_inference_scope:
1709
- debug_info.append("**βœ… inference-api Scope**: Available for Qwen model access")
1710
- debug_info.append("**🎯 Expected Behavior**: Login should provide Qwen model access")
1711
- else:
1712
- debug_info.append("**❌ inference-api Scope**: Missing - Qwen models won't work")
1713
- debug_info.append("**πŸ”§ Fix**: Add 'inference-api' to hf_oauth_scopes in README.md")
1714
- else:
1715
- debug_info.append("**❌ OAuth Environment**: Not properly configured")
1716
-
1717
- # Success Indicators
1718
- debug_info.append("\n## βœ… Success Indicators")
1719
-
1720
- if oauth_client_id:
1721
- debug_info.append("- βœ… OAuth is enabled for this Space")
1722
- else:
1723
- debug_info.append("- ❌ OAuth is not enabled (missing OAUTH_CLIENT_ID)")
1724
-
1725
- # Check for both scope formats in success indicators
1726
- inference_available = oauth_scopes and ("inference-api" in oauth_scopes or "inference" in oauth_scopes)
1727
- if inference_available:
1728
- debug_info.append("- βœ… inference-api scope is configured")
1729
- debug_info.append("- βœ… Should have Qwen model access when logged in")
1730
- else:
1731
- debug_info.append("- ❌ inference-api scope is missing")
1732
- debug_info.append("- ❌ Will not have Qwen model access")
1733
-
1734
- # Login status detection (avoid AuthenticationMiddleware error)
1735
- debug_info.append("\n## πŸ‘€ Login Status")
1736
- debug_info.append("**Note**: Due to Gradio OAuth integration, login status is detected at runtime")
1737
- debug_info.append("**Current Status**: Check by clicking 'Run GAIA Evaluation' - you'll be prompted to login if needed")
1738
-
1739
- return "\n".join(debug_info)
1740
-
1741
- except Exception as e:
1742
- return f"# ❌ Debug Error\n\nError during OAuth debug: {str(e)}"
1743
-
1744
- debug_auth_button.click(
1745
- fn=debug_oauth_info,
1746
  outputs=[auth_status_display]
1747
  )
1748
 
1749
  # Event handlers for manual testing
1750
- def process_and_update(question, file_input, show_reasoning):
1751
  """Process question with authentication check"""
1752
 
1753
  if not question.strip():
1754
  return "❌ Please provide a question", "", "", gr.update(visible=False)
1755
 
1756
- # Check for authentication
1757
  hf_token = os.getenv("HF_TOKEN")
1758
 
1759
- if not hf_token:
1760
  error_msg = """
1761
  ## ❌ Authentication Required
1762
 
1763
  **This system requires authentication to access Qwen models and LangGraph workflow.**
1764
 
1765
  **How to authenticate:**
1766
- 1. πŸ”‘ **Set HF_TOKEN**: Add your HuggingFace token as an environment variable
1767
  2. 🌐 **Use Official Evaluation**: Login via the GAIA Benchmark section above
1768
  3. πŸ“ **Get Token**: Visit https://huggingface.co/settings/tokens to create one with `inference` permissions
1769
 
@@ -1771,9 +1665,15 @@ Please log in to access GAIA evaluation features with full inference access.
1771
  """
1772
  return error_msg, "", "", gr.update(visible=False)
1773
 
 
 
 
 
 
 
1774
  try:
1775
  # Create authenticated app instance for this request
1776
- app = GAIAAgentApp(hf_token=hf_token)
1777
 
1778
  # Process the question
1779
  answer, details, reasoning = app.process_question_detailed(question, file_input, show_reasoning)
@@ -1808,7 +1708,7 @@ Please log in to access GAIA evaluation features with full inference access.
1808
 
1809
  {str(ve)}
1810
 
1811
- **Solution**: Please ensure your HF_TOKEN has `inference` permissions.
1812
  """
1813
  return error_msg, "", "", gr.update(visible=False)
1814
 
 
1282
  """
1283
  ### πŸ” Authentication Status: Not Logged In
1284
 
1285
+ Please log in to access GAIA evaluation with Qwen models and LangGraph workflow.
1286
 
1287
+ **What you need:**
1288
+ - πŸ”‘ HuggingFace login with `read` and `inference` permissions
1289
+ - πŸ€– Access to Qwen 2.5 models via HF Inference API
1290
+ - 🧠 LangGraph multi-agent system capabilities
1291
 
1292
+ **Expected Performance**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
 
1293
  """,
1294
  elem_classes=["oauth-login"]
1295
  )
1296
 
1297
+ # Add Gradio's built-in OAuth login button
1298
+ gr.LoginButton()
1299
+
1300
  with gr.Row():
 
 
 
 
 
1301
  refresh_auth_button = gr.Button("πŸ”„ Refresh Auth Status", variant="secondary", scale=1)
 
1302
 
1303
  unit4_run_button = gr.Button(
1304
+ "πŸš€ Run GAIA Evaluation & Submit All Answers",
1305
  variant="primary",
1306
+ scale=2
 
1307
  )
1308
 
1309
  unit4_status_output = gr.Textbox(
 
1422
  elem_classes=["reasoning-box"]
1423
  )
1424
 
1425
+ # Event handlers for Unit 4 API - Using Gradio's built-in OAuth
1426
+ def run_gaia_evaluation(oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
1427
+ """Run GAIA evaluation using Gradio's built-in OAuth"""
1428
+ start_time = time.time()
1429
+
1430
+ # Initialize result logger
1431
+ result_logger = GAIAResultLogger()
1432
+
1433
+ # Check authentication using Gradio's OAuth parameters
1434
+ if oauth_token is None or profile is None:
1435
+ return "❌ Authentication Required: Please login with HuggingFace to access GAIA evaluation.", None, None, None, None, None
1436
+
1437
+ username = profile.username if profile else "unknown_user"
1438
+ hf_token = oauth_token.token if oauth_token else None
1439
+
1440
+ if not hf_token:
1441
+ return "❌ OAuth Token Missing: Could not extract authentication token. Please logout and login again.", None, None, None, None, None
1442
 
1443
+ logger.info(f"βœ… Starting GAIA evaluation for user: {username}")
1444
+
1445
+ # Rest of the function exactly as in run_and_submit_all but using oauth_token.token
1446
+ api_url = DEFAULT_API_URL
1447
+ questions_url = f"{api_url}/questions"
1448
+ submit_url = f"{api_url}/submit"
1449
+
1450
+ # Get space info for code submission
1451
+ space_id = os.getenv("SPACE_ID")
1452
+
1453
+ # 1. Instantiate GAIA Agent with LangGraph workflow
1454
  try:
1455
+ logger.info("πŸš€ Creating GAIA Agent with LangGraph workflow and Qwen models")
1456
+ agent = GAIAAgentApp.create_with_oauth_token(hf_token)
1457
+
1458
+ if not agent.initialized:
1459
+ return "❌ System Error: GAIA Agent failed to initialize with LangGraph workflow", None, None, None, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1460
 
1461
+ logger.info("βœ… GAIA Agent initialized successfully")
1462
+
1463
+ except ValueError as ve:
1464
+ logger.error(f"Authentication error: {ve}")
1465
+ return f"❌ Authentication Error: {ve}", None, None, None, None, None
1466
+ except RuntimeError as re:
1467
+ logger.error(f"System initialization error: {re}")
1468
+ return f"❌ System Error: {re}", None, None, None, None, None
1469
  except Exception as e:
1470
+ logger.error(f"Unexpected error initializing agent: {e}")
1471
+ return f"❌ Unexpected Error: {e}. Please check your authentication and try again.", None, None, None, None, None
1472
+
1473
+ # Agent code URL
1474
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
1475
+ logger.info(f"Agent code URL: {agent_code}")
1476
+
1477
+ # 2. Fetch Questions
1478
+ logger.info(f"Fetching questions from: {questions_url}")
1479
+ try:
1480
+ response = requests.get(questions_url, timeout=15)
1481
+ response.raise_for_status()
1482
+ questions_data = response.json()
1483
+ if not questions_data:
1484
+ logger.error("Fetched questions list is empty.")
1485
+ return "❌ Fetched questions list is empty or invalid format.", None, None, None, None, None
1486
+ logger.info(f"βœ… Fetched {len(questions_data)} questions.")
1487
+ except requests.exceptions.RequestException as e:
1488
+ logger.error(f"Error fetching questions: {e}")
1489
+ return f"❌ Error fetching questions: {e}", None, None, None, None, None
1490
+ except Exception as e:
1491
+ logger.error(f"An unexpected error occurred fetching questions: {e}")
1492
+ return f"❌ An unexpected error occurred fetching questions: {e}", None, None, None, None, None
1493
+
1494
+ # 3. Run GAIA Agent on questions
1495
+ results_log = []
1496
+ answers_payload = []
1497
+ logger.info(f"πŸ€– Running GAIA Agent on {len(questions_data)} questions with LangGraph workflow...")
1498
 
1499
+ for i, item in enumerate(questions_data, 1):
1500
+ task_id = item.get("task_id")
1501
+ question_text = item.get("question")
1502
+ if not task_id or question_text is None:
1503
+ logger.warning(f"Skipping item with missing task_id or question: {item}")
1504
+ continue
1505
+
1506
+ logger.info(f"Processing question {i}/{len(questions_data)}: {task_id}")
1507
+ try:
1508
+ submitted_answer = agent(question_text)
1509
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
1510
+ results_log.append({
1511
+ "Task ID": task_id,
1512
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1513
+ "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
1514
+ })
1515
+ logger.info(f"βœ… Question {i} processed successfully")
1516
+ except Exception as e:
1517
+ logger.error(f"Error running GAIA agent on task {task_id}: {e}")
1518
+ error_answer = f"AGENT ERROR: {str(e)}"
1519
+ answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
1520
+ results_log.append({
1521
+ "Task ID": task_id,
1522
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
1523
+ "Submitted Answer": error_answer
1524
+ })
1525
+
1526
+ if not answers_payload:
1527
+ logger.error("GAIA Agent did not produce any answers to submit.")
1528
+ return "❌ GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log), None, None, None, None
1529
+
1530
+ # 4. Prepare and submit results
1531
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
1532
+ status_update = f"πŸš€ GAIA Agent finished processing {len(answers_payload)} questions. Submitting results for user '{username}'..."
1533
+ logger.info(status_update)
1534
+
1535
+ # 5. Submit to Unit 4 API
1536
+ logger.info(f"πŸ“€ Submitting {len(answers_payload)} answers to: {submit_url}")
1537
  try:
1538
+ response = requests.post(submit_url, json=submission_data, timeout=120)
1539
+ response.raise_for_status()
1540
+ result_data = response.json()
 
 
1541
 
1542
+ # Calculate execution time
1543
+ execution_time = time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
 
1544
 
1545
+ # 6. Log results to files
1546
+ logger.info("πŸ“ Logging evaluation results...")
1547
+ logged_files = result_logger.log_evaluation_results(
1548
+ username=username,
1549
+ questions_data=questions_data,
1550
+ results_log=results_log,
1551
+ final_result=result_data,
1552
+ execution_time=execution_time
1553
+ )
1554
 
1555
+ # Prepare download files
1556
+ csv_file = logged_files.get("csv")
1557
+ json_file = logged_files.get("json")
1558
+ summary_file = logged_files.get("summary")
 
1559
 
1560
+ final_status = (
1561
+ f"πŸŽ‰ GAIA Agent Evaluation Complete!\n"
1562
+ f"πŸ‘€ User: {result_data.get('username')}\n"
1563
+ f"πŸ† Overall Score: {result_data.get('score', 'N/A')}% "
1564
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
1565
+ f"⏱️ Execution Time: {execution_time:.2f} seconds\n"
1566
+ f"πŸ’¬ API Response: {result_data.get('message', 'No message received.')}\n\n"
1567
+ f"πŸ“ Results saved to {len([f for f in [csv_file, json_file, summary_file] if f])} files for download."
1568
+ )
1569
+ logger.info("βœ… GAIA evaluation completed successfully")
1570
+ results_df = pd.DataFrame(results_log)
1571
 
1572
+ # Update download file visibility and values
1573
+ csv_update = gr.update(value=csv_file, visible=csv_file is not None)
1574
+ json_update = gr.update(value=json_file, visible=json_file is not None)
1575
+ summary_update = gr.update(value=summary_file, visible=summary_file is not None)
1576
 
1577
+ return final_status, results_df, csv_update, json_update, summary_update
 
 
1578
 
1579
+ except requests.exceptions.HTTPError as e:
1580
+ error_detail = f"Server responded with status {e.response.status_code}."
1581
+ try:
1582
+ error_json = e.response.json()
1583
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
1584
+ except requests.exceptions.JSONDecodeError:
1585
+ error_detail += f" Response: {e.response.text[:500]}"
1586
+ status_message = f"❌ Submission Failed: {error_detail}"
1587
+ logger.error(status_message)
1588
+ results_df = pd.DataFrame(results_log)
1589
+ return status_message, results_df, None, None, None
1590
+ except Exception as e:
1591
+ status_message = f"❌ An unexpected error occurred during submission: {e}"
1592
+ logger.error(status_message)
1593
+ results_df = pd.DataFrame(results_log)
1594
+ return status_message, results_df, None, None, None
1595
+
1596
+ def update_auth_status(profile: gr.OAuthProfile | None):
1597
+ """Update authentication status display using Gradio's OAuth"""
1598
+ if profile is None:
1599
+ return """
1600
+ ### πŸ” Authentication Status: Not Logged In
1601
 
1602
+ Please click the "Sign in with Hugging Face" button above to access GAIA evaluation.
1603
 
1604
+ **What you need:**
1605
+ - πŸ”‘ HuggingFace login with `read` and `inference` permissions
1606
+ - πŸ€– Access to Qwen 2.5 models via HF Inference API
1607
+ - 🧠 LangGraph multi-agent system capabilities
1608
 
1609
+ **Expected Performance**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
1610
  """
1611
+ else:
1612
+ return f"""
1613
+ ### πŸ” Authentication Status: βœ… Logged In as {profile.username}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1614
 
1615
+ **βœ… Ready for GAIA Evaluation!**
1616
 
1617
+ - βœ… **OAuth Profile**: {profile.name or profile.username}
1618
+ - βœ… **Qwen Model Access**: Available via HF Inference API
1619
+ - βœ… **LangGraph Workflow**: Multi-agent orchestration ready
1620
+ - βœ… **Official Evaluation**: Click "Run GAIA Evaluation" to start
1621
 
1622
+ 🎯 **Expected Results**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
1623
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1624
 
1625
  # Set up automatic login state checking
1626
  interface.load(
1627
+ fn=update_auth_status,
1628
+ outputs=[auth_status_display]
1629
  )
1630
 
1631
  unit4_run_button.click(
1632
+ fn=run_gaia_evaluation,
1633
+ inputs=[], # Gradio automatically injects OAuth parameters
1634
+ outputs=[unit4_status_output, unit4_results_table, csv_download, json_download, summary_download]
 
1635
  )
1636
 
1637
  # Refresh authentication status manually
1638
  refresh_auth_button.click(
1639
+ fn=update_auth_status,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1640
  outputs=[auth_status_display]
1641
  )
1642
 
1643
  # Event handlers for manual testing
1644
+ def process_and_update(question, file_input, show_reasoning, oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
1645
  """Process question with authentication check"""
1646
 
1647
  if not question.strip():
1648
  return "❌ Please provide a question", "", "", gr.update(visible=False)
1649
 
1650
+ # Check for authentication - prioritize HF_TOKEN, then OAuth
1651
  hf_token = os.getenv("HF_TOKEN")
1652
 
1653
+ if not hf_token and (oauth_token is None or profile is None):
1654
  error_msg = """
1655
  ## ❌ Authentication Required
1656
 
1657
  **This system requires authentication to access Qwen models and LangGraph workflow.**
1658
 
1659
  **How to authenticate:**
1660
+ 1. πŸ”‘ **Login with HuggingFace**: Use the "Sign in with Hugging Face" button above
1661
  2. 🌐 **Use Official Evaluation**: Login via the GAIA Benchmark section above
1662
  3. πŸ“ **Get Token**: Visit https://huggingface.co/settings/tokens to create one with `inference` permissions
1663
 
 
1665
  """
1666
  return error_msg, "", "", gr.update(visible=False)
1667
 
1668
+ # Use HF_TOKEN if available, otherwise use OAuth token
1669
+ auth_token = hf_token if hf_token else (oauth_token.token if oauth_token else None)
1670
+
1671
+ if not auth_token:
1672
+ return "❌ No valid authentication token found", "", "", gr.update(visible=False)
1673
+
1674
  try:
1675
  # Create authenticated app instance for this request
1676
+ app = GAIAAgentApp(hf_token=auth_token)
1677
 
1678
  # Process the question
1679
  answer, details, reasoning = app.process_question_detailed(question, file_input, show_reasoning)
 
1708
 
1709
  {str(ve)}
1710
 
1711
+ **Solution**: Please ensure your authentication has `inference` permissions.
1712
  """
1713
  return error_msg, "", "", gr.update(visible=False)
1714