Spaces:
Sleeping
Sleeping
Chris
commited on
Commit
Β·
6f7648f
1
Parent(s):
f798fcd
Final 6.5.3
Browse files- src/app.py +207 -307
src/app.py
CHANGED
|
@@ -1282,32 +1282,28 @@ def create_interface():
|
|
| 1282 |
"""
|
| 1283 |
### π Authentication Status: Not Logged In
|
| 1284 |
|
| 1285 |
-
Please log in to access GAIA evaluation
|
| 1286 |
|
| 1287 |
-
**What you
|
| 1288 |
-
-
|
| 1289 |
-
-
|
|
|
|
| 1290 |
|
| 1291 |
-
|
| 1292 |
-
**π Expected Performance**: 30%+ GAIA score with full inference access.
|
| 1293 |
""",
|
| 1294 |
elem_classes=["oauth-login"]
|
| 1295 |
)
|
| 1296 |
|
|
|
|
|
|
|
|
|
|
| 1297 |
with gr.Row():
|
| 1298 |
-
login_button = gr.LoginButton(
|
| 1299 |
-
value="π Login with Full Inference Access",
|
| 1300 |
-
# Note: Gradio 4.44.0 may not support scopes parameter directly
|
| 1301 |
-
# The scopes will be configured at the interface level
|
| 1302 |
-
)
|
| 1303 |
refresh_auth_button = gr.Button("π Refresh Auth Status", variant="secondary", scale=1)
|
| 1304 |
-
debug_auth_button = gr.Button("π Debug OAuth", variant="secondary", scale=1)
|
| 1305 |
|
| 1306 |
unit4_run_button = gr.Button(
|
| 1307 |
-
"
|
| 1308 |
variant="primary",
|
| 1309 |
-
scale=2
|
| 1310 |
-
interactive=False # Disabled until login
|
| 1311 |
)
|
| 1312 |
|
| 1313 |
unit4_status_output = gr.Textbox(
|
|
@@ -1426,344 +1422,242 @@ Please log in to access GAIA evaluation features with full inference access.
|
|
| 1426 |
elem_classes=["reasoning-box"]
|
| 1427 |
)
|
| 1428 |
|
| 1429 |
-
# Event handlers for Unit 4 API
|
| 1430 |
-
def
|
| 1431 |
-
"""
|
| 1432 |
-
|
| 1433 |
-
|
| 1434 |
-
|
| 1435 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1436 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1437 |
try:
|
| 1438 |
-
|
| 1439 |
-
|
| 1440 |
-
|
| 1441 |
-
|
| 1442 |
-
|
| 1443 |
-
class GradioProfile:
|
| 1444 |
-
def __init__(self, username):
|
| 1445 |
-
self.username = username
|
| 1446 |
-
self.oauth_token = None
|
| 1447 |
-
profile = GradioProfile(username)
|
| 1448 |
-
logger.info(f"π Found user via request.user: {username}")
|
| 1449 |
-
|
| 1450 |
-
# Try additional Gradio OAuth attributes
|
| 1451 |
-
if not profile:
|
| 1452 |
-
# Check for standard Gradio OAuth profile
|
| 1453 |
-
for attr in ['oauth_profile', 'profile', 'user_profile']:
|
| 1454 |
-
if hasattr(request, attr):
|
| 1455 |
-
oauth_profile = getattr(request, attr)
|
| 1456 |
-
logger.info(f"π DEBUG: Found request.{attr} = {type(oauth_profile)}")
|
| 1457 |
-
if oauth_profile and hasattr(oauth_profile, 'username'):
|
| 1458 |
-
profile = oauth_profile
|
| 1459 |
-
username = oauth_profile.username
|
| 1460 |
-
logger.info(f"π Found profile via request.{attr}: {username}")
|
| 1461 |
-
break
|
| 1462 |
-
|
| 1463 |
-
# Alternative: Check session or headers for OAuth token
|
| 1464 |
-
if hasattr(request, 'session'):
|
| 1465 |
-
session = request.session
|
| 1466 |
-
logger.info(f"π DEBUG: Session available, keys: {list(session.keys()) if hasattr(session, 'keys') else 'no keys method'}")
|
| 1467 |
-
oauth_token = session.get('oauth_token') or session.get('access_token')
|
| 1468 |
-
if oauth_token:
|
| 1469 |
-
logger.info("π Found OAuth token in session")
|
| 1470 |
-
|
| 1471 |
-
# Check request headers for authorization
|
| 1472 |
-
if hasattr(request, 'headers'):
|
| 1473 |
-
auth_header = request.headers.get('authorization', '')
|
| 1474 |
-
logger.info(f"π DEBUG: Authorization header present: {bool(auth_header)}")
|
| 1475 |
-
if auth_header.startswith('Bearer '):
|
| 1476 |
-
oauth_token = auth_header[7:]
|
| 1477 |
-
logger.info("π Found OAuth token in headers")
|
| 1478 |
-
|
| 1479 |
-
# Try to extract token from profile if we have one
|
| 1480 |
-
if profile and not oauth_token:
|
| 1481 |
-
profile_attrs = [attr for attr in dir(profile) if not attr.startswith('_')]
|
| 1482 |
-
logger.info(f"π DEBUG: Profile attributes: {profile_attrs}")
|
| 1483 |
-
for token_attr in ['oauth_token', 'token', 'access_token', 'id_token', 'bearer_token']:
|
| 1484 |
-
if hasattr(profile, token_attr):
|
| 1485 |
-
token = getattr(profile, token_attr)
|
| 1486 |
-
if token:
|
| 1487 |
-
oauth_token = token
|
| 1488 |
-
logger.info(f"π Found OAuth token via profile.{token_attr}")
|
| 1489 |
-
break
|
| 1490 |
-
|
| 1491 |
-
# If we found a token, add it to the profile
|
| 1492 |
-
if oauth_token and profile:
|
| 1493 |
-
profile.oauth_token = oauth_token
|
| 1494 |
-
logger.info(f"β
OAuth profile created: user={username}, token=present")
|
| 1495 |
-
elif profile and not oauth_token:
|
| 1496 |
-
logger.info(f"β
OAuth profile created: user={username}, token=missing")
|
| 1497 |
-
elif not profile and not oauth_token:
|
| 1498 |
-
logger.warning("β οΈ No OAuth profile or token found in request")
|
| 1499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1500 |
except Exception as e:
|
| 1501 |
-
logger.error(f"
|
| 1502 |
-
|
| 1503 |
-
|
| 1504 |
-
|
| 1505 |
-
|
| 1506 |
-
|
| 1507 |
-
|
| 1508 |
-
|
| 1509 |
-
|
| 1510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1511 |
|
| 1512 |
-
|
| 1513 |
-
|
| 1514 |
-
|
| 1515 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1516 |
try:
|
| 1517 |
-
|
| 1518 |
-
|
| 1519 |
-
|
| 1520 |
-
oauth_token = None
|
| 1521 |
-
username = None
|
| 1522 |
|
| 1523 |
-
#
|
| 1524 |
-
|
| 1525 |
-
username = getattr(request.user, 'username', None)
|
| 1526 |
-
if username:
|
| 1527 |
-
# Create a profile-like object if we have user info
|
| 1528 |
-
class GradioProfile:
|
| 1529 |
-
def __init__(self, username):
|
| 1530 |
-
self.username = username
|
| 1531 |
-
self.oauth_token = None
|
| 1532 |
-
profile = GradioProfile(username)
|
| 1533 |
-
|
| 1534 |
-
# Alternative: Check session or headers for OAuth token
|
| 1535 |
-
if hasattr(request, 'session'):
|
| 1536 |
-
session = request.session
|
| 1537 |
-
oauth_token = session.get('oauth_token') or session.get('access_token')
|
| 1538 |
|
| 1539 |
-
#
|
| 1540 |
-
|
| 1541 |
-
|
| 1542 |
-
|
| 1543 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1544 |
|
| 1545 |
-
#
|
| 1546 |
-
|
| 1547 |
-
|
| 1548 |
-
|
| 1549 |
-
logger.info(f"π OAuth Debug - Profile: {profile is not None}, Username: {username}, Token: {oauth_token is not None}")
|
| 1550 |
|
| 1551 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1552 |
|
| 1553 |
-
|
| 1554 |
-
|
|
|
|
|
|
|
| 1555 |
|
| 1556 |
-
|
| 1557 |
-
oauth_scopes = os.getenv("OAUTH_SCOPES")
|
| 1558 |
-
oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
|
| 1559 |
|
| 1560 |
-
|
| 1561 |
-
|
| 1562 |
-
|
| 1563 |
-
|
| 1564 |
-
|
| 1565 |
-
|
| 1566 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1567 |
|
| 1568 |
-
|
| 1569 |
|
| 1570 |
-
|
|
|
|
|
|
|
|
|
|
| 1571 |
|
| 1572 |
-
|
| 1573 |
"""
|
| 1574 |
-
|
| 1575 |
-
|
| 1576 |
-
|
| 1577 |
-
def check_login_state(request: gr.Request):
|
| 1578 |
-
"""Check if user is logged in and update UI accordingly with enhanced detection"""
|
| 1579 |
-
try:
|
| 1580 |
-
# Simplified approach - just try to determine if user is logged in
|
| 1581 |
-
# without accessing request.user directly
|
| 1582 |
-
|
| 1583 |
-
# Check if OAuth environment is configured
|
| 1584 |
-
oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
|
| 1585 |
-
oauth_scopes = os.getenv("OAUTH_SCOPES")
|
| 1586 |
-
|
| 1587 |
-
if oauth_client_id and oauth_scopes:
|
| 1588 |
-
# OAuth is configured, assume user can log in
|
| 1589 |
-
# Enable the button and let the actual authentication happen at runtime
|
| 1590 |
-
auth_status = f"""
|
| 1591 |
-
### π OAuth Configured Space
|
| 1592 |
|
| 1593 |
-
|
| 1594 |
|
| 1595 |
-
|
|
|
|
|
|
|
|
|
|
| 1596 |
|
| 1597 |
-
|
| 1598 |
"""
|
| 1599 |
-
button_update = gr.update(interactive=True, value="π Run GAIA Evaluation & Submit All Answers")
|
| 1600 |
-
logger.info("β
OAuth environment detected, enabling GAIA evaluation")
|
| 1601 |
-
return auth_status, button_update
|
| 1602 |
-
else:
|
| 1603 |
-
# No OAuth configured
|
| 1604 |
-
auth_status = format_auth_status(None)
|
| 1605 |
-
button_update = gr.update(interactive=False, value="π OAuth Not Configured")
|
| 1606 |
-
logger.info("βΉοΈ No OAuth environment detected")
|
| 1607 |
-
return auth_status, button_update
|
| 1608 |
-
|
| 1609 |
-
except Exception as e:
|
| 1610 |
-
logger.error(f"β Error in check_login_state: {e}")
|
| 1611 |
-
# Return safe defaults
|
| 1612 |
-
auth_status = f"### β Error\n\nError checking login state: {str(e)}"
|
| 1613 |
-
button_update = gr.update(interactive=False, value="π Login Error")
|
| 1614 |
-
return auth_status, button_update
|
| 1615 |
|
| 1616 |
# Set up automatic login state checking
|
| 1617 |
interface.load(
|
| 1618 |
-
fn=
|
| 1619 |
-
outputs=[auth_status_display
|
| 1620 |
)
|
| 1621 |
|
| 1622 |
unit4_run_button.click(
|
| 1623 |
-
fn=
|
| 1624 |
-
inputs=[], #
|
| 1625 |
-
outputs=[unit4_status_output, unit4_results_table,
|
| 1626 |
-
csv_download, json_download, summary_download]
|
| 1627 |
)
|
| 1628 |
|
| 1629 |
# Refresh authentication status manually
|
| 1630 |
refresh_auth_button.click(
|
| 1631 |
-
fn=
|
| 1632 |
-
outputs=[auth_status_display]
|
| 1633 |
-
)
|
| 1634 |
-
|
| 1635 |
-
# Debug OAuth information
|
| 1636 |
-
def debug_oauth_info(request: gr.Request):
|
| 1637 |
-
"""Debug function to show OAuth information"""
|
| 1638 |
-
try:
|
| 1639 |
-
debug_info = []
|
| 1640 |
-
debug_info.append("# π OAuth Debug Information\n")
|
| 1641 |
-
|
| 1642 |
-
# Check HuggingFace Spaces OAuth Environment Variables
|
| 1643 |
-
debug_info.append("## π HuggingFace Spaces OAuth Environment")
|
| 1644 |
-
oauth_client_id = os.getenv("OAUTH_CLIENT_ID")
|
| 1645 |
-
oauth_client_secret = os.getenv("OAUTH_CLIENT_SECRET")
|
| 1646 |
-
oauth_scopes = os.getenv("OAUTH_SCOPES")
|
| 1647 |
-
openid_provider_url = os.getenv("OPENID_PROVIDER_URL")
|
| 1648 |
-
|
| 1649 |
-
debug_info.append(f"**OAUTH_CLIENT_ID**: {oauth_client_id is not None}")
|
| 1650 |
-
debug_info.append(f"**OAUTH_CLIENT_SECRET**: {oauth_client_secret is not None}")
|
| 1651 |
-
debug_info.append(f"**OAUTH_SCOPES**: {oauth_scopes}")
|
| 1652 |
-
debug_info.append(f"**OPENID_PROVIDER_URL**: {openid_provider_url}")
|
| 1653 |
-
|
| 1654 |
-
if oauth_scopes:
|
| 1655 |
-
scopes_list = oauth_scopes.split()
|
| 1656 |
-
debug_info.append(f"**Available Scopes**: {', '.join(scopes_list)}")
|
| 1657 |
-
# Check for both 'inference-api' and 'inference' as valid inference scopes
|
| 1658 |
-
has_inference = 'inference-api' in scopes_list or 'inference' in scopes_list
|
| 1659 |
-
debug_info.append(f"**Has inference scope**: {has_inference}")
|
| 1660 |
-
else:
|
| 1661 |
-
debug_info.append("**β οΈ No OAuth scopes configured**")
|
| 1662 |
-
|
| 1663 |
-
# Check README.md OAuth configuration
|
| 1664 |
-
debug_info.append("\n## π README.md OAuth Configuration")
|
| 1665 |
-
try:
|
| 1666 |
-
with open('README.md', 'r') as f:
|
| 1667 |
-
readme_content = f.read()
|
| 1668 |
-
has_oauth = 'hf_oauth: true' in readme_content
|
| 1669 |
-
has_scopes = 'hf_oauth_scopes:' in readme_content
|
| 1670 |
-
has_inference = 'inference-api' in readme_content
|
| 1671 |
-
|
| 1672 |
-
debug_info.append(f"**hf_oauth: true**: {has_oauth}")
|
| 1673 |
-
debug_info.append(f"**hf_oauth_scopes defined**: {has_scopes}")
|
| 1674 |
-
debug_info.append(f"**inference-api scope**: {has_inference}")
|
| 1675 |
-
except Exception as readme_error:
|
| 1676 |
-
debug_info.append(f"**README.md check error**: {readme_error}")
|
| 1677 |
-
|
| 1678 |
-
# Environment Variables
|
| 1679 |
-
debug_info.append("\n## π§ Environment Variables")
|
| 1680 |
-
hf_token = os.getenv("HF_TOKEN")
|
| 1681 |
-
debug_info.append(f"**HF_TOKEN Available**: {hf_token is not None}")
|
| 1682 |
-
if hf_token:
|
| 1683 |
-
debug_info.append(f"**HF_TOKEN Length**: {len(hf_token)} chars")
|
| 1684 |
-
|
| 1685 |
-
space_host = os.getenv("SPACE_HOST")
|
| 1686 |
-
space_id = os.getenv("SPACE_ID")
|
| 1687 |
-
debug_info.append(f"**SPACE_HOST**: {space_host}")
|
| 1688 |
-
debug_info.append(f"**SPACE_ID**: {space_id}")
|
| 1689 |
-
|
| 1690 |
-
# Gradio-specific OAuth checks
|
| 1691 |
-
debug_info.append("\n## π¨ Gradio OAuth Integration")
|
| 1692 |
-
try:
|
| 1693 |
-
import gradio as gr
|
| 1694 |
-
debug_info.append(f"**Gradio Version**: {gr.__version__}")
|
| 1695 |
-
debug_info.append(f"**OAuth Profile Support**: Gradio should handle OAuth automatically in HF Spaces")
|
| 1696 |
-
|
| 1697 |
-
except Exception as gradio_error:
|
| 1698 |
-
debug_info.append(f"**Gradio OAuth Error**: {gradio_error}")
|
| 1699 |
-
|
| 1700 |
-
# Authentication Test
|
| 1701 |
-
debug_info.append("\n## π§ͺ Authentication Test")
|
| 1702 |
-
|
| 1703 |
-
if oauth_client_id and oauth_scopes:
|
| 1704 |
-
debug_info.append("**β
OAuth Environment**: Properly configured")
|
| 1705 |
-
|
| 1706 |
-
# Check for both scope formats
|
| 1707 |
-
has_inference_scope = "inference-api" in oauth_scopes or "inference" in oauth_scopes
|
| 1708 |
-
if has_inference_scope:
|
| 1709 |
-
debug_info.append("**β
inference-api Scope**: Available for Qwen model access")
|
| 1710 |
-
debug_info.append("**π― Expected Behavior**: Login should provide Qwen model access")
|
| 1711 |
-
else:
|
| 1712 |
-
debug_info.append("**β inference-api Scope**: Missing - Qwen models won't work")
|
| 1713 |
-
debug_info.append("**π§ Fix**: Add 'inference-api' to hf_oauth_scopes in README.md")
|
| 1714 |
-
else:
|
| 1715 |
-
debug_info.append("**β OAuth Environment**: Not properly configured")
|
| 1716 |
-
|
| 1717 |
-
# Success Indicators
|
| 1718 |
-
debug_info.append("\n## β
Success Indicators")
|
| 1719 |
-
|
| 1720 |
-
if oauth_client_id:
|
| 1721 |
-
debug_info.append("- β
OAuth is enabled for this Space")
|
| 1722 |
-
else:
|
| 1723 |
-
debug_info.append("- β OAuth is not enabled (missing OAUTH_CLIENT_ID)")
|
| 1724 |
-
|
| 1725 |
-
# Check for both scope formats in success indicators
|
| 1726 |
-
inference_available = oauth_scopes and ("inference-api" in oauth_scopes or "inference" in oauth_scopes)
|
| 1727 |
-
if inference_available:
|
| 1728 |
-
debug_info.append("- β
inference-api scope is configured")
|
| 1729 |
-
debug_info.append("- β
Should have Qwen model access when logged in")
|
| 1730 |
-
else:
|
| 1731 |
-
debug_info.append("- β inference-api scope is missing")
|
| 1732 |
-
debug_info.append("- β Will not have Qwen model access")
|
| 1733 |
-
|
| 1734 |
-
# Login status detection (avoid AuthenticationMiddleware error)
|
| 1735 |
-
debug_info.append("\n## π€ Login Status")
|
| 1736 |
-
debug_info.append("**Note**: Due to Gradio OAuth integration, login status is detected at runtime")
|
| 1737 |
-
debug_info.append("**Current Status**: Check by clicking 'Run GAIA Evaluation' - you'll be prompted to login if needed")
|
| 1738 |
-
|
| 1739 |
-
return "\n".join(debug_info)
|
| 1740 |
-
|
| 1741 |
-
except Exception as e:
|
| 1742 |
-
return f"# β Debug Error\n\nError during OAuth debug: {str(e)}"
|
| 1743 |
-
|
| 1744 |
-
debug_auth_button.click(
|
| 1745 |
-
fn=debug_oauth_info,
|
| 1746 |
outputs=[auth_status_display]
|
| 1747 |
)
|
| 1748 |
|
| 1749 |
# Event handlers for manual testing
|
| 1750 |
-
def process_and_update(question, file_input, show_reasoning):
|
| 1751 |
"""Process question with authentication check"""
|
| 1752 |
|
| 1753 |
if not question.strip():
|
| 1754 |
return "β Please provide a question", "", "", gr.update(visible=False)
|
| 1755 |
|
| 1756 |
-
# Check for authentication
|
| 1757 |
hf_token = os.getenv("HF_TOKEN")
|
| 1758 |
|
| 1759 |
-
if not hf_token:
|
| 1760 |
error_msg = """
|
| 1761 |
## β Authentication Required
|
| 1762 |
|
| 1763 |
**This system requires authentication to access Qwen models and LangGraph workflow.**
|
| 1764 |
|
| 1765 |
**How to authenticate:**
|
| 1766 |
-
1. π **
|
| 1767 |
2. π **Use Official Evaluation**: Login via the GAIA Benchmark section above
|
| 1768 |
3. π **Get Token**: Visit https://huggingface.co/settings/tokens to create one with `inference` permissions
|
| 1769 |
|
|
@@ -1771,9 +1665,15 @@ Please log in to access GAIA evaluation features with full inference access.
|
|
| 1771 |
"""
|
| 1772 |
return error_msg, "", "", gr.update(visible=False)
|
| 1773 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1774 |
try:
|
| 1775 |
# Create authenticated app instance for this request
|
| 1776 |
-
app = GAIAAgentApp(hf_token=
|
| 1777 |
|
| 1778 |
# Process the question
|
| 1779 |
answer, details, reasoning = app.process_question_detailed(question, file_input, show_reasoning)
|
|
@@ -1808,7 +1708,7 @@ Please log in to access GAIA evaluation features with full inference access.
|
|
| 1808 |
|
| 1809 |
{str(ve)}
|
| 1810 |
|
| 1811 |
-
**Solution**: Please ensure your
|
| 1812 |
"""
|
| 1813 |
return error_msg, "", "", gr.update(visible=False)
|
| 1814 |
|
|
|
|
| 1282 |
"""
|
| 1283 |
### π Authentication Status: Not Logged In
|
| 1284 |
|
| 1285 |
+
Please log in to access GAIA evaluation with Qwen models and LangGraph workflow.
|
| 1286 |
|
| 1287 |
+
**What you need:**
|
| 1288 |
+
- π HuggingFace login with `read` and `inference` permissions
|
| 1289 |
+
- π€ Access to Qwen 2.5 models via HF Inference API
|
| 1290 |
+
- π§ LangGraph multi-agent system capabilities
|
| 1291 |
|
| 1292 |
+
**Expected Performance**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
|
|
|
|
| 1293 |
""",
|
| 1294 |
elem_classes=["oauth-login"]
|
| 1295 |
)
|
| 1296 |
|
| 1297 |
+
# Add Gradio's built-in OAuth login button
|
| 1298 |
+
gr.LoginButton()
|
| 1299 |
+
|
| 1300 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1301 |
refresh_auth_button = gr.Button("π Refresh Auth Status", variant="secondary", scale=1)
|
|
|
|
| 1302 |
|
| 1303 |
unit4_run_button = gr.Button(
|
| 1304 |
+
"π Run GAIA Evaluation & Submit All Answers",
|
| 1305 |
variant="primary",
|
| 1306 |
+
scale=2
|
|
|
|
| 1307 |
)
|
| 1308 |
|
| 1309 |
unit4_status_output = gr.Textbox(
|
|
|
|
| 1422 |
elem_classes=["reasoning-box"]
|
| 1423 |
)
|
| 1424 |
|
| 1425 |
+
# Event handlers for Unit 4 API - Using Gradio's built-in OAuth
|
| 1426 |
+
def run_gaia_evaluation(oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
|
| 1427 |
+
"""Run GAIA evaluation using Gradio's built-in OAuth"""
|
| 1428 |
+
start_time = time.time()
|
| 1429 |
+
|
| 1430 |
+
# Initialize result logger
|
| 1431 |
+
result_logger = GAIAResultLogger()
|
| 1432 |
+
|
| 1433 |
+
# Check authentication using Gradio's OAuth parameters
|
| 1434 |
+
if oauth_token is None or profile is None:
|
| 1435 |
+
return "β Authentication Required: Please login with HuggingFace to access GAIA evaluation.", None, None, None, None, None
|
| 1436 |
+
|
| 1437 |
+
username = profile.username if profile else "unknown_user"
|
| 1438 |
+
hf_token = oauth_token.token if oauth_token else None
|
| 1439 |
+
|
| 1440 |
+
if not hf_token:
|
| 1441 |
+
return "β OAuth Token Missing: Could not extract authentication token. Please logout and login again.", None, None, None, None, None
|
| 1442 |
|
| 1443 |
+
logger.info(f"β
Starting GAIA evaluation for user: {username}")
|
| 1444 |
+
|
| 1445 |
+
# Rest of the function exactly as in run_and_submit_all but using oauth_token.token
|
| 1446 |
+
api_url = DEFAULT_API_URL
|
| 1447 |
+
questions_url = f"{api_url}/questions"
|
| 1448 |
+
submit_url = f"{api_url}/submit"
|
| 1449 |
+
|
| 1450 |
+
# Get space info for code submission
|
| 1451 |
+
space_id = os.getenv("SPACE_ID")
|
| 1452 |
+
|
| 1453 |
+
# 1. Instantiate GAIA Agent with LangGraph workflow
|
| 1454 |
try:
|
| 1455 |
+
logger.info("π Creating GAIA Agent with LangGraph workflow and Qwen models")
|
| 1456 |
+
agent = GAIAAgentApp.create_with_oauth_token(hf_token)
|
| 1457 |
+
|
| 1458 |
+
if not agent.initialized:
|
| 1459 |
+
return "β System Error: GAIA Agent failed to initialize with LangGraph workflow", None, None, None, None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1460 |
|
| 1461 |
+
logger.info("β
GAIA Agent initialized successfully")
|
| 1462 |
+
|
| 1463 |
+
except ValueError as ve:
|
| 1464 |
+
logger.error(f"Authentication error: {ve}")
|
| 1465 |
+
return f"β Authentication Error: {ve}", None, None, None, None, None
|
| 1466 |
+
except RuntimeError as re:
|
| 1467 |
+
logger.error(f"System initialization error: {re}")
|
| 1468 |
+
return f"β System Error: {re}", None, None, None, None, None
|
| 1469 |
except Exception as e:
|
| 1470 |
+
logger.error(f"Unexpected error initializing agent: {e}")
|
| 1471 |
+
return f"β Unexpected Error: {e}. Please check your authentication and try again.", None, None, None, None, None
|
| 1472 |
+
|
| 1473 |
+
# Agent code URL
|
| 1474 |
+
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
|
| 1475 |
+
logger.info(f"Agent code URL: {agent_code}")
|
| 1476 |
+
|
| 1477 |
+
# 2. Fetch Questions
|
| 1478 |
+
logger.info(f"Fetching questions from: {questions_url}")
|
| 1479 |
+
try:
|
| 1480 |
+
response = requests.get(questions_url, timeout=15)
|
| 1481 |
+
response.raise_for_status()
|
| 1482 |
+
questions_data = response.json()
|
| 1483 |
+
if not questions_data:
|
| 1484 |
+
logger.error("Fetched questions list is empty.")
|
| 1485 |
+
return "β Fetched questions list is empty or invalid format.", None, None, None, None, None
|
| 1486 |
+
logger.info(f"β
Fetched {len(questions_data)} questions.")
|
| 1487 |
+
except requests.exceptions.RequestException as e:
|
| 1488 |
+
logger.error(f"Error fetching questions: {e}")
|
| 1489 |
+
return f"β Error fetching questions: {e}", None, None, None, None, None
|
| 1490 |
+
except Exception as e:
|
| 1491 |
+
logger.error(f"An unexpected error occurred fetching questions: {e}")
|
| 1492 |
+
return f"β An unexpected error occurred fetching questions: {e}", None, None, None, None, None
|
| 1493 |
+
|
| 1494 |
+
# 3. Run GAIA Agent on questions
|
| 1495 |
+
results_log = []
|
| 1496 |
+
answers_payload = []
|
| 1497 |
+
logger.info(f"π€ Running GAIA Agent on {len(questions_data)} questions with LangGraph workflow...")
|
| 1498 |
|
| 1499 |
+
for i, item in enumerate(questions_data, 1):
|
| 1500 |
+
task_id = item.get("task_id")
|
| 1501 |
+
question_text = item.get("question")
|
| 1502 |
+
if not task_id or question_text is None:
|
| 1503 |
+
logger.warning(f"Skipping item with missing task_id or question: {item}")
|
| 1504 |
+
continue
|
| 1505 |
+
|
| 1506 |
+
logger.info(f"Processing question {i}/{len(questions_data)}: {task_id}")
|
| 1507 |
+
try:
|
| 1508 |
+
submitted_answer = agent(question_text)
|
| 1509 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 1510 |
+
results_log.append({
|
| 1511 |
+
"Task ID": task_id,
|
| 1512 |
+
"Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
|
| 1513 |
+
"Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
|
| 1514 |
+
})
|
| 1515 |
+
logger.info(f"β
Question {i} processed successfully")
|
| 1516 |
+
except Exception as e:
|
| 1517 |
+
logger.error(f"Error running GAIA agent on task {task_id}: {e}")
|
| 1518 |
+
error_answer = f"AGENT ERROR: {str(e)}"
|
| 1519 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
|
| 1520 |
+
results_log.append({
|
| 1521 |
+
"Task ID": task_id,
|
| 1522 |
+
"Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
|
| 1523 |
+
"Submitted Answer": error_answer
|
| 1524 |
+
})
|
| 1525 |
+
|
| 1526 |
+
if not answers_payload:
|
| 1527 |
+
logger.error("GAIA Agent did not produce any answers to submit.")
|
| 1528 |
+
return "β GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log), None, None, None, None
|
| 1529 |
+
|
| 1530 |
+
# 4. Prepare and submit results
|
| 1531 |
+
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 1532 |
+
status_update = f"π GAIA Agent finished processing {len(answers_payload)} questions. Submitting results for user '{username}'..."
|
| 1533 |
+
logger.info(status_update)
|
| 1534 |
+
|
| 1535 |
+
# 5. Submit to Unit 4 API
|
| 1536 |
+
logger.info(f"π€ Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 1537 |
try:
|
| 1538 |
+
response = requests.post(submit_url, json=submission_data, timeout=120)
|
| 1539 |
+
response.raise_for_status()
|
| 1540 |
+
result_data = response.json()
|
|
|
|
|
|
|
| 1541 |
|
| 1542 |
+
# Calculate execution time
|
| 1543 |
+
execution_time = time.time() - start_time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1544 |
|
| 1545 |
+
# 6. Log results to files
|
| 1546 |
+
logger.info("π Logging evaluation results...")
|
| 1547 |
+
logged_files = result_logger.log_evaluation_results(
|
| 1548 |
+
username=username,
|
| 1549 |
+
questions_data=questions_data,
|
| 1550 |
+
results_log=results_log,
|
| 1551 |
+
final_result=result_data,
|
| 1552 |
+
execution_time=execution_time
|
| 1553 |
+
)
|
| 1554 |
|
| 1555 |
+
# Prepare download files
|
| 1556 |
+
csv_file = logged_files.get("csv")
|
| 1557 |
+
json_file = logged_files.get("json")
|
| 1558 |
+
summary_file = logged_files.get("summary")
|
|
|
|
| 1559 |
|
| 1560 |
+
final_status = (
|
| 1561 |
+
f"π GAIA Agent Evaluation Complete!\n"
|
| 1562 |
+
f"π€ User: {result_data.get('username')}\n"
|
| 1563 |
+
f"π Overall Score: {result_data.get('score', 'N/A')}% "
|
| 1564 |
+
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 1565 |
+
f"β±οΈ Execution Time: {execution_time:.2f} seconds\n"
|
| 1566 |
+
f"π¬ API Response: {result_data.get('message', 'No message received.')}\n\n"
|
| 1567 |
+
f"π Results saved to {len([f for f in [csv_file, json_file, summary_file] if f])} files for download."
|
| 1568 |
+
)
|
| 1569 |
+
logger.info("β
GAIA evaluation completed successfully")
|
| 1570 |
+
results_df = pd.DataFrame(results_log)
|
| 1571 |
|
| 1572 |
+
# Update download file visibility and values
|
| 1573 |
+
csv_update = gr.update(value=csv_file, visible=csv_file is not None)
|
| 1574 |
+
json_update = gr.update(value=json_file, visible=json_file is not None)
|
| 1575 |
+
summary_update = gr.update(value=summary_file, visible=summary_file is not None)
|
| 1576 |
|
| 1577 |
+
return final_status, results_df, csv_update, json_update, summary_update
|
|
|
|
|
|
|
| 1578 |
|
| 1579 |
+
except requests.exceptions.HTTPError as e:
|
| 1580 |
+
error_detail = f"Server responded with status {e.response.status_code}."
|
| 1581 |
+
try:
|
| 1582 |
+
error_json = e.response.json()
|
| 1583 |
+
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
| 1584 |
+
except requests.exceptions.JSONDecodeError:
|
| 1585 |
+
error_detail += f" Response: {e.response.text[:500]}"
|
| 1586 |
+
status_message = f"β Submission Failed: {error_detail}"
|
| 1587 |
+
logger.error(status_message)
|
| 1588 |
+
results_df = pd.DataFrame(results_log)
|
| 1589 |
+
return status_message, results_df, None, None, None
|
| 1590 |
+
except Exception as e:
|
| 1591 |
+
status_message = f"β An unexpected error occurred during submission: {e}"
|
| 1592 |
+
logger.error(status_message)
|
| 1593 |
+
results_df = pd.DataFrame(results_log)
|
| 1594 |
+
return status_message, results_df, None, None, None
|
| 1595 |
+
|
| 1596 |
+
def update_auth_status(profile: gr.OAuthProfile | None):
|
| 1597 |
+
"""Update authentication status display using Gradio's OAuth"""
|
| 1598 |
+
if profile is None:
|
| 1599 |
+
return """
|
| 1600 |
+
### π Authentication Status: Not Logged In
|
| 1601 |
|
| 1602 |
+
Please click the "Sign in with Hugging Face" button above to access GAIA evaluation.
|
| 1603 |
|
| 1604 |
+
**What you need:**
|
| 1605 |
+
- π HuggingFace login with `read` and `inference` permissions
|
| 1606 |
+
- π€ Access to Qwen 2.5 models via HF Inference API
|
| 1607 |
+
- π§ LangGraph multi-agent system capabilities
|
| 1608 |
|
| 1609 |
+
**Expected Performance**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
|
| 1610 |
"""
|
| 1611 |
+
else:
|
| 1612 |
+
return f"""
|
| 1613 |
+
### π Authentication Status: β
Logged In as {profile.username}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1614 |
|
| 1615 |
+
**β
Ready for GAIA Evaluation!**
|
| 1616 |
|
| 1617 |
+
- β
**OAuth Profile**: {profile.name or profile.username}
|
| 1618 |
+
- β
**Qwen Model Access**: Available via HF Inference API
|
| 1619 |
+
- β
**LangGraph Workflow**: Multi-agent orchestration ready
|
| 1620 |
+
- β
**Official Evaluation**: Click "Run GAIA Evaluation" to start
|
| 1621 |
|
| 1622 |
+
π― **Expected Results**: 30%+ GAIA score with full LangGraph workflow and Qwen models.
|
| 1623 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1624 |
|
| 1625 |
# Set up automatic login state checking
|
| 1626 |
interface.load(
|
| 1627 |
+
fn=update_auth_status,
|
| 1628 |
+
outputs=[auth_status_display]
|
| 1629 |
)
|
| 1630 |
|
| 1631 |
unit4_run_button.click(
|
| 1632 |
+
fn=run_gaia_evaluation,
|
| 1633 |
+
inputs=[], # Gradio automatically injects OAuth parameters
|
| 1634 |
+
outputs=[unit4_status_output, unit4_results_table, csv_download, json_download, summary_download]
|
|
|
|
| 1635 |
)
|
| 1636 |
|
| 1637 |
# Refresh authentication status manually
|
| 1638 |
refresh_auth_button.click(
|
| 1639 |
+
fn=update_auth_status,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1640 |
outputs=[auth_status_display]
|
| 1641 |
)
|
| 1642 |
|
| 1643 |
# Event handlers for manual testing
|
| 1644 |
+
def process_and_update(question, file_input, show_reasoning, oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
|
| 1645 |
"""Process question with authentication check"""
|
| 1646 |
|
| 1647 |
if not question.strip():
|
| 1648 |
return "β Please provide a question", "", "", gr.update(visible=False)
|
| 1649 |
|
| 1650 |
+
# Check for authentication - prioritize HF_TOKEN, then OAuth
|
| 1651 |
hf_token = os.getenv("HF_TOKEN")
|
| 1652 |
|
| 1653 |
+
if not hf_token and (oauth_token is None or profile is None):
|
| 1654 |
error_msg = """
|
| 1655 |
## β Authentication Required
|
| 1656 |
|
| 1657 |
**This system requires authentication to access Qwen models and LangGraph workflow.**
|
| 1658 |
|
| 1659 |
**How to authenticate:**
|
| 1660 |
+
1. π **Login with HuggingFace**: Use the "Sign in with Hugging Face" button above
|
| 1661 |
2. π **Use Official Evaluation**: Login via the GAIA Benchmark section above
|
| 1662 |
3. π **Get Token**: Visit https://huggingface.co/settings/tokens to create one with `inference` permissions
|
| 1663 |
|
|
|
|
| 1665 |
"""
|
| 1666 |
return error_msg, "", "", gr.update(visible=False)
|
| 1667 |
|
| 1668 |
+
# Use HF_TOKEN if available, otherwise use OAuth token
|
| 1669 |
+
auth_token = hf_token if hf_token else (oauth_token.token if oauth_token else None)
|
| 1670 |
+
|
| 1671 |
+
if not auth_token:
|
| 1672 |
+
return "β No valid authentication token found", "", "", gr.update(visible=False)
|
| 1673 |
+
|
| 1674 |
try:
|
| 1675 |
# Create authenticated app instance for this request
|
| 1676 |
+
app = GAIAAgentApp(hf_token=auth_token)
|
| 1677 |
|
| 1678 |
# Process the question
|
| 1679 |
answer, details, reasoning = app.process_question_detailed(question, file_input, show_reasoning)
|
|
|
|
| 1708 |
|
| 1709 |
{str(ve)}
|
| 1710 |
|
| 1711 |
+
**Solution**: Please ensure your authentication has `inference` permissions.
|
| 1712 |
"""
|
| 1713 |
return error_msg, "", "", gr.update(visible=False)
|
| 1714 |
|