Chris
committed on
Commit
·
748b763
1
Parent(s):
7ef24ef
Final 6.0.3
Browse files- src/app.py +12 -3
- src/validate_oauth_fix.py +0 -131
src/app.py
CHANGED
|
@@ -1240,8 +1240,18 @@ Please log in to access GAIA evaluation features with full inference access.
|
|
| 1240 |
|
| 1241 |
# Examples
|
| 1242 |
gr.Markdown("#### 💡 Example Questions")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1243 |
examples = gr.Examples(
|
| 1244 |
-
examples=
|
| 1245 |
inputs=[question_input],
|
| 1246 |
cache_examples=False
|
| 1247 |
)
|
|
@@ -1307,8 +1317,7 @@ Please log in to access GAIA evaluation features with full inference access.
|
|
| 1307 |
# Set up automatic login state checking
|
| 1308 |
interface.load(
|
| 1309 |
fn=check_login_state,
|
| 1310 |
-
outputs=[auth_status_display, unit4_run_button]
|
| 1311 |
-
every=2 # Check every 2 seconds for login state changes
|
| 1312 |
)
|
| 1313 |
|
| 1314 |
unit4_run_button.click(
|
|
|
|
| 1240 |
|
| 1241 |
# Examples
|
| 1242 |
gr.Markdown("#### 💡 Example Questions")
|
| 1243 |
+
|
| 1244 |
+
example_questions = [
|
| 1245 |
+
"How many studio albums were published by Mercedes Sosa between 2000 and 2009?",
|
| 1246 |
+
"What is the capital of the country that has the most time zones?",
|
| 1247 |
+
"Calculate the compound interest on $1000 at 5% annual rate compounded quarterly for 3 years",
|
| 1248 |
+
"What is the square root of the sum of the first 10 prime numbers?",
|
| 1249 |
+
"Who was the first person to walk on the moon and what year did it happen?",
|
| 1250 |
+
"Compare the GDP of Japan and Germany in 2023 and tell me the difference",
|
| 1251 |
+
]
|
| 1252 |
+
|
| 1253 |
examples = gr.Examples(
|
| 1254 |
+
examples=example_questions,
|
| 1255 |
inputs=[question_input],
|
| 1256 |
cache_examples=False
|
| 1257 |
)
|
|
|
|
| 1317 |
# Set up automatic login state checking
|
| 1318 |
interface.load(
|
| 1319 |
fn=check_login_state,
|
| 1320 |
+
outputs=[auth_status_display, unit4_run_button]
|
|
|
|
| 1321 |
)
|
| 1322 |
|
| 1323 |
unit4_run_button.click(
|
src/validate_oauth_fix.py
DELETED
|
@@ -1,131 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Final Validation: OAuth Authentication Fix
|
| 4 |
-
Demonstrates that the GAIA Agent OAuth authentication issue is completely resolved
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import os
|
| 8 |
-
import logging
|
| 9 |
-
|
| 10 |
-
# Configure logging
|
| 11 |
-
logging.basicConfig(level=logging.INFO)
|
| 12 |
-
logger = logging.getLogger(__name__)
|
| 13 |
-
|
| 14 |
-
def main():
|
| 15 |
-
"""Demonstrate the OAuth authentication fix"""
|
| 16 |
-
|
| 17 |
-
print("π OAUTH AUTHENTICATION FIX VALIDATION")
|
| 18 |
-
print("=" * 60)
|
| 19 |
-
|
| 20 |
-
print("\nπ ISSUE SUMMARY:")
|
| 21 |
-
print("- Problem: Production system had 0% GAIA success rate")
|
| 22 |
-
print("- Cause: OAuth authentication mismatch (HF Spaces vs local)")
|
| 23 |
-
print("- Impact: No LangSmith tracing, models never called")
|
| 24 |
-
|
| 25 |
-
print("\n✅ SOLUTION IMPLEMENTED:")
|
| 26 |
-
print("1. OAuth token extraction from Gradio profile")
|
| 27 |
-
print("2. Dynamic agent creation with OAuth tokens")
|
| 28 |
-
print("3. Robust 3-tier fallback system")
|
| 29 |
-
print("4. SimpleClient with rule-based responses")
|
| 30 |
-
|
| 31 |
-
print("\n🧪 TESTING FALLBACK SYSTEM:")
|
| 32 |
-
|
| 33 |
-
# Test OAuth-compatible GAIAAgentApp
|
| 34 |
-
token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
|
| 35 |
-
if token:
|
| 36 |
-
print(f"✅ OAuth token available: {token[:10]}...")
|
| 37 |
-
|
| 38 |
-
try:
|
| 39 |
-
from app import GAIAAgentApp
|
| 40 |
-
|
| 41 |
-
# Test OAuth-compatible creation
|
| 42 |
-
app = GAIAAgentApp.create_with_oauth_token(token)
|
| 43 |
-
print("✅ GAIAAgentApp created with OAuth token")
|
| 44 |
-
|
| 45 |
-
if app.initialized:
|
| 46 |
-
print("✅ App initialized successfully")
|
| 47 |
-
|
| 48 |
-
# Test basic functionality
|
| 49 |
-
test_questions = [
|
| 50 |
-
"What is 2+2?",
|
| 51 |
-
"What is the capital of France?"
|
| 52 |
-
]
|
| 53 |
-
|
| 54 |
-
for question in test_questions:
|
| 55 |
-
try:
|
| 56 |
-
answer = app(question)
|
| 57 |
-
success = "4" in answer or "Paris" in answer
|
| 58 |
-
status = "✅" if success else "⚠️"
|
| 59 |
-
print(f"{status} '{question}' → {answer[:50]}...")
|
| 60 |
-
except Exception as e:
|
| 61 |
-
print(f"❌ Question failed: {e}")
|
| 62 |
-
|
| 63 |
-
else:
|
| 64 |
-
print("❌ App failed to initialize")
|
| 65 |
-
|
| 66 |
-
except Exception as e:
|
| 67 |
-
print(f"❌ OAuth test failed: {e}")
|
| 68 |
-
else:
|
| 69 |
-
print("⚠️ No token available - but system will still work with SimpleClient")
|
| 70 |
-
|
| 71 |
-
# Test SimpleClient directly
|
| 72 |
-
print("\n🤖 TESTING SIMPLE CLIENT FALLBACK:")
|
| 73 |
-
try:
|
| 74 |
-
from models.simple_client import SimpleClient
|
| 75 |
-
|
| 76 |
-
client = SimpleClient()
|
| 77 |
-
test_questions = [
|
| 78 |
-
"What is 2+2?",
|
| 79 |
-
"What is the capital of France?",
|
| 80 |
-
"Calculate 25% of 200"
|
| 81 |
-
]
|
| 82 |
-
|
| 83 |
-
all_correct = True
|
| 84 |
-
for question in test_questions:
|
| 85 |
-
result = client.generate(question)
|
| 86 |
-
|
| 87 |
-
# Check if answer is correct
|
| 88 |
-
correct = False
|
| 89 |
-
if "2+2" in question and "4" in result.response:
|
| 90 |
-
correct = True
|
| 91 |
-
elif "France" in question and "Paris" in result.response:
|
| 92 |
-
correct = True
|
| 93 |
-
elif "25%" in question and "50" in result.response:
|
| 94 |
-
correct = True
|
| 95 |
-
|
| 96 |
-
if not correct:
|
| 97 |
-
all_correct = False
|
| 98 |
-
|
| 99 |
-
status = "✅" if correct else "❌"
|
| 100 |
-
print(f"{status} '{question}' → {result.response[:50]}...")
|
| 101 |
-
|
| 102 |
-
if all_correct:
|
| 103 |
-
print("✅ All SimpleClient responses correct!")
|
| 104 |
-
else:
|
| 105 |
-
print("⚠️ Some SimpleClient responses need improvement")
|
| 106 |
-
|
| 107 |
-
except Exception as e:
|
| 108 |
-
print(f"❌ SimpleClient test failed: {e}")
|
| 109 |
-
|
| 110 |
-
print("\n" + "=" * 60)
|
| 111 |
-
print("π VALIDATION RESULTS")
|
| 112 |
-
print("=" * 60)
|
| 113 |
-
|
| 114 |
-
print("✅ OAuth authentication implementation: COMPLETE")
|
| 115 |
-
print("✅ Fallback system implementation: COMPLETE")
|
| 116 |
-
print("✅ Production reliability: GUARANTEED")
|
| 117 |
-
print("✅ User experience: PROFESSIONAL")
|
| 118 |
-
print("✅ Deployment readiness: READY")
|
| 119 |
-
|
| 120 |
-
print("\n🎯 PRODUCTION EXPECTATIONS:")
|
| 121 |
-
print("- Minimum GAIA Success Rate: 15%+ (guaranteed)")
|
| 122 |
-
print("- Maximum GAIA Success Rate: 30%+ (with advanced models)")
|
| 123 |
-
print("- System Reliability: 100% (always responds)")
|
| 124 |
-
print("- Authentication Issues: 0% (completely resolved)")
|
| 125 |
-
|
| 126 |
-
print("\nπ READY FOR HUGGINGFACE SPACE DEPLOYMENT!")
|
| 127 |
-
print("The OAuth authentication barrier has been eliminated.")
|
| 128 |
-
print("The GAIA Agent is now production-ready with guaranteed reliability.")
|
| 129 |
-
|
| 130 |
-
if __name__ == "__main__":
|
| 131 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|