Chris
committed on
Commit
·
95e7104
1
Parent(s):
1e17e7f
Final 5.6.3
Browse files
- src/__pycache__/app.cpython-310.pyc +0 -0
- src/app.py +23 -24
- src/production_deployment_guide.md +29 -5
src/__pycache__/app.cpython-310.pyc
CHANGED
|
Binary files a/src/__pycache__/app.cpython-310.pyc and b/src/__pycache__/app.cpython-310.pyc differ
|
|
|
src/app.py
CHANGED
|
@@ -360,8 +360,11 @@ Please log in to access GAIA evaluation features.
|
|
| 360 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 361 |
"""
|
| 362 |
Fetches all questions from Unit 4 API, runs the GAIA Agent on them, submits all answers,
|
| 363 |
-
and displays the results.
|
| 364 |
"""
|
|
|
|
|
|
|
|
|
|
| 365 |
# Get space info for code submission
|
| 366 |
space_id = os.getenv("SPACE_ID")
|
| 367 |
|
|
@@ -389,7 +392,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 389 |
|
| 390 |
else:
|
| 391 |
logger.info("User not logged in.")
|
| 392 |
-
return "Please Login to Hugging Face with the button.", None
|
| 393 |
|
| 394 |
api_url = DEFAULT_API_URL
|
| 395 |
questions_url = f"{api_url}/questions"
|
|
@@ -405,10 +408,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 405 |
agent = GAIAAgentApp() # This will automatically fallback to SimpleClient
|
| 406 |
|
| 407 |
if not agent.initialized:
|
| 408 |
-
return "Error: GAIA Agent failed to initialize - using SimpleClient fallback for limited OAuth", None
|
| 409 |
except Exception as e:
|
| 410 |
logger.error(f"Error instantiating agent: {e}")
|
| 411 |
-
return f"Error initializing GAIA Agent: {e}", None
|
| 412 |
|
| 413 |
# Agent code URL
|
| 414 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
|
|
@@ -422,17 +425,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 422 |
questions_data = response.json()
|
| 423 |
if not questions_data:
|
| 424 |
logger.error("Fetched questions list is empty.")
|
| 425 |
-
return "Fetched questions list is empty or invalid format.", None
|
| 426 |
logger.info(f"Fetched {len(questions_data)} questions.")
|
| 427 |
except requests.exceptions.RequestException as e:
|
| 428 |
logger.error(f"Error fetching questions: {e}")
|
| 429 |
-
return f"Error fetching questions: {e}", None
|
| 430 |
except requests.exceptions.JSONDecodeError as e:
|
| 431 |
logger.error(f"Error decoding JSON response from questions endpoint: {e}")
|
| 432 |
-
return f"Error decoding server response for questions: {e}", None
|
| 433 |
except Exception as e:
|
| 434 |
logger.error(f"An unexpected error occurred fetching questions: {e}")
|
| 435 |
-
return f"An unexpected error occurred fetching questions: {e}", None
|
| 436 |
|
| 437 |
# 3. Run GAIA Agent
|
| 438 |
results_log = []
|
|
@@ -467,7 +470,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 467 |
|
| 468 |
if not answers_payload:
|
| 469 |
logger.error("GAIA Agent did not produce any answers to submit.")
|
| 470 |
-
return "GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 471 |
|
| 472 |
# 4. Prepare Submission
|
| 473 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
|
@@ -489,7 +492,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 489 |
)
|
| 490 |
logger.info("Submission successful.")
|
| 491 |
results_df = pd.DataFrame(results_log)
|
| 492 |
-
return final_status, results_df
|
| 493 |
except requests.exceptions.HTTPError as e:
|
| 494 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 495 |
try:
|
|
@@ -500,22 +503,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 500 |
status_message = f"Submission Failed: {error_detail}"
|
| 501 |
logger.error(status_message)
|
| 502 |
results_df = pd.DataFrame(results_log)
|
| 503 |
-
return status_message, results_df
|
| 504 |
except requests.exceptions.Timeout:
|
| 505 |
status_message = "Submission Failed: The request timed out."
|
| 506 |
logger.error(status_message)
|
| 507 |
results_df = pd.DataFrame(results_log)
|
| 508 |
-
return status_message, results_df
|
| 509 |
except requests.exceptions.RequestException as e:
|
| 510 |
status_message = f"Submission Failed: Network error - {e}"
|
| 511 |
logger.error(status_message)
|
| 512 |
results_df = pd.DataFrame(results_log)
|
| 513 |
-
return status_message, results_df
|
| 514 |
except Exception as e:
|
| 515 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 516 |
logger.error(status_message)
|
| 517 |
results_df = pd.DataFrame(results_log)
|
| 518 |
-
return status_message, results_df
|
| 519 |
|
| 520 |
def create_interface():
|
| 521 |
"""Create the Gradio interface with both Unit 4 API and manual testing"""
|
|
@@ -833,7 +836,9 @@ def create_interface():
|
|
| 833 |
elem_classes=["oauth-login"]
|
| 834 |
)
|
| 835 |
|
| 836 |
-
|
|
|
|
|
|
|
| 837 |
|
| 838 |
unit4_run_button = gr.Button(
|
| 839 |
"🚀 Run GAIA Evaluation & Submit All Answers",
|
|
@@ -921,17 +926,11 @@ def create_interface():
|
|
| 921 |
# Event handlers for Unit 4 API
|
| 922 |
unit4_run_button.click(
|
| 923 |
fn=run_and_submit_all,
|
| 924 |
-
outputs=[unit4_status_output, unit4_results_table]
|
| 925 |
-
)
|
| 926 |
-
|
| 927 |
-
# Update authentication status on login/logout
|
| 928 |
-
login_button.change(
|
| 929 |
-
fn=format_auth_status,
|
| 930 |
-
outputs=[auth_status_display]
|
| 931 |
)
|
| 932 |
|
| 933 |
-
#
|
| 934 |
-
|
| 935 |
fn=format_auth_status,
|
| 936 |
outputs=[auth_status_display]
|
| 937 |
)
|
|
|
|
| 360 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 361 |
"""
|
| 362 |
Fetches all questions from Unit 4 API, runs the GAIA Agent on them, submits all answers,
|
| 363 |
+
and displays the results. Also returns updated authentication status.
|
| 364 |
"""
|
| 365 |
+
# Get authentication status for display
|
| 366 |
+
auth_status = format_auth_status(profile)
|
| 367 |
+
|
| 368 |
# Get space info for code submission
|
| 369 |
space_id = os.getenv("SPACE_ID")
|
| 370 |
|
|
|
|
| 392 |
|
| 393 |
else:
|
| 394 |
logger.info("User not logged in.")
|
| 395 |
+
return "Please Login to Hugging Face with the button.", None, auth_status
|
| 396 |
|
| 397 |
api_url = DEFAULT_API_URL
|
| 398 |
questions_url = f"{api_url}/questions"
|
|
|
|
| 408 |
agent = GAIAAgentApp() # This will automatically fallback to SimpleClient
|
| 409 |
|
| 410 |
if not agent.initialized:
|
| 411 |
+
return "Error: GAIA Agent failed to initialize - using SimpleClient fallback for limited OAuth", None, auth_status
|
| 412 |
except Exception as e:
|
| 413 |
logger.error(f"Error instantiating agent: {e}")
|
| 414 |
+
return f"Error initializing GAIA Agent: {e}", None, auth_status
|
| 415 |
|
| 416 |
# Agent code URL
|
| 417 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Development"
|
|
|
|
| 425 |
questions_data = response.json()
|
| 426 |
if not questions_data:
|
| 427 |
logger.error("Fetched questions list is empty.")
|
| 428 |
+
return "Fetched questions list is empty or invalid format.", None, auth_status
|
| 429 |
logger.info(f"Fetched {len(questions_data)} questions.")
|
| 430 |
except requests.exceptions.RequestException as e:
|
| 431 |
logger.error(f"Error fetching questions: {e}")
|
| 432 |
+
return f"Error fetching questions: {e}", None, auth_status
|
| 433 |
except requests.exceptions.JSONDecodeError as e:
|
| 434 |
logger.error(f"Error decoding JSON response from questions endpoint: {e}")
|
| 435 |
+
return f"Error decoding server response for questions: {e}", None, auth_status
|
| 436 |
except Exception as e:
|
| 437 |
logger.error(f"An unexpected error occurred fetching questions: {e}")
|
| 438 |
+
return f"An unexpected error occurred fetching questions: {e}", None, auth_status
|
| 439 |
|
| 440 |
# 3. Run GAIA Agent
|
| 441 |
results_log = []
|
|
|
|
| 470 |
|
| 471 |
if not answers_payload:
|
| 472 |
logger.error("GAIA Agent did not produce any answers to submit.")
|
| 473 |
+
return "GAIA Agent did not produce any answers to submit.", pd.DataFrame(results_log), auth_status
|
| 474 |
|
| 475 |
# 4. Prepare Submission
|
| 476 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
|
|
|
| 492 |
)
|
| 493 |
logger.info("Submission successful.")
|
| 494 |
results_df = pd.DataFrame(results_log)
|
| 495 |
+
return final_status, results_df, auth_status
|
| 496 |
except requests.exceptions.HTTPError as e:
|
| 497 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 498 |
try:
|
|
|
|
| 503 |
status_message = f"Submission Failed: {error_detail}"
|
| 504 |
logger.error(status_message)
|
| 505 |
results_df = pd.DataFrame(results_log)
|
| 506 |
+
return status_message, results_df, auth_status
|
| 507 |
except requests.exceptions.Timeout:
|
| 508 |
status_message = "Submission Failed: The request timed out."
|
| 509 |
logger.error(status_message)
|
| 510 |
results_df = pd.DataFrame(results_log)
|
| 511 |
+
return status_message, results_df, auth_status
|
| 512 |
except requests.exceptions.RequestException as e:
|
| 513 |
status_message = f"Submission Failed: Network error - {e}"
|
| 514 |
logger.error(status_message)
|
| 515 |
results_df = pd.DataFrame(results_log)
|
| 516 |
+
return status_message, results_df, auth_status
|
| 517 |
except Exception as e:
|
| 518 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 519 |
logger.error(status_message)
|
| 520 |
results_df = pd.DataFrame(results_log)
|
| 521 |
+
return status_message, results_df, auth_status
|
| 522 |
|
| 523 |
def create_interface():
|
| 524 |
"""Create the Gradio interface with both Unit 4 API and manual testing"""
|
|
|
|
| 836 |
elem_classes=["oauth-login"]
|
| 837 |
)
|
| 838 |
|
| 839 |
+
with gr.Row():
|
| 840 |
+
login_button = gr.LoginButton()
|
| 841 |
+
refresh_auth_button = gr.Button("🔄 Refresh Auth Status", variant="secondary", scale=1)
|
| 842 |
|
| 843 |
unit4_run_button = gr.Button(
|
| 844 |
"🚀 Run GAIA Evaluation & Submit All Answers",
|
|
|
|
| 926 |
# Event handlers for Unit 4 API
|
| 927 |
unit4_run_button.click(
|
| 928 |
fn=run_and_submit_all,
|
| 929 |
+
outputs=[unit4_status_output, unit4_results_table, auth_status_display]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 930 |
)
|
| 931 |
|
| 932 |
+
# Refresh authentication status
|
| 933 |
+
refresh_auth_button.click(
|
| 934 |
fn=format_auth_status,
|
| 935 |
outputs=[auth_status_display]
|
| 936 |
)
|
src/production_deployment_guide.md
CHANGED
|
@@ -13,22 +13,38 @@ The production system was failing with 0% success rate because:
|
|
| 13 |
|
| 14 |
### Solution Implemented ✅
|
| 15 |
|
| 16 |
-
Created a **robust 3-tier fallback system**:
|
| 17 |
|
| 18 |
1. **OAuth Token Support**: `GAIAAgentApp.create_with_oauth_token(oauth_token)`
|
| 19 |
2. **Automatic Fallback**: When main models fail, falls back to SimpleClient
|
| 20 |
3. **Rule-Based Responses**: SimpleClient provides reliable answers for common questions
|
| 21 |
4. **Always Works**: System guaranteed to provide responses in production
|
|
|
|
| 22 |
|
| 23 |
#### Technical Implementation:
|
| 24 |
|
| 25 |
```python
|
| 26 |
-
# 1. OAuth Token Extraction
|
| 27 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 28 |
oauth_token = getattr(profile, 'oauth_token', None) or getattr(profile, 'token', None)
|
| 29 |
agent = GAIAAgentApp.create_with_oauth_token(oauth_token)
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
def __init__(self, hf_token: Optional[str] = None):
|
| 33 |
try:
|
| 34 |
# Try main QwenClient with OAuth
|
|
@@ -41,7 +57,7 @@ def __init__(self, hf_token: Optional[str] = None):
|
|
| 41 |
# Fallback to SimpleClient
|
| 42 |
self.llm_client = SimpleClient(hf_token=hf_token)
|
| 43 |
|
| 44 |
-
#
|
| 45 |
class SimpleClient:
|
| 46 |
def _generate_simple_response(self, prompt):
|
| 47 |
# Mathematics: "2+2" → "4", "25% of 200" → "50"
|
|
@@ -49,6 +65,14 @@ class SimpleClient:
|
|
| 49 |
# Always provides meaningful responses
|
| 50 |
```
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
## 🎯 Expected Results
|
| 53 |
|
| 54 |
After successful deployment with fallback system:
|
|
|
|
| 13 |
|
| 14 |
### Solution Implemented ✅
|
| 15 |
|
| 16 |
+
Created a **robust 3-tier fallback system** with **OAuth scope detection**:
|
| 17 |
|
| 18 |
1. **OAuth Token Support**: `GAIAAgentApp.create_with_oauth_token(oauth_token)`
|
| 19 |
2. **Automatic Fallback**: When main models fail, falls back to SimpleClient
|
| 20 |
3. **Rule-Based Responses**: SimpleClient provides reliable answers for common questions
|
| 21 |
4. **Always Works**: System guaranteed to provide responses in production
|
| 22 |
+
5. **OAuth Scope Detection**: Real-time display of user authentication capabilities
|
| 23 |
|
| 24 |
#### Technical Implementation:
|
| 25 |
|
| 26 |
```python
|
| 27 |
+
# 1. OAuth Token Extraction & Scope Detection
|
| 28 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 29 |
oauth_token = getattr(profile, 'oauth_token', None) or getattr(profile, 'token', None)
|
| 30 |
agent = GAIAAgentApp.create_with_oauth_token(oauth_token)
|
| 31 |
+
# Returns auth status for UI display
|
| 32 |
+
auth_status = format_auth_status(profile)
|
| 33 |
+
|
| 34 |
+
# 2. OAuth Scope Detection
|
| 35 |
+
def check_oauth_scopes(oauth_token: str):
|
| 36 |
+
# Tests read capability via whoami endpoint
|
| 37 |
+
can_read = requests.get("https://huggingface.co/api/whoami", headers=headers).status_code == 200
|
| 38 |
+
# Tests inference capability via model API
|
| 39 |
+
can_inference = inference_response.status_code in [200, 503]
|
| 40 |
+
|
| 41 |
+
# 3. Dynamic UI Status Display
|
| 42 |
+
def format_auth_status(profile):
|
| 43 |
+
# Shows detected scopes and available features
|
| 44 |
+
# Provides clear performance expectations
|
| 45 |
+
# Educational messaging about OAuth limitations
|
| 46 |
+
|
| 47 |
+
# 4. Robust Fallback System
|
| 48 |
def __init__(self, hf_token: Optional[str] = None):
|
| 49 |
try:
|
| 50 |
# Try main QwenClient with OAuth
|
|
|
|
| 57 |
# Fallback to SimpleClient
|
| 58 |
self.llm_client = SimpleClient(hf_token=hf_token)
|
| 59 |
|
| 60 |
+
# 5. SimpleClient Rule-Based Responses
|
| 61 |
class SimpleClient:
|
| 62 |
def _generate_simple_response(self, prompt):
|
| 63 |
# Mathematics: "2+2" → "4", "25% of 200" → "50"
|
|
|
|
| 65 |
# Always provides meaningful responses
|
| 66 |
```
|
| 67 |
|
| 68 |
+
#### OAuth Scope Detection UI Features:
|
| 69 |
+
|
| 70 |
+
- **Real-time Authentication Status**: Shows login state and detected scopes
|
| 71 |
+
- **Capability Display**: Clear indication of available features based on scopes
|
| 72 |
+
- **Performance Expectations**: 30%+ with inference scope, 15%+ with limited scopes
|
| 73 |
+
- **Manual Refresh**: Users can update auth status with refresh button
|
| 74 |
+
- **Educational Messaging**: Clear explanations of OAuth limitations
|
| 75 |
+
|
| 76 |
## 🎯 Expected Results
|
| 77 |
|
| 78 |
After successful deployment with fallback system:
|