|
|
import os |
|
|
import gradio as gr |
|
|
import requests |
|
|
import inspect |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
from huggingface_hub import login |
|
|
import warnings |
|
|
|
|
|
|
|
|
from smolagents import CodeAgent, InferenceClientModel, tool |
|
|
import re |
|
|
from typing import Optional, Union, Any |
|
|
import json |
|
|
import csv |
|
|
import io |
|
|
import math |
|
|
import statistics |
|
|
|
|
|
|
|
|
import base64 |
|
|
from urllib.parse import urlparse |
|
|
import mimetypes |
|
|
|
|
|
|
|
|
|
|
|
# Base URL of the scoring service; run_and_submit_all derives the
# "/questions" and "/submit" endpoints from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
|
|
|
|
@tool |
|
|
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as text.

    Args:
        url: The URL of the webpage to visit

    Returns:
        The content of the webpage as text, or an error message if the request fails
    """
    # NOTE(review): the @tool decorator presumably derives the tool description
    # from the docstring above - keep its Args section in sync with the signature.
    try:
        # Imported here so that a missing dependency is reported as an error
        # string by the handler below instead of breaking module import.
        import requests
        from bs4 import BeautifulSoup

        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        page = requests.get(url, headers=browser_headers, timeout=10)
        page.raise_for_status()

        document = BeautifulSoup(page.content, 'html.parser')

        # Remove <script> and <style> elements so only visible text remains.
        for element in document(["script", "style"]):
            element.decompose()

        # Normalise whitespace: strip every line, split it on spaces and keep
        # only the non-empty pieces, then re-join with single spaces.
        pieces = []
        for raw_line in document.get_text().splitlines():
            for piece in raw_line.strip().split(" "):
                piece = piece.strip()
                if piece:
                    pieces.append(piece)
        content = ' '.join(pieces)

        # Cap the amount of text handed back to the caller.
        limit = 8000
        if len(content) > limit:
            return content[:limit] + "... [Content truncated]"
        return content

    except Exception as error:
        # Any failure (import, network, HTTP status, parsing) is reported as text.
        return f"Error visiting webpage: {str(error)}"
|
|
|
|
|
@tool |
|
|
def calculate_math(expression: str) -> str:
    """Safely evaluates mathematical expressions and performs calculations.

    Public names of the math module (sqrt, sin, cos, pi, e, ...) may be used
    bare or with a "math." prefix. "log" is the base-10 logarithm and "ln"
    the natural logarithm.

    Args:
        expression: A mathematical expression to evaluate (e.g., "2+2", "sqrt(16)", "log(100)")

    Returns:
        The result of the calculation or an error message
    """
    try:
        import math

        expression = expression.strip()

        # SECURITY: eval() on model-provided text is not a real sandbox even
        # with empty __builtins__. Rejecting dunder access blocks the usual
        # attribute-walking escapes; replace eval with a proper expression
        # parser if this tool is ever exposed to untrusted users.
        if "__" in expression:
            return "Error in calculation: double underscores are not allowed"

        # Expose the math module's public names directly instead of rewriting
        # the expression with regexes: rewriting turned already-qualified input
        # such as "math.sqrt(16)" into "math.math.sqrt(16)".
        namespace = {
            name: value
            for name, value in vars(math).items()
            if not name.startswith("_")
        }
        namespace.update({
            "__builtins__": {},
            "math": math,
            "log": math.log10,  # bare "log" means base 10 in this tool
            "ln": math.log,
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
            "len": len,
            "pow": pow,  # builtin pow keeps integer results exact
        })

        result = eval(expression, namespace)
        return str(result)

    except Exception as e:
        return f"Error in calculation: {str(e)}"
|
|
|
|
|
@tool |
|
|
def analyze_data(data: str, operation: str = "summary") -> str:
    """Analyzes numerical data and performs statistical operations.

    Args:
        data: Comma-separated numerical data or JSON array
        operation: Type of analysis ("summary", "mean", "median", "std", "count", "sum", "min", "max")

    Returns:
        The result of the data analysis
    """
    try:
        import json
        import statistics

        # Strip first so that surrounding whitespace does not hide a JSON array.
        payload = data.strip()
        if payload.startswith('[') and payload.endswith(']'):
            numbers = json.loads(payload)
        else:
            numbers = [float(x.strip()) for x in payload.split(',') if x.strip()]

        if not numbers:
            return "No valid numerical data provided"

        # Operation names are matched case-insensitively.
        requested = operation.strip().lower()

        if requested == "summary":
            summary = {
                "count": len(numbers),
                "sum": sum(numbers),
                "mean": statistics.mean(numbers),
                "median": statistics.median(numbers),
                "min": min(numbers),
                "max": max(numbers),
            }
            # The sample standard deviation needs at least two data points.
            if len(numbers) > 1:
                summary["std"] = statistics.stdev(numbers)
            return json.dumps(summary, indent=2)

        single_value_operations = {
            "mean": statistics.mean,
            "median": statistics.median,
            # A single data point is reported as "0" rather than an error.
            "std": lambda values: statistics.stdev(values) if len(values) > 1 else "0",
            "count": len,
            "sum": sum,
            "min": min,
            "max": max,
        }
        compute = single_value_operations.get(requested)
        if compute is None:
            return f"Unknown operation: {operation}"
        return str(compute(numbers))

    except Exception as e:
        return f"Error in data analysis: {str(e)}"
|
|
|
|
|
@tool |
|
|
def extract_numbers(text: str) -> str:
    """Extracts all numbers from a text string.

    Args:
        text: Text containing numbers

    Returns:
        Comma-separated list of extracted numbers
    """
    try:
        import re

        # Optional leading minus, integer part, optional decimal fraction.
        number_pattern = re.compile(r'-?\d+(?:\.\d+)?')
        found = [match.group(0) for match in number_pattern.finditer(text)]

        if found:
            return ', '.join(found)
        return "No numbers found in the text"

    except Exception as error:
        return f"Error extracting numbers: {str(error)}"
|
|
|
|
|
@tool |
|
|
def process_file_content(file_url: str) -> str:
    """Downloads and processes content from a file URL, supporting various formats.

    Text, CSV and JSON files are returned as text. Other formats (for example
    PDF) are not decoded; only their size and content type are reported.

    Args:
        file_url: URL to a file (PDF, CSV, TXT, etc.)

    Returns:
        The processed content of the file as text
    """
    try:
        # Local imports keep the tool self-contained, like the other tools here.
        import json
        import mimetypes
        from urllib.parse import urlparse

        import requests

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

        response = requests.get(file_url, headers=headers, timeout=30)
        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()

        # When the server sends no content type, or only the generic
        # "application/octet-stream", fall back to a guess from the URL's file
        # extension so plain .csv/.txt/.json downloads are still returned as text.
        detected_type = content_type
        if not detected_type or 'application/octet-stream' in detected_type:
            guessed_type, _ = mimetypes.guess_type(urlparse(file_url).path)
            detected_type = (guessed_type or detected_type).lower()

        if 'text/' in detected_type or 'csv' in detected_type:
            return response.text
        if 'json' in detected_type:
            return json.dumps(response.json(), indent=2)

        # Binary payloads are only described, never decoded.
        return f"Binary file detected. Size: {len(response.content)} bytes. Content-Type: {content_type}"

    except Exception as e:
        return f"Error processing file: {str(e)}"
|
|
|
|
|
@tool |
|
|
def solve_equation(equation: str) -> str:
    """Solves mathematical equations and expressions symbolically.

    Input containing a single "=" (or "==") is solved as an equation, an
    inequality is solved as such, and anything else is simplified. "^" is
    accepted as the power operator. Equations are solved for x when x occurs
    in them, otherwise for the symbols that do occur.

    Args:
        equation: Mathematical equation to solve (e.g., "x^2 + 2*x - 3 = 0")

    Returns:
        The solution to the equation
    """
    try:
        import re

        import sympy as sp

        text = equation.strip()

        def parse(source):
            # SECURITY: sympify() evaluates its input with eval() internally,
            # so this tool must not be fed untrusted text.
            # convert_xor=True makes "^" mean exponentiation.
            return sp.sympify(source, convert_xor=True)

        def pick_symbols(expression):
            # Prefer x (what this tool has always solved for); otherwise use
            # whatever symbols the input actually contains.
            x = sp.Symbol('x')
            present = sorted(expression.free_symbols, key=lambda s: s.name)
            if x in present or not present:
                return [x]
            return present

        # Split on a lone "=" or "==" while leaving "<=", ">=" and "!=" intact.
        # Python's "==" on sympy expressions is a structural comparison that
        # yields a bool, so the two sides have to be parsed separately.
        sides = re.split(r'(?<![<>!=])==?(?!=)', text)
        if len(sides) > 2:
            return "Error solving equation: expected at most one '=' sign"

        if len(sides) == 2:
            left, right = parse(sides[0]), parse(sides[1])
            expression = left - right  # solve() treats an expression as "== 0"
            return str(sp.solve(expression, *pick_symbols(expression)))

        expression = parse(text)
        if getattr(expression, "is_Relational", False):
            # Inequalities such as "x^2 < 4".
            return str(sp.solve(expression, *pick_symbols(expression)))

        return str(sp.simplify(expression))

    except Exception as e:
        return f"Error solving equation: {str(e)}"
|
|
|
|
|
@tool |
|
|
def parse_structured_data(data: str, format_type: str = "auto") -> str:
    """Parses and analyzes structured data (CSV, JSON, etc.).

    JSON input is validated and pretty-printed. CSV input is summarised
    (shape, columns, first rows and, when numeric columns exist, descriptive
    statistics).

    Args:
        data: The structured data as a string
        format_type: Format type ("csv", "json", "auto")

    Returns:
        Analysis of the structured data
    """
    try:
        import json
        from io import StringIO

        # Format names are matched case-insensitively.
        requested = format_type.strip().lower()

        if requested == "auto":
            # Heuristic: JSON starts with a brace/bracket; CSV has commas and
            # more than one line. Anything else stays "auto" and is rejected.
            probe = data.strip()
            if probe.startswith(('{', '[')):
                requested = "json"
            elif ',' in probe and '\n' in probe:
                requested = "csv"

        if requested == "json":
            return json.dumps(json.loads(data), indent=2)

        if requested == "csv":
            # pandas is only needed for CSV, so import it on this path only.
            import pandas as pd

            df = pd.read_csv(StringIO(data))
            result = f"DataFrame shape: {df.shape}\n"
            result += f"Columns: {list(df.columns)}\n"
            result += f"First 5 rows:\n{df.head().to_string()}\n"
            # Check for the presence of numeric columns explicitly; calling
            # .any() on the column index tests the truthiness of the labels.
            if not df.select_dtypes(include=['number']).columns.empty:
                result += f"Numerical summary:\n{df.describe().to_string()}"
            return result

        return f"Unsupported format: {format_type}"

    except Exception as e:
        return f"Error parsing data: {str(e)}"
|
|
|
|
|
def setup_authentication() -> bool:
    """Setup HuggingFace authentication for the app.

    Reads a token from the HF_TOKEN or HUGGINGFACE_HUB_TOKEN environment
    variable and, when one is present, passes it to ``login``.

    Returns:
        True when a token was found and ``login`` completed without raising,
        False when no token is set or the login attempt raised.
    """
    try:
        hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")

        if not hf_token:
            print("ℹ️ No HF token found in environment")
            print("💡 If running locally, please set HF_TOKEN environment variable")
            print("💡 For Spaces deployment, this should work automatically")
            return False

        login(token=hf_token)
        print("✅ Authenticated with HuggingFace using environment token")
        return True

    except Exception as e:
        # Best effort: an authentication problem is reported, not raised, so
        # the app can still start.
        print(f"⚠️ Authentication issue: {e}")
        return False
|
|
|
|
|
|
|
|
class GAIAAgent:
    """Callable question-answering agent built on a smolagents ``CodeAgent``.

    Construction authenticates with the Hugging Face Hub (best effort), picks
    an inference model and wires the custom tools defined in this module into
    a ``CodeAgent``. Calling the instance with a question returns the agent's
    answer as a cleaned-up string.
    """

    PRIMARY_MODEL_ID = "meta-llama/Llama-3.3-70B-Instruct"
    FALLBACK_MODEL_ID = "microsoft/DialoGPT-medium"

    # Lines starting with one of these are treated as reasoning, not as the answer.
    _REASONING_PREFIXES = ('Step', 'First', 'Next', 'Then', 'Finally', 'Therefore', 'So,', 'Thus')

    # Leading labels removed from the answer, applied in this order. The \b
    # stops a label from eating the start of a real word ("Results vary").
    _ANSWER_LABELS = tuple(
        re.compile(pattern, flags=re.IGNORECASE)
        for pattern in (
            r'^FINAL\s*ANSWER\b\s*:?\s*',
            r'^ANSWER\b\s*:?\s*',
            r'^RESULT\b\s*:?\s*',
            r'^THE\s*ANSWER\s*IS\b\s*:?\s*',
        )
    )

    # Integer-valued decimals such as "42.0" or "-3.00".
    _WHOLE_NUMBER = re.compile(r'^-?\d+\.0+$')

    _PROMPT_TEMPLATE = "\n".join((
        'You are an expert AI assistant designed to excel at the GAIA benchmark. '
        'You must answer questions with perfect accuracy using a systematic approach.',
        '',
        'CRITICAL INSTRUCTIONS FOR GAIA SUCCESS:',
        '1. ANALYZE THE QUESTION: Read carefully and identify what type of question this is:',
        '   - Mathematical calculation or equation',
        '   - Information retrieval from web/files',
        '   - Data analysis or statistics',
        '   - Multi-step reasoning problem',
        '   - Factual lookup',
        '',
        '2. CHOOSE YOUR APPROACH:',
        '   - For math: Use calculate_math tool or solve_equation for complex equations',
        '   - For web info: Use DuckDuckGoSearchTool then visit_webpage for details',
        '   - For files: Use process_file_content to download and analyze',
        '   - For data: Use analyze_data or parse_structured_data',
        '   - For numbers in text: Use extract_numbers first',
        '',
        '3. BE SYSTEMATIC:',
        '   - Break complex questions into steps',
        '   - Use multiple tools if needed',
        '   - Verify your reasoning',
        '   - Double-check calculations',
        '',
        '4. ANSWER FORMAT:',
        '   - Give ONLY the final answer',
        '   - No explanations, no "FINAL ANSWER:" prefix',
        '   - For numbers: just the number (e.g., "42", not "42.0")',
        '   - For text: just the text without quotes',
        '   - Be precise with units, dates, and formatting',
        '',
        '5. ACCURACY IS PARAMOUNT:',
        '   - GAIA requires exact matches',
        '   - Round numbers appropriately',
        '   - Use proper case and spelling',
        '   - Include units when relevant',
        '',
        'Question: {question}',
        '',
        'Think step by step, use the appropriate tools, and provide only the final answer:',
    ))

    def __init__(self):
        """Authenticate, choose a model and build the underlying CodeAgent.

        Raises:
            Exception: re-raised when no model can be created or the
                CodeAgent cannot be constructed.
        """
        print("GAIAAgent initializing with smolagents...")

        hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
        self._authenticate(hf_token)
        self.model = self._create_model(hf_token)

        self.custom_tools = [
            visit_webpage,
            calculate_math,
            analyze_data,
            extract_numbers,
            process_file_content,
            solve_equation,
            parse_structured_data,
        ]

        try:
            self.agent = CodeAgent(
                tools=self.custom_tools,
                model=self.model,
                add_base_tools=True,
                additional_authorized_imports=[
                    'requests', 'bs4', 'json', 'csv', 'math', 'statistics',
                    're', 'urllib.parse', 'base64', 'datetime', 'calendar',
                    'pandas', 'numpy', 'sympy', 'scipy',
                ],
                max_steps=15,
                verbosity_level=1,
            )
            print("✅ GAIA Agent initialized successfully with PRO model and enhanced tools")
        except Exception as e:
            print(f"❌ Error initializing agent: {e}")
            raise

    @staticmethod
    def _authenticate(hf_token):
        """Log in to the Hub when a token is available; failures only warn."""
        try:
            if hf_token:
                login(token=hf_token)
                print("✅ Authenticated with HuggingFace using environment token")
            else:
                print("ℹ️ No HF token found in environment, proceeding without explicit login")
        except Exception as e:
            print(f"⚠️ Authentication warning: {e}")

    def _create_model(self, hf_token):
        """Return the first model that can be created: primary, fallback, default."""
        try:
            model = InferenceClientModel(model_id=self.PRIMARY_MODEL_ID, token=hf_token)
            print(f"✅ Model initialized successfully: {self.PRIMARY_MODEL_ID}")
            return model
        except Exception as e:
            print(f"⚠️ Error with primary model: {e}")

        try:
            model = InferenceClientModel(model_id=self.FALLBACK_MODEL_ID)
            print(f"✅ Fallback model initialized: {self.FALLBACK_MODEL_ID}")
            return model
        except Exception as e:
            print(f"⚠️ Error with fallback model: {e}")

        try:
            model = InferenceClientModel()
            print("✅ Default model initialized")
            return model
        except Exception as e:
            print(f"❌ Critical error - could not initialize any model: {e}")
            raise

    @classmethod
    def _build_prompt(cls, question: str) -> str:
        """Embed the question in the fixed instruction prompt."""
        return cls._PROMPT_TEMPLATE.format(question=question)

    @classmethod
    def _clean_answer(cls, result) -> str:
        """Reduce an agent result of any type to a bare answer string."""
        # Non-string results (e.g. the float 42.0) get the same normalisation
        # as strings so that they come out as "42" as well.
        text = result if isinstance(result, str) else str(result)
        text = text.strip()

        # Keep the first non-empty line that does not look like reasoning.
        for line in text.split('\n'):
            line = line.strip()
            if line and not line.startswith(cls._REASONING_PREFIXES):
                text = line
                break

        for label in cls._ANSWER_LABELS:
            text = label.sub('', text)

        # Drop one pair of matching surrounding quotes.
        if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
            text = text[1:-1]

        # "42.0" -> "42". The integer part is converted directly so large
        # numbers do not lose precision through a float round trip.
        if cls._WHOLE_NUMBER.match(text):
            text = str(int(text.split('.', 1)[0]))

        return text.strip()

    def __call__(self, question: str) -> str:
        """Process a question and return the answer.

        Args:
            question: The question text to hand to the agent.

        Returns:
            The cleaned answer, or an "Error processing question: ..." message
            when the agent run fails.
        """
        try:
            print(f"🤔 Processing question: {question[:100]}...")

            prompt = self._build_prompt(question)

            try:
                result = self.agent.run(prompt)
            except Exception as api_error:
                # Quota/payment failures become the answer text; anything else
                # is handled by the outer handler.
                if "402" in str(api_error) or "Payment Required" in str(api_error):
                    print(f"⚠️ API quota issue (you have Pro, this shouldn't happen): {api_error}")
                    result = f"API Error: {str(api_error)}"
                else:
                    raise

            answer = self._clean_answer(result)
            print(f"✅ Agent response: {answer}")
            return answer

        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(f"❌ {error_msg}")
            return error_msg
|
|
|
|
|
def _fetch_questions(questions_url):
    """Download the question list; return (questions, None) or (None, error message)."""
    print(f"📥 Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return None, "Fetched questions list is empty or invalid format."
        print(f"✅ Fetched {len(questions_data)} questions from GAIA benchmark.")
        return questions_data, None
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # subclass of it, so in the reverse order this branch is unreachable.
        print(f"❌ Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return None, f"Error decoding server response for questions: {e}"
    except requests.exceptions.RequestException as e:
        print(f"❌ Error fetching questions: {e}")
        return None, f"Error fetching questions: {e}"
    except Exception as e:
        print(f"❌ An unexpected error occurred fetching questions: {e}")
        return None, f"An unexpected error occurred fetching questions: {e}"


def _answer_questions(agent, questions_data):
    """Run the agent on every question; return (answers_payload, results_log)."""
    answers_payload = []
    results_log = []
    total = len(questions_data)

    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"⚠️ Skipping item with missing task_id or question: {item}")
            continue

        print(f"\n📝 Processing question {i}/{total} (ID: {task_id})")
        try:
            submitted_answer = agent(question_text)
            print(f"✅ Answer for {task_id}: {submitted_answer}")
        except Exception as e:
            # A failing question is still submitted, with the error as its answer.
            submitted_answer = f"AGENT ERROR: {e}"
            print(f"❌ Error running agent on task {task_id}: {e}")

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            # Long questions are shortened for the results table.
            "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
            "Submitted Answer": submitted_answer,
        })

    return answers_payload, results_log


def _format_submission_status(result_data):
    """Build the user-facing status text from the scoring service's response."""
    score = result_data.get('score', 'N/A')
    correct_count = result_data.get('correct_count', '?')
    total_attempted = result_data.get('total_attempted', '?')

    final_status = (
        f"🎉 Submission Successful!\n"
        f"👤 User: {result_data.get('username')}\n"
        f"📊 Overall Score: {score}% ({correct_count}/{total_attempted} correct)\n"
        f"🎯 Target: >30% for certification\n"
        f"💬 Message: {result_data.get('message', 'No message received.')}"
    )

    # The score is only compared when the service returned a number.
    if isinstance(score, (int, float)):
        if score >= 30:
            final_status += "\n🏆 CONGRATULATIONS! You've achieved the target score of 30%!"
        else:
            final_status += f"\n📈 Keep improving! You need {30-score:.1f}% more to reach the target."
    return final_status


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in.

    Returns:
        A ``(status_text, results)`` pair where ``results`` is a DataFrame of
        the per-question answers, or None when the run stopped before any
        question was processed.
    """
    # When SPACE_ID is unset the code link below contains the text "None".
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        print("🚀 Initializing GAIA Agent with smolagents...")
        agent = GAIAAgent()
        print("✅ Enhanced agent ready for GAIA benchmark!")
    except Exception as e:
        error_msg = f"Error initializing agent: {e}"
        print(f"❌ {error_msg}")
        return error_msg, None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code link: {agent_code}")

    questions_data, fetch_error = _fetch_questions(questions_url)
    if fetch_error is not None:
        return fetch_error, None

    print(f"🤖 Running enhanced GAIA agent on {len(questions_data)} questions...")
    answers_payload, results_log = _answer_questions(agent, questions_data)

    if not answers_payload:
        print("❌ Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"🏁 Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    print(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
    results_df = pd.DataFrame(results_log)
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        final_status = _format_submission_status(response.json())
        print("✅ Submission successful!")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"❌ Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "❌ Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"❌ Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"❌ An unexpected error occurred during submission: {e}"

    # Every failure path still returns the answers collected so far.
    print(status_message)
    return status_message, results_df
|
|
|
|
|
|
|
|
|
|
|
# Gradio interface: an introduction, a login button and a single button that
# runs run_and_submit_all and shows its status text and results table.
with gr.Blocks(title="GAIA Agent Evaluation") as demo:
    gr.Markdown("# 🤖 Enhanced GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Enhanced Agent for GAIA Benchmark Certification**

        This enhanced agent uses Hugging Face's **smolagents** framework with multiple specialized tools:
        - 🔍 **Web Search**: DuckDuckGoSearchTool (from base toolkit) for finding information
        - 🐍 **Python Interpreter**: Code execution capabilities (from base toolkit)
        - 🌐 **Web Scraping**: Custom webpage visitor for content extraction
        - 🧮 **Mathematics**: Calculations and symbolic equation solving
        - 📊 **Data Analysis**: Statistical analysis of numerical data
        - 🔢 **Number Extraction**: Intelligent number parsing from text
        - 📁 **Files & Structured Data**: File download plus CSV/JSON parsing
        - 🤖 **LLM Model**: Llama-3.3-70B-Instruct for advanced reasoning

        **Instructions:**
        1. 📋 **Clone this space** and customize the agent as needed
        2. 🔑 **Log in** to your Hugging Face account using the button below
        3. 🚀 **Click 'Run Evaluation'** to test your agent on GAIA benchmark questions
        4. 🎯 **Target**: Score >30% for course certification

        **Goal**: Answer GAIA level 1 validation questions with exact match precision.

        ---
        ⚠️ **Note**: Processing all questions may take several minutes due to the complexity of reasoning required.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="lg")

    status_output = gr.Textbox(
        label="📊 Evaluation Status & Results",
        lines=8,
        interactive=False,
        placeholder="Click the button above to start the evaluation...",
    )

    results_table = gr.DataFrame(
        label="📝 Questions and Agent Responses",
        wrap=True,
        headers=["Task ID", "Question", "Submitted Answer"],
    )

    # No explicit inputs are passed; run_and_submit_all declares a
    # gr.OAuthProfile parameter for the logged-in user.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
|
|
|
|
|
if __name__ == "__main__":
    # Startup banner and environment diagnostics, then launch the Gradio app.
    print("\n" + "="*60)
    print("🤖 ENHANCED GAIA AGENT STARTING UP")
    print("="*60)

    print("🔐 Setting up HuggingFace authentication...")
    auth_success = setup_authentication()

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # Append the ".hf.space" suffix only when the host does not already
        # carry it, so the printed URL never ends in ".hf.space.hf.space".
        runtime_host = (
            space_host_startup
            if space_host_startup.endswith(".hf.space")
            else f"{space_host_startup}.hf.space"
        )
        print(f"   🌐 Runtime URL: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
        if not auth_success:
            print("💡 For local testing, you may need to run:")
            print("   from huggingface_hub import notebook_login")
            print("   notebook_login()")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   📁 Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   🔗 Code URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?).")

    print("="*60)
    print("🚀 Launching Enhanced GAIA Agent Interface...")
    print("🎯 Target: >30% score on GAIA benchmark")
    print("="*60 + "\n")

    demo.launch(debug=True, share=False)