# Scraped page header (not part of the program): vissutagunawan's picture
# Commit message: Update app.py
# Commit: c0a01ca (verified)
import os
import gradio as gr
import requests
import inspect
import pandas as pd
# HuggingFace authentication
from huggingface_hub import login
import warnings
# smolagents imports
from smolagents import CodeAgent, InferenceClientModel, tool
import re
from typing import Optional, Union, Any
import json
import csv
import io
import math
import statistics
# Additional imports for custom tools
import base64
from urllib.parse import urlparse
import mimetypes
# --- Constants ---
# Base URL of the scoring API; run_and_submit_all appends "/questions" and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Custom Tools for GAIA Tasks ---
@tool
def visit_webpage(url: str) -> str:
    """Fetches a webpage and returns its visible text.

    Args:
        url: The URL of the webpage to visit

    Returns:
        The page text with script/style content removed and whitespace
        collapsed, cut to 8000 characters plus a truncation marker when
        longer; or an error message if the request fails
    """
    try:
        # Imports stay local so the tool function is self-contained.
        import requests
        from bs4 import BeautifulSoup

        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        page = requests.get(url, headers=browser_headers, timeout=10)
        page.raise_for_status()

        document = BeautifulSoup(page.content, 'html.parser')
        # Script and style bodies are not readable content.
        for element in document(["script", "style"]):
            element.decompose()

        # Collapse the extracted text into single-space separated fragments.
        fragments = []
        for raw_line in document.get_text().splitlines():
            for piece in raw_line.strip().split(" "):
                piece = piece.strip()
                if piece:
                    fragments.append(piece)
        cleaned = ' '.join(fragments)

        # Keep the result short enough to fit in the model context.
        if len(cleaned) > 8000:
            return cleaned[:8000] + "... [Content truncated]"
        return cleaned
    except Exception as exc:
        return f"Error visiting webpage: {str(exc)}"
@tool
def calculate_math(expression: str) -> str:
    """Evaluates a mathematical expression and returns the result as text.

    Bare names are mapped onto the math module before evaluation: log is the
    base-10 logarithm, ln the natural logarithm, sqrt/sin/cos/tan the math
    functions, pi and e the constants. Names already written as math.<name>
    are left untouched. Use ** for powers (^ is Python's bitwise XOR).

    Args:
        expression: A mathematical expression to evaluate (e.g., "2+2", "sqrt(16)", "log(100)")

    Returns:
        The result of the calculation or an error message
    """
    try:
        # Imports stay local so the tool function is self-contained.
        import math
        import re

        expression = expression.strip()
        if not expression:
            return "Error in calculation: empty expression"

        # SECURITY NOTE(review): eval() with an emptied __builtins__ is NOT a
        # sandbox; attribute chains through dunder names can still reach
        # arbitrary objects. Rejecting "__" blocks that route, but this tool
        # should only ever receive expressions from a trusted agent.
        if '__' in expression:
            return "Error in calculation: double underscores are not allowed"

        # The lookbehind skips names that are already attribute accesses, so
        # "math.sqrt(4)" is not rewritten into "math.math.sqrt(4)".
        replacements = (
            ('log', 'math.log10'),
            ('ln', 'math.log'),
            ('sqrt', 'math.sqrt'),
            ('sin', 'math.sin'),
            ('cos', 'math.cos'),
            ('tan', 'math.tan'),
            ('pi', 'math.pi'),
            ('e', 'math.e'),
        )
        for name, target in replacements:
            expression = re.sub(rf'(?<![\w.]){name}\b', target, expression)

        # Names available to the evaluated expression.
        safe_dict = {
            "__builtins__": {},
            "math": math,
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
            "len": len,
            "pow": pow,
        }
        result = eval(expression, safe_dict)
        return str(result)
    except Exception as e:
        return f"Error in calculation: {str(e)}"
@tool
def analyze_data(data: str, operation: str = "summary") -> str:
    """Analyzes numerical data and performs statistical operations.

    Args:
        data: Comma-separated numerical data or JSON array
        operation: Type of analysis ("summary", "mean", "median", "std", "count", "sum", "min", "max")

    Returns:
        The result of the data analysis
    """
    try:
        # Imports stay local so the tool function is self-contained.
        import json
        import statistics

        # Strip first so surrounding whitespace/newlines do not hide a JSON array.
        data = data.strip()
        if data.startswith('[') and data.endswith(']'):
            # JSON array format
            numbers = json.loads(data)
        else:
            # Comma-separated format; empty fields are ignored
            numbers = [float(x.strip()) for x in data.split(',') if x.strip()]
        if not numbers:
            return "No valid numerical data provided"

        requested = operation.strip().lower()
        if requested == "summary":
            result = {
                "count": len(numbers),
                "sum": sum(numbers),
                "mean": statistics.mean(numbers),
                "median": statistics.median(numbers),
                "min": min(numbers),
                "max": max(numbers),
            }
            # Sample standard deviation needs at least two values.
            if len(numbers) > 1:
                result["std"] = statistics.stdev(numbers)
            return json.dumps(result, indent=2)

        single_stats = {
            "mean": statistics.mean,
            "median": statistics.median,
            # A single value is reported as "0" rather than raising.
            "std": lambda values: statistics.stdev(values) if len(values) > 1 else "0",
            "count": len,
            "sum": sum,
            "min": min,
            "max": max,
        }
        compute = single_stats.get(requested)
        if compute is None:
            return f"Unknown operation: {operation}"
        return str(compute(numbers))
    except Exception as e:
        return f"Error in data analysis: {str(e)}"
@tool
def extract_numbers(text: str) -> str:
    """Finds every integer or decimal number in a piece of text.

    Args:
        text: Text containing numbers

    Returns:
        Comma-separated list of extracted numbers
    """
    try:
        # Import stays local so the tool function is self-contained.
        import re

        # Optional minus sign, digits, optional fractional part.
        found = re.findall(r'-?\d+(?:\.\d+)?', text)
        return ', '.join(found) if found else "No numbers found in the text"
    except Exception as exc:
        return f"Error extracting numbers: {str(exc)}"
@tool
def process_file_content(file_url: str) -> str:
    """Downloads a file and returns its content as text when it is textual.

    Text and CSV responses are returned as-is, JSON is pretty-printed, and
    anything else is reported as a binary file with its size and type. When
    the server sends no Content-Type or a generic octet-stream one, the type
    is guessed from the file extension in the URL.

    Args:
        file_url: URL to a file (PDF, CSV, TXT, etc.)

    Returns:
        The processed content of the file as text
    """
    try:
        # Imports stay local so the tool function is self-contained.
        import json
        import mimetypes
        import requests
        from urllib.parse import urlparse

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(file_url, headers=headers, timeout=30)
        response.raise_for_status()

        # Get content type
        content_type = response.headers.get('content-type', '').lower()

        # Fall back to the URL's extension when the header is uninformative.
        reported = content_type.split(';', 1)[0].strip()
        if reported in ('', 'application/octet-stream', 'binary/octet-stream'):
            guessed, _ = mimetypes.guess_type(urlparse(file_url).path)
            if guessed:
                content_type = guessed.lower()

        # Process based on content type
        if 'text/' in content_type or 'csv' in content_type:
            return response.text
        if 'json' in content_type:
            return json.dumps(response.json(), indent=2)
        # For binary files, return info about the file
        return f"Binary file detected. Size: {len(response.content)} bytes. Content-Type: {content_type}"
    except Exception as e:
        return f"Error processing file: {str(e)}"
@tool
def solve_equation(equation: str) -> str:
    """Solves an equation, or simplifies an expression, symbolically with SymPy.

    Text containing "=" (or "==") is treated as an equation and solved for x
    when x appears in it, otherwise for the alphabetically first symbol it
    contains. Text without "=" is simplified. Both "^" and "**" are accepted
    for powers.

    Args:
        equation: Mathematical equation to solve (e.g., "x^2 + 2*x - 3 = 0")

    Returns:
        The solution to the equation
    """
    try:
        # Imports stay local so the tool function is self-contained.
        import sympy as sp
        from sympy.parsing.sympy_parser import (
            convert_xor,
            parse_expr,
            standard_transformations,
        )

        # Define common variables
        x, y, z, t = sp.symbols('x y z t')
        known_symbols = {'x': x, 'y': y, 'z': z, 't': t}
        # convert_xor makes "^" mean exponentiation instead of XOR.
        transformations = standard_transformations + (convert_xor,)

        def parse(text):
            # SECURITY NOTE(review): parse_expr evaluates code internally;
            # only feed it input from a trusted agent.
            return parse_expr(text, local_dict=known_symbols, transformations=transformations)

        text = equation.strip().replace('==', '=')
        has_inequality = any(op in text for op in ('<=', '>=', '!='))
        if '=' not in text or has_inequality:
            # It's an expression (or inequality) to simplify
            return str(sp.simplify(parse(equation.strip())))

        # It's an equation: solve lhs - rhs = 0. Comparing the two sides with
        # Python's "==" would only test structural equality and yield a bool.
        left_text, right_text = text.split('=', 1)
        difference = parse(left_text) - parse(right_text)

        free = difference.free_symbols
        if x in free or not free:
            target = x
        else:
            target = sorted(free, key=lambda symbol: symbol.name)[0]
        solution = sp.solve(difference, target)
        return str(solution)
    except Exception as e:
        return f"Error solving equation: {str(e)}"
@tool
def parse_structured_data(data: str, format_type: str = "auto") -> str:
    """Parses and analyzes structured data (CSV, JSON, etc.).

    JSON input is returned pretty-printed. CSV input is loaded into a
    DataFrame and summarised (shape, columns, first rows and, when numeric
    columns exist, describe() statistics).

    Args:
        data: The structured data as a string
        format_type: Format type ("csv", "json", "auto")

    Returns:
        Analysis of the structured data
    """
    try:
        # Imports stay local so the tool function is self-contained.
        import pandas as pd
        import json
        from io import StringIO

        detected = format_type.strip().lower()
        if detected == "auto":
            # Auto-detect format from the first character / separators
            data_clean = data.strip()
            if data_clean.startswith(('{', '[')):
                detected = "json"
            elif ',' in data_clean and '\n' in data_clean:
                detected = "csv"

        if detected == "json":
            parsed = json.loads(data)
            return json.dumps(parsed, indent=2)

        if detected == "csv":
            df = pd.read_csv(StringIO(data))
            result = f"DataFrame shape: {df.shape}\n"
            result += f"Columns: {list(df.columns)}\n"
            result += f"First 5 rows:\n{df.head().to_string()}\n"
            # Check that numeric columns exist; Index.any() would instead test
            # the truthiness of the column labels themselves.
            if not df.select_dtypes(include=['number']).columns.empty:
                result += f"Numerical summary:\n{df.describe().to_string()}"
            return result

        return f"Unsupported format: {format_type}"
    except Exception as e:
        return f"Error parsing data: {str(e)}"
def setup_authentication():
    """Log in to Hugging Face with a token taken from the environment.

    Reads HF_TOKEN, falling back to HUGGINGFACE_HUB_TOKEN.

    Returns:
        True when a token was found and login() succeeded; False when no
        token is set or the login attempt raised.
    """
    try:
        token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
        if not token:
            # Nothing to log in with: print hints and report failure.
            print("โ„น๏ธ No HF token found in environment")
            print("๐Ÿ’ก If running locally, please set HF_TOKEN environment variable")
            print("๐Ÿ’ก For Spaces deployment, this should work automatically")
            return False

        login(token=token)
        print("โœ… Authenticated with HuggingFace using environment token")
        return True
    except Exception as exc:
        print(f"โš ๏ธ Authentication issue: {exc}")
        return False
# --- Enhanced Agent Definition ---
class GAIAAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    Construction logs in to Hugging Face when a token is present in the
    environment, creates an ``InferenceClientModel`` (with two fallbacks) and
    wires this module's custom tools into a ``CodeAgent``. Calling the
    instance with a question returns a cleaned, single-line answer string.
    """

    # Line openers that mark reasoning narration rather than the answer itself.
    _NARRATION_PREFIXES = ('Step', 'First', 'Next', 'Then', 'Finally', 'Therefore', 'So,', 'Thus')

    # Answer labels removed from the start of the chosen line, applied in order.
    _LABEL_PATTERNS = (
        r'^(FINAL\s*ANSWER\s*:?\s*)',
        r'^(ANSWER\s*:?\s*)',
        r'^(RESULT\s*:?\s*)',
        r'^(THE\s*ANSWER\s*IS\s*:?\s*)',
    )

    def __init__(self):
        """Authenticate (best effort), pick a model and build the agent.

        Raises:
            Exception: whatever the last model constructor or ``CodeAgent``
                raised when no model / agent could be created.
        """
        print("GAIAAgent initializing with smolagents...")
        hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")

        # Handle HuggingFace authentication; a failure here is only a warning.
        try:
            if hf_token:
                login(token=hf_token)
                print("โœ… Authenticated with HuggingFace using environment token")
            else:
                # In Spaces, authentication might already be handled
                print("โ„น๏ธ No HF token found in environment, proceeding without explicit login")
        except Exception as e:
            print(f"โš ๏ธ Authentication warning: {e}")

        self.model = self._init_model(hf_token)

        # Initialize tools (custom tools + base tools from smolagents)
        self.custom_tools = [
            visit_webpage,
            calculate_math,
            analyze_data,
            extract_numbers,
            process_file_content,
            solve_equation,
            parse_structured_data,
        ]

        # Create the CodeAgent with enhanced capabilities
        try:
            self.agent = CodeAgent(
                tools=self.custom_tools,
                model=self.model,
                add_base_tools=True,  # Adds DuckDuckGoSearchTool and other base tools
                additional_authorized_imports=[
                    'requests', 'bs4', 'json', 'csv', 'math', 'statistics',
                    're', 'urllib.parse', 'base64', 'datetime', 'calendar',
                    'pandas', 'numpy', 'sympy', 'scipy'
                ],
                max_steps=15,  # Increased for complex multi-step reasoning
                verbosity_level=1  # Reduce verbosity for cleaner output
            )
            print("โœ… GAIA Agent initialized successfully with PRO model and enhanced tools")
        except Exception as e:
            print(f"โŒ Error initializing agent: {e}")
            raise

    @staticmethod
    def _init_model(hf_token):
        """Return the first model that can be constructed: primary, fallback, default."""
        try:
            # Try powerful model first - but use one that's more widely available
            model_id = "meta-llama/Llama-3.3-70B-Instruct"
            model = InferenceClientModel(model_id=model_id, token=hf_token)
            print(f"โœ… Model initialized successfully: {model_id}")
            return model
        except Exception as e:
            print(f"โš ๏ธ Error with primary model: {e}")

        try:
            # Fallback to a widely available model
            fallback_model = "microsoft/DialoGPT-medium"
            model = InferenceClientModel(model_id=fallback_model)
            print(f"โœ… Fallback model initialized: {fallback_model}")
            return model
        except Exception as e2:
            print(f"โš ๏ธ Error with fallback model: {e2}")

        try:
            # Last resort - use the library default
            model = InferenceClientModel()
            print("โœ… Default model initialized")
            return model
        except Exception as e3:
            print(f"โŒ Critical error - could not initialize any model: {e3}")
            raise

    @staticmethod
    def _build_prompt(question: str) -> str:
        """Wrap the question in the fixed GAIA instruction prompt."""
        return f"""You are an expert AI assistant designed to excel at the GAIA benchmark. You must answer questions with perfect accuracy using a systematic approach.
CRITICAL INSTRUCTIONS FOR GAIA SUCCESS:
1. ANALYZE THE QUESTION: Read carefully and identify what type of question this is:
- Mathematical calculation or equation
- Information retrieval from web/files
- Data analysis or statistics
- Multi-step reasoning problem
- Factual lookup
2. CHOOSE YOUR APPROACH:
- For math: Use calculate_math tool or solve_equation for complex equations
- For web info: Use DuckDuckGoSearchTool then visit_webpage for details
- For files: Use process_file_content to download and analyze
- For data: Use analyze_data or parse_structured_data
- For numbers in text: Use extract_numbers first
3. BE SYSTEMATIC:
- Break complex questions into steps
- Use multiple tools if needed
- Verify your reasoning
- Double-check calculations
4. ANSWER FORMAT:
- Give ONLY the final answer
- No explanations, no "FINAL ANSWER:" prefix
- For numbers: just the number (e.g., "42", not "42.0")
- For text: just the text without quotes
- Be precise with units, dates, and formatting
5. ACCURACY IS PARAMOUNT:
- GAIA requires exact matches
- Round numbers appropriately
- Use proper case and spelling
- Include units when relevant
Question: {question}
Think step by step, use the appropriate tools, and provide only the final answer:"""

    @staticmethod
    def _drop_zero_decimals(text: str) -> str:
        """Turn "42.0" / "-3.00" into "42" / "-3"; leave anything else alone."""
        if re.match(r'^-?\d+\.0+$', text):
            # Work on the integer part as text so large values keep every digit.
            return str(int(text.split('.', 1)[0]))
        return text

    @classmethod
    def _clean_answer(cls, result) -> str:
        """Reduce a raw agent result to the bare answer string."""
        if not isinstance(result, str):
            # Numeric results (e.g. float 42.0) need the same normalisation
            # the prompt asks for; other objects are simply stringified.
            return cls._drop_zero_decimals(str(result))

        text = result.strip()

        # Keep the first non-empty line that is not reasoning narration.
        for line in text.split('\n'):
            line = line.strip()
            if line and not line.startswith(cls._NARRATION_PREFIXES):
                text = line
                break

        # Remove common answer labels.
        for pattern in cls._LABEL_PATTERNS:
            text = re.sub(pattern, '', text, flags=re.IGNORECASE)

        # Remove quotes only when they wrap the entire answer.
        if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
            text = text[1:-1]

        return cls._drop_zero_decimals(text.strip())

    def __call__(self, question: str) -> str:
        """Process a question and return the answer.

        Args:
            question: The question text to hand to the agent.

        Returns:
            The cleaned answer; "API Error: ..." when the agent hit a 402 /
            payment error; "Error processing question: ..." on any other
            failure. This method does not raise.
        """
        try:
            print(f"๐Ÿค– Processing question: {question[:100]}...")
            enhanced_prompt = self._build_prompt(question)

            # Run the agent; quota errors become an answer string instead of a crash.
            try:
                result = self.agent.run(enhanced_prompt)
            except Exception as api_error:
                if "402" in str(api_error) or "Payment Required" in str(api_error):
                    print(f"โš ๏ธ API quota issue (you have Pro, this shouldn't happen): {api_error}")
                    result = f"API Error: {str(api_error)}"
                else:
                    raise

            answer = self._clean_answer(result)
            print(f"โœ… Agent response: {answer}")
            return answer
        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(f"โŒ {error_msg}")
            return error_msg
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per answered question, or None when the run stopped before
        any question was processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Enhanced Agent
    try:
        print("๐Ÿš€ Initializing GAIA Agent with smolagents...")
        agent = GAIAAgent()
        print("โœ… Enhanced agent ready for GAIA benchmark!")
    except Exception as e:
        error_msg = f"Error initializing agent: {e}"
        print(f"โŒ {error_msg}")
        return error_msg, None

    # In the case of an app running as a hugging Face space, this link points toward your codebase
    # NOTE(review): when SPACE_ID is unset the link contains "None".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code link: {agent_code}")

    # 2. Fetch Questions
    print(f"๐Ÿ“ฅ Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"โœ… Fetched {len(questions_data)} questions from GAIA benchmark.")
    except requests.exceptions.JSONDecodeError as e:
        # Must come before RequestException: requests' JSONDecodeError is a
        # subclass of it, so the broader handler would otherwise shadow this one.
        print(f"โŒ Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"โŒ Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"โŒ An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run Enhanced Agent
    results_log = []
    answers_payload = []
    print(f"๐Ÿค– Running enhanced GAIA agent on {len(questions_data)} questions...")
    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"โš ๏ธ Skipping item with missing task_id or question: {item}")
            continue

        print(f"\n๐Ÿ“ Processing question {i}/{len(questions_data)} (ID: {task_id})")

        # Shortened question for the results table; str() keeps a non-string
        # question from raising here.
        shown = str(question_text)
        preview = shown[:100] + "..." if len(shown) > 100 else shown

        try:
            submitted_answer = agent(question_text)
            print(f"โœ… Answer for {task_id}: {submitted_answer}")
        except Exception as e:
            submitted_answer = f"AGENT ERROR: {e}"
            print(f"โŒ Error running agent on task {task_id}: {e}")

        # Failed questions are still submitted, with the error text as the answer.
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": preview,
            "Submitted Answer": submitted_answer,
        })

    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        print("โŒ Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"๐Ÿš€ Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"๐Ÿ“ค Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        score = result_data.get('score', 'N/A')
        correct_count = result_data.get('correct_count', '?')
        total_attempted = result_data.get('total_attempted', '?')
        final_status = (
            f"๐ŸŽ‰ Submission Successful!\n"
            f"๐Ÿ‘ค User: {result_data.get('username')}\n"
            f"๐Ÿ“Š Overall Score: {score}% ({correct_count}/{total_attempted} correct)\n"
            f"๐ŸŽฏ Target: >30% for certification\n"
            f"๐Ÿ’ฌ Message: {result_data.get('message', 'No message received.')}"
        )
        # Only compare against the target when the server sent a numeric score.
        if isinstance(score, (int, float)) and score >= 30:
            final_status += f"\n๐Ÿ† CONGRATULATIONS! You've achieved the target score of 30%!"
        elif isinstance(score, (int, float)):
            final_status += f"\n๐Ÿ“ˆ Keep improving! You need {30-score:.1f}% more to reach the target."
        print("โœ… Submission successful!")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"โŒ Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "โŒ Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"โŒ Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"โŒ An unexpected error occurred during submission: {e}"

    # Every failure path reports its message together with the answers gathered so far.
    print(status_message)
    return status_message, results_df
# --- Build Gradio Interface using Blocks ---
with gr.Blocks(title="GAIA Agent Evaluation") as demo:
    # Page title followed by the description / instructions panel.
    gr.Markdown("# ๐Ÿค– Enhanced GAIA Agent Evaluation Runner")
    # The closing quotes stay at column 0 so the Markdown text is unchanged.
    gr.Markdown(
        """
**Enhanced Agent for GAIA Benchmark Certification**
This enhanced agent uses Hugging Face's **smolagents** framework with multiple specialized tools:
- ๐Ÿ” **Web Search**: DuckDuckGoSearchTool (from base toolkit) for finding information
- ๐Ÿ **Python Interpreter**: Code execution capabilities (from base toolkit)
- ๐ŸŒ **Web Scraping**: Custom webpage visitor for content extraction
- ๐Ÿงฎ **Mathematics**: Advanced calculation capabilities
- ๐Ÿ“Š **Data Analysis**: Statistical analysis of numerical data
- ๐Ÿ”ข **Number Extraction**: Intelligent number parsing from text
- ๐Ÿ“ **Text Analysis**: Counting and text processing utilities
- ๐Ÿค– **LLM Model**: Llama-3.3-70B-Instruct for advanced reasoning
**Instructions:**
1. ๐Ÿ”„ **Clone this space** and customize the agent as needed
2. ๐Ÿ”‘ **Log in** to your Hugging Face account using the button below
3. ๐Ÿš€ **Click 'Run Evaluation'** to test your agent on GAIA benchmark questions
4. ๐ŸŽฏ **Target**: Score >30% for course certification
**Goal**: Answer GAIA level 1 validation questions with exact match precision.
---
โš ๏ธ **Note**: Processing all questions may take several minutes due to the complexity of reasoning required.
"""
    )

    # Login button first, then the single action button.
    gr.LoginButton()
    run_button = gr.Button(
        "๐Ÿš€ Run Evaluation & Submit All Answers",
        variant="primary",
        size="lg",
    )

    # Read-only status text and the per-question results table.
    status_output = gr.Textbox(
        label="๐Ÿ“Š Evaluation Status & Results",
        placeholder="Click the button above to start the evaluation...",
        lines=8,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="๐Ÿ“‹ Questions and Agent Responses",
        headers=["Task ID", "Question", "Submitted Answer"],
        wrap=True,
    )

    # The click handler declares no inputs; its two return values fill the outputs.
    run_button.click(
        run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    divider = "=" * 60

    print("\n" + divider)
    print("๐Ÿค– ENHANCED GAIA AGENT STARTING UP")
    print(divider)

    # Log in up front so later hints can depend on the outcome.
    print("๐Ÿ” Setting up HuggingFace authentication...")
    auth_success = setup_authentication()

    # Report the Space environment variables, for information only.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("โ„น๏ธ SPACE_HOST environment variable not found (running locally?).")
        # NOTE(review): assumed to be nested under the "running locally" branch;
        # the original indentation was lost - confirm.
        if not auth_success:
            print("๐Ÿ’ก For local testing, you may need to run:")
            print(" from huggingface_hub import notebook_login")
            print(" notebook_login()")
    else:
        print(f"โœ… SPACE_HOST found: {space_host_startup}")
        print(f" ๐ŸŒ Runtime URL: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("โ„น๏ธ SPACE_ID environment variable not found (running locally?).")
    else:
        print(f"โœ… SPACE_ID found: {space_id_startup}")
        print(f" ๐Ÿ“ Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" ๐Ÿ”— Code URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    print(divider)
    print("๐Ÿš€ Launching Enhanced GAIA Agent Interface...")
    print("๐ŸŽฏ Target: >30% score on GAIA benchmark")
    print(divider + "\n")

    demo.launch(debug=True, share=False)