"""Gradio app that runs a smolagents ``CodeAgent`` on the GAIA course questions.

The module defines a handful of custom tools, wraps them in ``GAIAAgent``,
fetches the question set from the scoring API, submits the agent's answers and
shows the score in a small Gradio UI.
"""

import base64
import csv
import inspect
import io
import json
import math
import mimetypes
import os
import re
import statistics
import warnings
from typing import Any, Optional, Union
from urllib.parse import urlparse

import gradio as gr
import pandas as pd
import requests

# HuggingFace authentication
from huggingface_hub import login

# smolagents imports
from smolagents import CodeAgent, InferenceClientModel, tool

# (Keep Constants as is)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Models tried by GAIAAgent, in order, before falling back to the library default.
PRIMARY_MODEL_ID = "meta-llama/Llama-3.3-70B-Instruct"
FALLBACK_MODEL_ID = "microsoft/DialoGPT-medium"

# Score (in percent) required for course certification.
TARGET_SCORE = 30

# Prompt wrapped around every question; ``{question}`` is the only placeholder.
GAIA_PROMPT_TEMPLATE = """You are an expert AI assistant designed to excel at the GAIA benchmark. You must answer questions with perfect accuracy using a systematic approach.

CRITICAL INSTRUCTIONS FOR GAIA SUCCESS:

1. ANALYZE THE QUESTION: Read carefully and identify what type of question this is:
   - Mathematical calculation or equation
   - Information retrieval from web/files
   - Data analysis or statistics
   - Multi-step reasoning problem
   - Factual lookup

2. CHOOSE YOUR APPROACH:
   - For math: Use calculate_math tool or solve_equation for complex equations
   - For web info: Use DuckDuckGoSearchTool then visit_webpage for details
   - For files: Use process_file_content to download and analyze
   - For data: Use analyze_data or parse_structured_data
   - For numbers in text: Use extract_numbers first

3. BE SYSTEMATIC:
   - Break complex questions into steps
   - Use multiple tools if needed
   - Verify your reasoning
   - Double-check calculations

4. ANSWER FORMAT:
   - Give ONLY the final answer
   - No explanations, no "FINAL ANSWER:" prefix
   - For numbers: just the number (e.g., "42", not "42.0")
   - For text: just the text without quotes
   - Be precise with units, dates, and formatting

5. ACCURACY IS PARAMOUNT:
   - GAIA requires exact matches
   - Round numbers appropriately
   - Use proper case and spelling
   - Include units when relevant

Question: {question}

Think step by step, use the appropriate tools, and provide only the final answer:"""

# Markdown shown at the top of the Gradio page.
APP_DESCRIPTION = """
**Enhanced Agent for GAIA Benchmark Certification**

This enhanced agent uses Hugging Face's **smolagents** framework with multiple specialized tools:

- 🔍 **Web Search**: DuckDuckGoSearchTool (from base toolkit) for finding information
- 🐍 **Python Interpreter**: Code execution capabilities (from base toolkit)
- 🌐 **Web Scraping**: Custom webpage visitor for content extraction
- 🧮 **Mathematics**: Advanced calculation capabilities
- 📊 **Data Analysis**: Statistical analysis of numerical data
- 🔢 **Number Extraction**: Intelligent number parsing from text
- 📝 **Text Analysis**: Counting and text processing utilities
- 🤖 **LLM Model**: Llama-3.3-70B-Instruct for advanced reasoning

**Instructions:**

1. 🔄 **Clone this space** and customize the agent as needed
2. 🔑 **Log in** to your Hugging Face account using the button below
3. 🚀 **Click 'Run Evaluation'** to test your agent on GAIA benchmark questions
4. 🎯 **Target**: Score >30% for course certification

**Goal**: Answer GAIA level 1 validation questions with exact match precision.

---

⚠️ **Note**: Processing all questions may take several minutes due to the complexity of reasoning required.
"""


# --- Custom Tools for GAIA Tasks ---
# Each tool keeps its imports inside the function body so that it does not
# depend on module-level names.

@tool
def visit_webpage(url: str) -> str:
    """Visits a webpage at the given URL and returns its content as text.

    Args:
        url: The URL of the webpage to visit

    Returns:
        The content of the webpage as text, or an error message if the request fails
    """
    try:
        import requests
        from bs4 import BeautifulSoup

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Drop script and style elements so only visible text remains.
        for element in soup(["script", "style"]):
            element.decompose()

        # Collapse every run of whitespace (including newlines) to one space.
        text = ' '.join(soup.get_text().split())

        # Limit text length to avoid token limits.
        if len(text) > 8000:
            text = text[:8000] + "... [Content truncated]"

        return text
    except Exception as e:
        # Best-effort tool: report the failure to the agent instead of raising.
        return f"Error visiting webpage: {str(e)}"


@tool
def calculate_math(expression: str) -> str:
    """Safely evaluates mathematical expressions and performs calculations.

    Args:
        expression: A mathematical expression to evaluate (e.g., "2+2", "sqrt(16)", "log(100)")

    Returns:
        The result of the calculation or an error message
    """
    try:
        import math

        expression = expression.strip()
        if not expression:
            return "Error in calculation: empty expression"

        # NOTE(security): eval() on agent-supplied text. Builtins are removed
        # and dunder access is rejected, but this is not a real sandbox.
        if "__" in expression:
            return "Error in calculation: unsupported expression"

        # Calculator notation uses ^ for powers; Python would treat it as XOR.
        expression = expression.replace("^", "**")

        # Bare function names are provided directly instead of being rewritten
        # with regexes, so an already-qualified "math.sqrt(16)" keeps working.
        # "log" is base 10 and "ln" is the natural logarithm.
        safe_names = {
            "__builtins__": {},
            "math": math,
            "log": math.log10,
            "ln": math.log,
            "sqrt": math.sqrt,
            "sin": math.sin,
            "cos": math.cos,
            "tan": math.tan,
            "pi": math.pi,
            "e": math.e,
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
            "len": len,
            "pow": pow,
        }

        return str(eval(expression, safe_names))
    except Exception as e:
        return f"Error in calculation: {str(e)}"


@tool
def analyze_data(data: str, operation: str = "summary") -> str:
    """Analyzes numerical data and performs statistical operations.

    Args:
        data: Comma-separated numerical data or JSON array
        operation: Type of analysis ("summary", "mean", "median", "std", "count", "sum", "min", "max")

    Returns:
        The result of the data analysis
    """
    try:
        import json
        import statistics

        # Strip first so a JSON array surrounded by whitespace is still
        # recognised as JSON rather than mis-parsed as CSV.
        data = data.strip()
        if data.startswith('[') and data.endswith(']'):
            numbers = json.loads(data)
        else:
            numbers = [float(part.strip()) for part in data.split(',') if part.strip()]

        if not numbers:
            return "No valid numerical data provided"

        requested = operation.strip().lower()

        if requested == "summary":
            summary = {
                "count": len(numbers),
                "sum": sum(numbers),
                "mean": statistics.mean(numbers),
                "median": statistics.median(numbers),
                "min": min(numbers),
                "max": max(numbers),
            }
            # The sample standard deviation needs at least two points.
            if len(numbers) > 1:
                summary["std"] = statistics.stdev(numbers)
            return json.dumps(summary, indent=2)

        if requested == "std":
            return str(statistics.stdev(numbers)) if len(numbers) > 1 else "0"

        simple_operations = {
            "mean": statistics.mean,
            "median": statistics.median,
            "count": len,
            "sum": sum,
            "min": min,
            "max": max,
        }
        if requested in simple_operations:
            return str(simple_operations[requested](numbers))

        return f"Unknown operation: {operation}"
    except Exception as e:
        return f"Error in data analysis: {str(e)}"


@tool
def extract_numbers(text: str) -> str:
    """Extracts all numbers from a text string.

    Args:
        text: Text containing numbers

    Returns:
        Comma-separated list of extracted numbers
    """
    try:
        import re

        # Integers and decimals, optionally preceded by a minus sign.
        found = re.findall(r'-?\d+(?:\.\d+)?', text)
        if not found:
            return "No numbers found in the text"
        return ', '.join(found)
    except Exception as e:
        return f"Error extracting numbers: {str(e)}"


@tool
def process_file_content(file_url: str) -> str:
    """Downloads and processes content from a file URL, supporting various formats.

    Args:
        file_url: URL to a file (PDF, CSV, TXT, etc.)

    Returns:
        The processed content of the file as text
    """
    try:
        import json

        import requests

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(file_url, headers=headers, timeout=30)
        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()

        # Textual payloads are returned as-is, JSON is pretty-printed, and
        # anything else is only described.
        if 'text/' in content_type or 'csv' in content_type:
            return response.text
        if 'json' in content_type:
            return json.dumps(response.json(), indent=2)
        return f"Binary file detected. Size: {len(response.content)} bytes. Content-Type: {content_type}"
    except Exception as e:
        return f"Error processing file: {str(e)}"


@tool
def solve_equation(equation: str) -> str:
    """Solves mathematical equations and expressions symbolically.

    Args:
        equation: Mathematical equation to solve (e.g., "x^2 + 2*x - 3 = 0")

    Returns:
        The solution to the equation
    """
    try:
        import re

        import sympy as sp

        equation = equation.strip()

        # NOTE(security): eval() on agent-supplied text. Builtins are removed
        # and dunder access is rejected, but this is not a real sandbox.
        if "__" in equation:
            return "Error solving equation: unsupported expression"

        x, y, z, t = sp.symbols('x y z t')
        namespace = {
            "__builtins__": {},
            "sp": sp,
            "x": x,
            "y": y,
            "z": z,
            "t": t,
            "sqrt": sp.sqrt,
            "sin": sp.sin,
            "cos": sp.cos,
            "tan": sp.tan,
            "log": sp.log,
            "exp": sp.exp,
            "pi": sp.pi,
            "E": sp.E,
        }

        # Accept ^ as the power operator and == as a synonym for =.
        equation = equation.replace('^', '**').replace('==', '=')

        # Split on a bare "=" only, so "<=", ">=" and "!=" stay intact.
        sides = re.split(r'(?<![<>!])=', equation)

        if len(sides) == 1:
            # No equals sign: it is an expression to simplify.
            return str(sp.simplify(eval(sides[0], namespace)))

        if len(sides) != 2:
            return "Error solving equation: expected exactly one '=' sign"

        # Python's == on sympy objects is structural equality and would
        # collapse the equation to a bool, so solve lhs - rhs = 0 instead.
        lhs = sp.sympify(eval(sides[0].strip(), namespace))
        rhs = sp.sympify(eval(sides[1].strip(), namespace))
        difference = lhs - rhs

        unknowns = difference.free_symbols
        if not unknowns:
            # Nothing to solve for: report whether the equality holds.
            return str(sp.simplify(difference) == 0)

        # Prefer x; otherwise use the alphabetically first symbol present.
        target = x if x in unknowns else sorted(unknowns, key=str)[0]
        return str(sp.solve(difference, target))
    except Exception as e:
        return f"Error solving equation: {str(e)}"


@tool
def parse_structured_data(data: str, format_type: str = "auto") -> str:
    """Parses and analyzes structured data (CSV, JSON, etc.).

    Args:
        data: The structured data as a string
        format_type: Format type ("csv", "json", "auto")

    Returns:
        Analysis of the structured data
    """
    try:
        import json
        from io import StringIO

        import pandas as pd

        if format_type == "auto":
            # JSON starts with a brace or bracket; CSV needs commas and
            # more than one line. Anything else stays "auto" and is reported
            # as unsupported below.
            stripped = data.strip()
            if stripped.startswith(('{', '[')):
                format_type = "json"
            elif ',' in stripped and '\n' in stripped:
                format_type = "csv"

        if format_type == "json":
            return json.dumps(json.loads(data), indent=2)

        if format_type == "csv":
            df = pd.read_csv(StringIO(data))
            report = f"DataFrame shape: {df.shape}\n"
            report += f"Columns: {list(df.columns)}\n"
            report += f"First 5 rows:\n{df.head().to_string()}\n"
            # Check that numeric columns exist, not the truthiness of their names.
            if not df.select_dtypes(include=['number']).columns.empty:
                report += f"Numerical summary:\n{df.describe().to_string()}"
            return report

        return f"Unsupported format: {format_type}"
    except Exception as e:
        return f"Error parsing data: {str(e)}"


def _get_hf_token():
    """Return the Hugging Face token from the environment, or None if unset."""
    return os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")


def setup_authentication():
    """Setup HuggingFace authentication for the app.

    Returns:
        True when a token was found and ``login`` succeeded, False otherwise.
    """
    try:
        hf_token = _get_hf_token()
        if hf_token:
            login(token=hf_token)
            print("✅ Authenticated with HuggingFace using environment token")
            return True

        print("ℹ️ No HF token found in environment")
        print("💡 If running locally, please set HF_TOKEN environment variable")
        print("💡 For Spaces deployment, this should work automatically")
        return False
    except Exception as e:
        print(f"⚠️ Authentication issue: {e}")
        return False


# Line prefixes treated as reasoning rather than as the answer itself.
_REASONING_PREFIXES = ('Step', 'First', 'Next', 'Then', 'Finally', 'Therefore', 'So,', 'Thus')

# Leading labels stripped from an answer, applied in this order.
_ANSWER_PREFIX_PATTERNS = (
    r'^(FINAL\s*ANSWER\s*:?\s*)',
    r'^(ANSWER\s*:?\s*)',
    r'^(RESULT\s*:?\s*)',
    r'^(THE\s*ANSWER\s*IS\s*:?\s*)',
)


def _clean_answer(result) -> str:
    """Reduce a raw agent result to the bare answer string used for matching.

    Non-string results (for example a float returned by the agent) are
    converted to text first so that they receive the same normalisation.
    """
    text = result if isinstance(result, str) else str(result)
    text = text.strip()

    # Keep the first non-empty line that does not look like reasoning.
    for candidate in text.split('\n'):
        candidate = candidate.strip()
        if candidate and not candidate.startswith(_REASONING_PREFIXES):
            text = candidate
            break

    for pattern in _ANSWER_PREFIX_PATTERNS:
        text = re.sub(pattern, '', text, flags=re.IGNORECASE)

    # Remove quotes only when they wrap the entire answer.
    if (text.startswith('"') and text.endswith('"')) or (text.startswith("'") and text.endswith("'")):
        text = text[1:-1]

    # Whole-valued decimals become integers, e.g. "42.0" -> "42".
    if re.match(r'^-?\d+\.0+$', text):
        text = str(int(float(text)))

    return text.strip()


# --- Enhanced Agent Definition ---
class GAIAAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` with the custom tools."""

    def __init__(self):
        """Authenticate if a token is available, then build the model and agent.

        Raises:
            Exception: re-raised when no model or the agent cannot be created.
        """
        print("GAIAAgent initializing with smolagents...")

        hf_token = _get_hf_token()

        # A failed login is only a warning; construction continues without it.
        try:
            if hf_token:
                login(token=hf_token)
                print("✅ Authenticated with HuggingFace using environment token")
            else:
                # In Spaces, authentication might already be handled.
                print("ℹ️ No HF token found in environment, proceeding without explicit login")
        except Exception as e:
            print(f"⚠️ Authentication warning: {e}")

        self.model = self._init_model(hf_token)

        # Custom tools; the base tools are added by the agent itself below.
        self.custom_tools = [
            visit_webpage,
            calculate_math,
            analyze_data,
            extract_numbers,
            process_file_content,
            solve_equation,
            parse_structured_data,
        ]

        try:
            self.agent = CodeAgent(
                tools=self.custom_tools,
                model=self.model,
                add_base_tools=True,  # Adds DuckDuckGoSearchTool and other base tools
                additional_authorized_imports=[
                    'requests', 'bs4', 'json', 'csv', 'math', 'statistics', 're',
                    'urllib.parse', 'base64', 'datetime', 'calendar', 'pandas',
                    'numpy', 'sympy', 'scipy',
                ],
                max_steps=15,  # Increased for complex multi-step reasoning
                verbosity_level=1,  # Reduce verbosity for cleaner output
            )
            print("✅ GAIA Agent initialized successfully with PRO model and enhanced tools")
        except Exception as e:
            print(f"❌ Error initializing agent: {e}")
            raise

    @staticmethod
    def _init_model(hf_token):
        """Build the model, trying the primary, the fallback, then the default.

        Raises:
            Exception: whatever the last attempt raised, if all three fail.
        """
        try:
            model = InferenceClientModel(model_id=PRIMARY_MODEL_ID, token=hf_token)
            print(f"✅ Model initialized successfully: {PRIMARY_MODEL_ID}")
            return model
        except Exception as e:
            print(f"⚠️ Error with primary model: {e}")

        try:
            model = InferenceClientModel(model_id=FALLBACK_MODEL_ID)
            print(f"✅ Fallback model initialized: {FALLBACK_MODEL_ID}")
            return model
        except Exception as e:
            print(f"⚠️ Error with fallback model: {e}")

        try:
            # Last resort: let the library choose its default model.
            model = InferenceClientModel()
            print("✅ Default model initialized")
            return model
        except Exception as e:
            print(f"❌ Critical error - could not initialize any model: {e}")
            raise

    def __call__(self, question: str) -> str:
        """Process a question and return the answer.

        Never raises: any failure is returned as an error-message string.
        """
        try:
            print(f"🤖 Processing question: {question[:100]}...")

            prompt = GAIA_PROMPT_TEMPLATE.format(question=question)

            try:
                result = self.agent.run(prompt)
            except Exception as api_error:
                # A quota error is turned into an answer string; anything
                # else goes to the outer handler.
                if "402" in str(api_error) or "Payment Required" in str(api_error):
                    print(f"⚠️ API quota issue (you have Pro, this shouldn't happen): {api_error}")
                    result = f"API Error: {str(api_error)}"
                else:
                    raise

            answer = _clean_answer(result)
            print(f"✅ Agent response: {answer}")
            return answer
        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(f"❌ {error_msg}")
            return error_msg


def _results_row(task_id, question_text, answer):
    """Build one row of the results table, truncating long questions."""
    shown = question_text[:100] + "..." if len(question_text) > 100 else question_text
    return {"Task ID": task_id, "Question": shown, "Submitted Answer": answer}


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in user's OAuth profile, or None when not logged in.

    Returns:
        A ``(status_message, results)`` pair, where ``results`` is a DataFrame
        of the answers or None when the run stopped before any were produced.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Enhanced Agent
    try:
        print("🚀 Initializing GAIA Agent with smolagents...")
        agent = GAIAAgent()
        print("✅ Enhanced agent ready for GAIA benchmark!")
    except Exception as e:
        error_msg = f"Error initializing agent: {e}"
        print(f"❌ {error_msg}")
        return error_msg, None

    # In the case of an app running as a Hugging Face space, this link points toward your codebase
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(f"Agent code link: {agent_code}")

    # 2. Fetch Questions
    print(f"📥 Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"✅ Fetched {len(questions_data)} questions from GAIA benchmark.")
    # JSONDecodeError subclasses RequestException, so it must be caught first.
    except requests.exceptions.JSONDecodeError as e:
        print(f"❌ Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"❌ Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"❌ An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run Enhanced Agent
    results_log = []
    answers_payload = []
    print(f"🤖 Running enhanced GAIA agent on {len(questions_data)} questions...")

    for i, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"⚠️ Skipping item with missing task_id or question: {item}")
            continue

        print(f"\n📝 Processing question {i}/{len(questions_data)} (ID: {task_id})")
        try:
            submitted_answer = agent(question_text)
            print(f"✅ Answer for {task_id}: {submitted_answer}")
        except Exception as e:
            # Submit the failure as the answer so the task is still recorded.
            submitted_answer = f"AGENT ERROR: {e}"
            print(f"❌ Error running agent on task {task_id}: {e}")

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append(_results_row(task_id, question_text, submitted_answer))

    if not answers_payload:
        print("❌ Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    status_update = f"🚀 Agent finished processing. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"📤 Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        score = result_data.get('score', 'N/A')
        correct_count = result_data.get('correct_count', '?')
        total_attempted = result_data.get('total_attempted', '?')

        final_status = (
            f"🎉 Submission Successful!\n"
            f"👤 User: {result_data.get('username')}\n"
            f"📊 Overall Score: {score}% ({correct_count}/{total_attempted} correct)\n"
            f"🎯 Target: >30% for certification\n"
            f"💬 Message: {result_data.get('message', 'No message received.')}"
        )

        # The score may be missing ("N/A"), so only compare real numbers.
        if isinstance(score, (int, float)):
            if score >= TARGET_SCORE:
                final_status += f"\n🏆 CONGRATULATIONS! You've achieved the target score of 30%!"
            else:
                final_status += f"\n📈 Keep improving! You need {TARGET_SCORE-score:.1f}% more to reach the target."

        print("✅ Submission successful!")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"❌ Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "❌ Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"❌ Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"❌ An unexpected error occurred during submission: {e}"

    # Every failure path reports its message together with the answers so far.
    print(status_message)
    return status_message, pd.DataFrame(results_log)


# --- Build Gradio Interface using Blocks ---
with gr.Blocks(title="GAIA Agent Evaluation") as demo:
    gr.Markdown("# 🤖 Enhanced GAIA Agent Evaluation Runner")
    gr.Markdown(APP_DESCRIPTION)

    gr.LoginButton()

    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary", size="lg")

    status_output = gr.Textbox(
        label="📊 Evaluation Status & Results",
        lines=8,
        interactive=False,
        placeholder="Click the button above to start the evaluation...",
    )

    results_table = gr.DataFrame(
        label="📋 Questions and Agent Responses",
        wrap=True,
        headers=["Task ID", "Question", "Submitted Answer"],
    )

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    print("\n" + "="*60)
    print("🤖 ENHANCED GAIA AGENT STARTING UP")
    print("="*60)

    # Setup authentication
    print("🔐 Setting up HuggingFace authentication...")
    auth_success = setup_authentication()

    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" 🌐 Runtime URL: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
        if not auth_success:
            print("💡 For local testing, you may need to run:")
            print(" from huggingface_hub import notebook_login")
            print(" notebook_login()")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" 📁 Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" 🔗 Code URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?).")

    print("="*60)
    print("🚀 Launching Enhanced GAIA Agent Interface...")
    print("🎯 Target: >30% score on GAIA benchmark")
    print("="*60 + "\n")

    demo.launch(debug=True, share=False)