Answeragent

Runtime error

App Files Files Community

Answeragent / app.py

Nitinguleria

Update app.py

2e5751e verified 11 months ago

raw

history blame

16.6 kB

	import os
	import gradio as gr
	import requests
	import pandas as pd
	import sympy
	import re
	from duckduckgo_search import DDGS
	from langgraph.graph import StateGraph, END
	from typing import TypedDict, Literal

	# Base URL of the evaluation service. run_and_submit_all appends "/questions"
	# and "/submit" to this value, so it must not end with a trailing slash.
	# NOTE(review): you may need to update this - confirm the host actually serves
	# those two routes before relying on it.
	DEFAULT_API_URL = "https://huggingface.co/api/spaces/evaluate"

	# --- Enhanced Tools for GAIA Benchmark ---

	def wikipedia_search_tool(input: str) -> str:
	    """Search the web for *input* and return a digest of the top snippets.

	    Despite the name, the lookup goes through DuckDuckGo (``DDGS().text``).
	    Up to five hits are requested; the first three are inspected and every
	    hit whose ``body`` is longer than 10 characters is kept, labelled with
	    its 1-based position among the hits.

	    Returns:
	        The kept snippets joined by blank lines, the text
	        "No relevant information found." when nothing usable came back, or
	        "Search Error: ..." when the lookup raised.
	    """
	    try:
	        hits = DDGS().text(input, max_results=5)
	        if hits:
	            bodies = (hit.get("body", "") for hit in hits[:3])
	            snippets = [
	                f"Source {rank}: {text}"
	                for rank, text in enumerate(bodies, start=1)
	                if text and len(text) > 10
	            ]
	            if snippets:
	                return "\n\n".join(snippets)
	        return "No relevant information found."
	    except Exception as e:
	        # Best-effort tool: report the failure as text instead of raising.
	        return f"Search Error: {e}"

	def math_solver_tool(input: str) -> str:
	    """Evaluate the arithmetic contained in *input* and return it as text.

	    The input is normalised ("^" becomes "**", "÷" becomes "/") and then
	    tried in this order:

	    1. the whole input, so expressions such as "sqrt(16)" are not reduced
	       to their first numeric fragment;
	    2. every arithmetic-looking fragment embedded in the text (for example
	       the "2+2" in "What is 2+2?"), fragments with operators first.

	    Each attempt goes through ``sympy.sympify(...).evalf()``. If sympy
	    cannot handle any attempt, plain arithmetic made only of digits,
	    ``+ - * / . ( )`` and spaces is evaluated as a last resort.

	    Args:
	        input: A bare expression or a sentence containing one.

	    Returns:
	        The numeric result as a string, or
	        "Could not solve mathematical expression: <reason>" on failure.
	    """
	    cleaned_input = input.replace("^", "**").replace("÷", "/")

	    attempts = [cleaned_input.strip()]
	    attempts += [c for c in _math_candidates(cleaned_input) if c not in attempts]
	    attempts = [a for a in attempts if a]

	    last_error = "no mathematical expression found"
	    for candidate in attempts:
	        try:
	            # SECURITY(review): sympify evaluates the string internally, so
	            # untrusted text reaches an eval-like path here. Kept because it
	            # is the tool's core behaviour; sandbox it if inputs are hostile.
	            return str(sympy.sympify(candidate).evalf())
	        except Exception as exc:
	            # sympify raises many unrelated exception types on bad input;
	            # remember the reason and move on to the next candidate.
	            last_error = exc

	    # Fallback for plain arithmetic only. The character whitelist excludes
	    # letters and underscores, and builtins are removed from the namespace.
	    # SECURITY(review): this is still eval; very large exponents can hang.
	    safe_chars = set('0123456789+-*/.() ')
	    for candidate in attempts:
	        if all(ch in safe_chars for ch in candidate):
	            try:
	                return str(eval(candidate, {"__builtins__": {}}, {}))
	            except Exception:
	                continue

	    return f"Could not solve mathematical expression: {last_error}"


	def _math_candidates(text: str) -> list:
	    """Return arithmetic-looking fragments of *text*, most promising first."""
	    fragments = []
	    for raw in re.findall(r'[\d\.\s\+\-\*/\(\)]+', text):
	        # Drop sentence punctuation and dangling operators around the fragment.
	        fragment = raw.strip().lstrip('*/)').rstrip('+-*/.(').strip()
	        if any(ch.isdigit() for ch in fragment) and fragment not in fragments:
	            fragments.append(fragment)

	    operators = set('+-*/')
	    # Fragments with an operator beat bare numbers; longer beats shorter.
	    fragments.sort(
	        key=lambda f: (any(ch in operators for ch in f), len(f)),
	        reverse=True,
	    )
	    return fragments

	def code_execution_tool(input: str) -> str:
	    """Run a Python snippet with restricted builtins and report its outcome.

	    The snippet is first compiled as an expression. If that is a syntax
	    error it is compiled and executed as statements instead. Anything the
	    snippet prints is captured.

	    Args:
	        input: Python source, either one expression or a block of statements.

	    Returns:
	        * expression: ``str(value)``, or the printed text when the value is
	          ``None`` and something was printed (e.g. ``print(...)``);
	        * statements: ``str(result)`` when the snippet assigned a variable
	          named ``result``, otherwise the printed text, otherwise
	          "Code executed successfully";
	        * any failure: "Code execution error: <reason>".

	    SECURITY(review): a trimmed ``__builtins__`` is not a real sandbox;
	    object introspection can still escape it. Do not expose this to hostile
	    input without process-level isolation.
	    """
	    import contextlib
	    import io

	    # One namespace for globals and locals: with two separate dicts, functions
	    # defined by the snippet cannot see each other or top-level variables.
	    namespace = {
	        '__builtins__': {
	            'len': len, 'str': str, 'int': int, 'float': float,
	            'list': list, 'dict': dict, 'tuple': tuple, 'set': set,
	            'sum': sum, 'max': max, 'min': min, 'abs': abs,
	            'round': round, 'range': range, 'enumerate': enumerate,
	            'zip': zip, 'sorted': sorted, 'reversed': reversed,
	            'print': print,
	        },
	        'math': __import__('math'),
	        're': __import__('re'),
	    }

	    captured = io.StringIO()
	    ran_as_expression = True
	    value = None
	    try:
	        # NOTE: redirect_stdout swaps sys.stdout for the whole process while
	        # the snippet runs.
	        with contextlib.redirect_stdout(captured):
	            try:
	                compiled = compile(input, "<agent-code>", "eval")
	            except SyntaxError:
	                ran_as_expression = False
	                exec(compile(input, "<agent-code>", "exec"), namespace)
	            else:
	                value = eval(compiled, namespace)
	    except Exception as e:
	        return f"Code execution error: {e}"

	    printed = captured.getvalue().strip()

	    if ran_as_expression:
	        if value is None and printed:
	            return printed
	        return str(value)

	    if 'result' in namespace:
	        return str(namespace['result'])
	    return printed or "Code executed successfully"

	def general_reasoning_tool(input: str) -> str:
	    """Return a canned, keyword-driven framing of *input*.

	    This is a placeholder rather than real reasoning: the lower-cased text is
	    scanned for comparison words first, then for causation words, and the
	    matching template is filled with a truncated copy of the input.
	    """
	    lowered = input.lower()

	    def mentions(*words):
	        return any(word in lowered for word in words)

	    if mentions('compare', 'difference', 'similar', 'contrast'):
	        return f"Analysis: This appears to be a comparison question. Key factors to consider: {input[:200]}..."

	    if mentions('cause', 'reason', 'why', 'because'):
	        return f"Reasoning: This is asking about causation. Consider multiple factors that might contribute to: {input[:200]}..."

	    return f"General analysis: {input[:300]}..."

	# --- State definition ---

	class AgentState(TypedDict):
	    """State dictionary passed between the graph's nodes."""

	    # The question text being answered.
	    question: str
	    # Text produced by whichever tool node handled the question.
	    response: str
	    # Label of the node that produced the response ("math", "code", ...).
	    tool_used: str

	# --- Enhanced Routing logic for GAIA ---

	def route_question(state: AgentState) -> Literal["math", "code", "search", "reasoning"]:
	    """Pick the tool node that should handle ``state["question"]``.

	    The lower-cased question is checked against three keyword lists in
	    priority order (math, code, search); "reasoning" is the fallback. Word
	    keywords must start at a word boundary, so "sum" no longer fires on
	    "assume" and "code" no longer fires on "decode", while inflected forms
	    such as "calculated" still match. A question also counts as math when
	    it contains an arithmetic expression (spaces around the operator are
	    allowed) or a dollar amount.

	    Args:
	        state: Agent state; only the "question" key is read.

	    Returns:
	        One of "math", "code", "search" or "reasoning".
	    """
	    q = state["question"].lower()

	    math_keywords = [
	        "solve", "calculate", "evaluate", "compute", "sum", "multiply",
	        "divide", "percentage", "%", "=", "equation", "formula", "average",
	        "total", "cost", "price", "number", "how many", "how much",
	    ]
	    code_keywords = [
	        "python", "code", "function", "return", "algorithm", "program",
	        "script", "execute", "run", "implementation",
	    ]
	    search_keywords = [
	        "what", "who", "when", "where", "which", "capital", "country",
	        "invented", "created", "founded", "established", "located", "known for",
	    ]

	    looks_like_math = (
	        _contains_keyword(q, math_keywords)
	        # digit, operator, digit - with optional whitespace ("2 + 2", "3*4")
	        or re.search(r'\d\s*[\+\-\*/\^]\s*\d', q)
	        or re.search(r'\$\d+', q)
	    )
	    if looks_like_math:
	        return "math"
	    if _contains_keyword(q, code_keywords):
	        return "code"
	    if _contains_keyword(q, search_keywords):
	        return "search"
	    return "reasoning"


	def _contains_keyword(text: str, keywords) -> bool:
	    """Return True when any keyword occurs in *text* (words at a word start)."""
	    for keyword in keywords:
	        if keyword[0].isalnum():
	            # Anchor only the start so suffixes ("calculated") still count.
	            if re.search(r'\b' + re.escape(keyword), text):
	                return True
	        elif keyword in text:
	            # Symbols such as "%" and "=" have no word boundary to anchor on.
	            return True
	    return False

	# --- Node functions ---

	def math_node(state: AgentState) -> AgentState:
	    """Graph node: answer the question with ``math_solver_tool``."""
	    answer = math_solver_tool(state["question"])
	    return dict(
	        question=state["question"],
	        response=answer,
	        tool_used="math",
	    )

	def code_node(state: AgentState) -> AgentState:
	    """Graph node: answer the question with ``code_execution_tool``."""
	    answer = code_execution_tool(state["question"])
	    return dict(
	        question=state["question"],
	        response=answer,
	        tool_used="code",
	    )

	def search_node(state: AgentState) -> AgentState:
	    """Graph node: answer the question with ``wikipedia_search_tool``."""
	    answer = wikipedia_search_tool(state["question"])
	    return dict(
	        question=state["question"],
	        response=answer,
	        tool_used="search",
	    )

	def reasoning_node(state: AgentState) -> AgentState:
	    """Graph node: answer the question with ``general_reasoning_tool``."""
	    answer = general_reasoning_tool(state["question"])
	    return dict(
	        question=state["question"],
	        response=answer,
	        tool_used="reasoning",
	    )

	# --- LangGraph setup with corrected API ---

	def create_agent_graph():
	    """Build and compile the four-way routing graph.

	    Every tool node is registered under its route label, ``route_question``
	    chooses among them from the start of the graph, and each node leads
	    straight to ``END``.

	    Returns:
	        The object returned by ``StateGraph.compile()``.
	    """
	    # Route label -> node function; the label doubles as the node name.
	    handlers = {
	        "math": math_node,
	        "code": code_node,
	        "search": search_node,
	        "reasoning": reasoning_node,
	    }

	    workflow = StateGraph(AgentState)

	    for label, handler in handlers.items():
	        workflow.add_node(label, handler)

	    # The router's return value is mapped onto the node of the same name.
	    workflow.add_conditional_edges(
	        "__start__",
	        route_question,
	        {label: label for label in handlers},
	    )

	    # Single-step graph: whichever node ran, the workflow is finished.
	    for label in handlers:
	        workflow.add_edge(label, END)

	    return workflow.compile()

	# Create the compiled graph once, when the module is imported; BasicAgent
	# stores a reference to this same object in its constructor.
	app_graph = create_agent_graph()

	# --- Enhanced Agent wrapper ---

	class BasicAgent:
	    """Callable wrapper that runs one question through the compiled graph.

	    Calling an instance returns the answer as a string. For questions
	    handled by the math node, the last number in the response is returned
	    on its own - unless the response is the math tool's failure message,
	    which is passed through unchanged so that digits inside the error text
	    (for example "line 1") are not mistaken for an answer.
	    """

	    # Start of the message math_solver_tool returns when it cannot solve.
	    _MATH_FAILURE_PREFIX = "Could not solve"
	    # Signed integer or decimal with an optional exponent, so "1.5e+20"
	    # is extracted whole rather than as "1.5" followed by "20".
	    _NUMBER_PATTERN = r'-?\d+\.?\d*(?:[eE][+-]?\d+)?'

	    def __init__(self):
	        self.graph = app_graph
	        print("Enhanced LangGraph Agent initialized for GAIA benchmark.")

	    def __call__(self, question: str) -> str:
	        """Process a question and return an answer.

	        Args:
	            question: The question text.

	        Returns:
	            The answer text, or "Error: Could not process the question - ..."
	            when anything in the pipeline raised.
	        """
	        try:
	            state = {
	                "question": question,
	                "response": "",
	                "tool_used": "",
	            }

	            result = self.graph.invoke(state)

	            response = result.get("response", "No response generated")
	            tool_used = result.get("tool_used", "unknown")

	            solved_math = (
	                tool_used == "math"
	                and response
	                and not response.startswith(self._MATH_FAILURE_PREFIX)
	            )
	            if solved_math:
	                numbers = re.findall(self._NUMBER_PATTERN, response)
	                if numbers:
	                    return numbers[-1]  # the last number is taken as the answer

	            return str(response)

	        except Exception as e:
	            # Boundary of the agent: never raise into the evaluation loop.
	            print(f"Error in agent processing: {e}")
	            return f"Error: Could not process the question - {e}"

	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """
	    Fetches all questions, runs the BasicAgent on them, submits all answers,
	    and displays the results.

	    Args:
	        profile: The logged-in user's profile, or None when nobody is logged
	            in; only ``profile.username`` is read.

	    Returns:
	        A ``(status_message, results)`` pair. ``results`` is a DataFrame of
	        task id / question / submitted answer rows once the agent has run,
	        and None when the run stopped earlier (no login, agent failed to
	        initialise, questions could not be fetched).
	    """
	    # --- Determine HF Space Runtime URL and Repo URL ---
	    space_id = os.getenv("SPACE_ID")

	    if profile:
	        username = f"{profile.username}"
	        print(f"User logged in: {username}")
	    else:
	        print("User not logged in.")
	        return "Please Login to Hugging Face with the button.", None

	    api_url = DEFAULT_API_URL
	    questions_url = f"{api_url}/questions"
	    submit_url = f"{api_url}/submit"

	    # 1. Instantiate Agent
	    try:
	        agent = BasicAgent()
	    except Exception as e:
	        print(f"Error instantiating agent: {e}")
	        return f"Error initializing agent: {e}", None

	    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local"
	    print(f"Agent code location: {agent_code}")

	    # 2. Fetch Questions
	    print(f"Fetching questions from: {questions_url}")
	    try:
	        response = requests.get(questions_url, timeout=15)
	        response.raise_for_status()
	        questions_data = response.json()
	        if not questions_data:
	            print("Fetched questions list is empty.")
	            return "Fetched questions list is empty or invalid format.", None
	        print(f"Fetched {len(questions_data)} questions.")
	    except requests.exceptions.RequestException as e:
	        print(f"Error fetching questions: {e}")
	        return f"Error fetching questions: {e}", None
	    except Exception as e:
	        print(f"An unexpected error occurred fetching questions: {e}")
	        return f"An unexpected error occurred fetching questions: {e}", None

	    # 3. Run Agent on all questions
	    results_log = []
	    answers_payload = []
	    print(f"Running agent on {len(questions_data)} questions...")

	    for i, item in enumerate(questions_data):
	        task_id = item.get("task_id")
	        question_text = item.get("question")

	        if not task_id or question_text is None:
	            print(f"Skipping item with missing task_id or question: {item}")
	            continue

	        print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")

	        try:
	            submitted_answer = agent(question_text)
	        except Exception as e:
	            # A failing question is still submitted, with the error as its answer.
	            print(f"Error running agent on task {task_id}: {e}")
	            submitted_answer = f"AGENT ERROR: {e}"

	        # The results table shows at most the first 100 characters.
	        question_preview = (
	            question_text[:100] + "..." if len(question_text) > 100 else question_text
	        )
	        answers_payload.append({
	            "task_id": task_id,
	            "submitted_answer": submitted_answer,
	        })
	        results_log.append({
	            "Task ID": task_id,
	            "Question": question_preview,
	            "Submitted Answer": submitted_answer,
	        })

	    if not answers_payload:
	        print("Agent did not produce any answers to submit.")
	        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

	    # 4. Prepare Submission
	    submission_data = {
	        "username": username.strip(),
	        "agent_code": agent_code,
	        "answers": answers_payload,
	    }

	    print(f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'...")

	    # 5. Submit answers
	    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
	    try:
	        response = requests.post(submit_url, json=submission_data, timeout=120)
	        response.raise_for_status()
	        result_data = response.json()

	        final_status = (
	            f"Submission Successful!\n"
	            f"User: {result_data.get('username', username)}\n"
	            f"Overall Score: {result_data.get('score', 'N/A')}% "
	            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
	            f"Message: {result_data.get('message', 'No message received.')}"
	        )
	        print("Submission successful.")
	        return final_status, pd.DataFrame(results_log)

	    except requests.exceptions.HTTPError as e:
	        error_detail = f"Server responded with status {e.response.status_code}."
	        try:
	            error_json = e.response.json()
	            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
	        except (ValueError, AttributeError):
	            # Body was not JSON (ValueError) or not a JSON object (AttributeError).
	            error_detail += f" Response: {e.response.text[:500]}"
	        status_message = f"Submission Failed: {error_detail}"
	        print(status_message)
	        return status_message, pd.DataFrame(results_log)

	    except Exception as e:
	        status_message = f"Submission error: {e}"
	        print(status_message)
	        return status_message, pd.DataFrame(results_log)

	# --- Gradio Interface ---
	with gr.Blocks(title="GAIA Benchmark Agent") as demo:
	    # Page header followed by the feature list and usage instructions.
	    gr.Markdown("# Enhanced GAIA Benchmark Agent")
	    gr.Markdown(
	        """
	Enhanced Agent for GAIA Benchmark - Targeting 60% Accuracy

	Features:
	- Enhanced mathematical problem solving with symbolic computation
	- Improved search capabilities with multiple source aggregation
	- Safe code execution environment
	- Smart question routing (math/code/search/reasoning)
	- Better answer formatting and extraction

	Instructions:
	1. Log in to your Hugging Face account using the button below
	2. Click 'Run Evaluation & Submit All Answers' to start the benchmark
	3. The agent will process all questions and submit answers automatically

	Note: Processing may take several minutes depending on the number of questions.
	"""
	    )

	    # run_and_submit_all refuses to run without a logged-in profile.
	    gr.LoginButton()

	    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")

	    # Read-only outputs: a status text box and the per-question table.
	    status_output = gr.Textbox(
	        placeholder="Click the button above to start the evaluation...",
	        interactive=False,
	        lines=8,
	        label="Status & Results",
	    )
	    results_table = gr.DataFrame(
	        interactive=False,
	        wrap=True,
	        label="Questions and Agent Responses",
	    )

	    # No explicit inputs are wired; the handler's two return values fill the
	    # two output components in order.
	    run_button.click(
	        fn=run_and_submit_all,
	        inputs=[],
	        outputs=[status_output, results_table],
	    )

	if __name__ == "__main__":
	    # Startup banner: report the Space environment, then launch the UI.
	    separator = "=" * 50

	    print("\n" + separator)
	    print("🚀 GAIA Benchmark Agent Starting")
	    print(separator)

	    # Environment info
	    space_host = os.getenv("SPACE_HOST")
	    space_id = os.getenv("SPACE_ID")

	    if not space_host:
	        print("ℹ️ Running locally (SPACE_HOST not found)")
	    else:
	        print(f"✅ SPACE_HOST: {space_host}")
	        print(f" Runtime URL: https://{space_host}.hf.space")

	    if not space_id:
	        print("ℹ️ SPACE_ID not found")
	    else:
	        print(f"✅ SPACE_ID: {space_id}")
	        print(f" Repo URL: https://huggingface.co/spaces/{space_id}")

	    print(separator + "\n")

	    print("🎯 Target: 60% accuracy on GAIA benchmark")
	    print("🔧 Enhanced tools: Math, Code, Search, Reasoning")
	    print("\nLaunching Gradio interface...")

	    demo.launch(debug=True, share=False)