# app.py — GAIA benchmark agent app (author: jwgcurrie, commit cfde807)
import os
import gradio as gr
import requests
import inspect
import pandas as pd
# Import necessary libraries for LangChain Agent
from langchain_huggingface import HuggingFaceEndpoint
from langchain.agents import AgentExecutor, create_react_agent
from langchain import hub
from langchain_community.utilities import SerpAPIWrapper
from langchain.tools import Tool # Moved to top-level import for clarity and consistent access
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- LangChain Agent Definition ---
class GAIAAgent:
    """ReAct-style LangChain agent for GAIA benchmark questions.

    Wraps a Mistral-7B-Instruct model served through the Hugging Face
    Inference API with a single SerpAPI web-search tool, driven by the
    standard "hwchase17/react" prompt.
    """

    def __init__(self):
        print("GAIAAgent initialized using LangChain.")
        repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
        # LLM backend: hosted inference endpoint; the token comes from the
        # HUGGINGFACEHUB_API_TOKEN environment variable (HF Space secret).
        self.llm = HuggingFaceEndpoint(
            endpoint_url=f"https://api-inference.huggingface.co/models/{repo_id}",
            temperature=0.1,
            max_new_tokens=512,
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
        )
        # SerpAPIWrapper picks up SERPAPI_API_KEY from the environment.
        self.serpapi_tool = SerpAPIWrapper()
        # The description is what lets the LLM decide when to invoke search.
        search_tool = Tool(
            name="Serpapi Search",
            description="useful for when you need to answer questions about current events or facts. Input should be a search query.",
            func=self.serpapi_tool.run,
        )
        self.tools = [search_tool]
        self.prompt = hub.pull("hwchase17/react")
        self.agent = create_react_agent(self.llm, self.tools, self.prompt)
        # Generous iteration/time budget: complex GAIA questions need many
        # reasoning steps (25 iterations, 3 minutes per question).
        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=25,
            max_execution_time=180.0,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its final answer text.

        On any executor failure a readable error string is returned instead
        of raising, so the calling loop can keep processing questions.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            result = self.agent_executor.invoke({"input": question})
            final_answer = result["output"]
            print(f"Agent returning answer: {final_answer}")
            return final_answer
        except Exception as e:
            print(f"Error during agent execution: {e}")
            # Return a clean error message rather than crashing the run loop.
            return f"Agent execution failed: {e}. Check tool outputs and LLM reasoning."
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or None when
            running locally / not logged in.

    Returns:
        A (status_message, results_dataframe) tuple consumed by the Gradio
        Textbox and DataFrame outputs.
    """
    space_id = os.getenv("SPACE_ID")

    # Robust username handling: fall back to a default so the app still runs
    # locally without HF login (do NOT return early here).
    current_username = "anonymous_user"
    if profile and profile.username:
        current_username = profile.username
        print(f"User logged in: {current_username}")
    else:
        print("User not logged in through Gradio OAuth, using default username for submission.")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = GAIAAgent()
    except Exception as e:
        # Agent construction touches external services; surface a helpful
        # message (likely missing secrets) in the UI instead of crashing.
        error_message = f"Failed to initialize agent: {e}. Please ensure all required API keys (HUGGINGFACEHUB_API_TOKEN, SERPAPI_API_KEY, GOOGLE_API_KEY) are set in Hugging Face Space secrets, and model terms accepted."
        print(error_message)
        return error_message, pd.DataFrame([{"Question ID": "N/A", "Question": "Agent Initialization Failed", "Agent Answer": str(e)}])

    try:
        # Timeout prevents the Gradio worker from hanging forever if the
        # scoring service is unreachable.
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions = response.json()  # the endpoint returns the list directly
        print(f"Fetched {len(questions)} questions.")
    except requests.exceptions.RequestException as e:
        return f"Failed to fetch questions: {e}", pd.DataFrame()

    all_answers = []
    results_for_display = []
    for q_data in questions:
        q_id = q_data.get("task_id")  # GAIA identifies questions by 'task_id'
        q_text = q_data.get("question")
        if not q_id or not q_text:
            print(f"Skipping malformed question data: {q_data}")
            continue
        print(f"\n--- Processing Question ID: {q_id} ---")
        agent_answer = agent(q_text)  # GAIAAgent.__call__ never raises
        # Submission keys must match the GAIA benchmark schema exactly.
        all_answers.append({"task_id": q_id, "submitted_answer": agent_answer})
        results_for_display.append({"Question ID": q_id, "Question": q_text, "Agent Answer": agent_answer})

    results_df = pd.DataFrame(results_for_display)
    submission_data = {
        "answers": all_answers,
        "space_id": space_id,  # lets the leaderboard link back to this Space
        "username": current_username,
        "agent_code": inspect.getsource(GAIAAgent),  # for leaderboard debugging
    }

    try:
        print(f"\nSubmitting {len(all_answers)} answers to {submit_url}...")
        submit_response = requests.post(submit_url, json=submission_data, timeout=120)
        submit_response.raise_for_status()
        submission_result = submit_response.json()
        print("Submission successful!")
        print(f"Submission Result: {submission_result}")
        score = submission_result.get("score", "N/A")
        leaderboard_link = submission_result.get("leaderboard_link", "")
        # BUG FIX: `score` defaults to the string "N/A"; applying the `:.2f`
        # format spec to a str raises ValueError. Only format numeric scores.
        score_text = f"{score:.2f}%" if isinstance(score, (int, float)) else str(score)
        status_message = f"Evaluation complete! Your score: {score_text}\n"
        if leaderboard_link:
            status_message += f"Check the leaderboard: {leaderboard_link}\n"
        else:
            status_message += "No leaderboard link provided."
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        error_message = f"Failed to submit answers: {e}"
        if hasattr(e, 'response') and e.response is not None:
            error_message += f"\nResponse: {e.response.text}"
        print(error_message)
        return error_message, results_df
# (Keep Gradio UI setup as is)
# --- Gradio UI setup ---
with gr.Blocks() as demo:
    gr.Markdown(
        """
# Unit 4: Agentic AI for GAIA Benchmark
This Gradio app allows you to run your agent against the GAIA benchmark questions and submit your answers.
Your goal is to modify the `GAIAAgent` class in `app.py` to achieve a score above 30%.
"""
    )
    # OAuth login supplies the gr.OAuthProfile argument to the click handler.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    # Startup diagnostics: report which HF Space environment variables are
    # present so misconfiguration is visible in the Space logs.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    host = os.getenv("SPACE_HOST")
    sid = os.getenv("SPACE_ID")
    if host:
        print(f"✅ SPACE_HOST found: {host}")
        print(f" Runtime URL should be: https://{host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    if sid:
        print(f"✅ SPACE_ID found: {sid}")
        print(f" Repo URL: https://huggingface.co/spaces/{sid}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{sid}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found...")
    demo.launch()