# app.py — GAIA benchmark agent app (author: jwgcurrie, commit cfde807)
import os
import gradio as gr
import requests
import inspect
import pandas as pd
# Import necessary libraries for LangChain Agent
from langchain_huggingface import HuggingFaceEndpoint
from langchain.agents import AgentExecutor, create_react_agent
from langchain import hub
from langchain_community.utilities import SerpAPIWrapper
from langchain.tools import Tool # Moved to top-level import for clarity and consistent access
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- LangChain Agent Definition ---
class GAIAAgent:
    """ReAct-style LangChain agent for GAIA benchmark questions.

    Wraps a Mistral-7B-Instruct model served through the Hugging Face
    Inference API with a single SerpAPI web-search tool, driven by the
    standard "hwchase17/react" prompt.
    """

    def __init__(self):
        print("GAIAAgent initialized using LangChain.")
        repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
        # LLM backend: hosted inference endpoint; the token comes from the
        # HUGGINGFACEHUB_API_TOKEN environment variable (HF Space secret).
        self.llm = HuggingFaceEndpoint(
            endpoint_url=f"https://api-inference.huggingface.co/models/{repo_id}",
            temperature=0.1,
            max_new_tokens=512,
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
        )
        # SerpAPIWrapper picks up SERPAPI_API_KEY from the environment.
        self.serpapi_tool = SerpAPIWrapper()
        # The description is what lets the LLM decide when to invoke search.
        search_tool = Tool(
            name="Serpapi Search",
            description="useful for when you need to answer questions about current events or facts. Input should be a search query.",
            func=self.serpapi_tool.run,
        )
        self.tools = [search_tool]
        self.prompt = hub.pull("hwchase17/react")
        self.agent = create_react_agent(self.llm, self.tools, self.prompt)
        # Generous iteration/time budget: complex GAIA questions need many
        # reasoning steps (25 iterations, 3 minutes per question).
        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=25,
            max_execution_time=180.0,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its final answer text.

        On any executor failure a readable error string is returned instead
        of raising, so the calling loop can keep processing questions.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            result = self.agent_executor.invoke({"input": question})
            final_answer = result["output"]
            print(f"Agent returning answer: {final_answer}")
            return final_answer
        except Exception as e:
            print(f"Error during agent execution: {e}")
            # Return a clean error message rather than crashing the run loop.
            return f"Agent execution failed: {e}. Check tool outputs and LLM reasoning."
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or None when
            running locally / not logged in.

    Returns:
        A (status_message, results_dataframe) tuple consumed by the Gradio
        Textbox and DataFrame outputs.
    """
    space_id = os.getenv("SPACE_ID")

    # Robust username handling: fall back to a default so the app still runs
    # locally without HF login (do NOT return early here).
    current_username = "anonymous_user"
    if profile and profile.username:
        current_username = profile.username
        print(f"User logged in: {current_username}")
    else:
        print("User not logged in through Gradio OAuth, using default username for submission.")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = GAIAAgent()
    except Exception as e:
        # Agent construction touches external services; surface a helpful
        # message (likely missing secrets) in the UI instead of crashing.
        error_message = f"Failed to initialize agent: {e}. Please ensure all required API keys (HUGGINGFACEHUB_API_TOKEN, SERPAPI_API_KEY, GOOGLE_API_KEY) are set in Hugging Face Space secrets, and model terms accepted."
        print(error_message)
        return error_message, pd.DataFrame([{"Question ID": "N/A", "Question": "Agent Initialization Failed", "Agent Answer": str(e)}])

    try:
        # Timeout prevents the Gradio worker from hanging forever if the
        # scoring service is unreachable.
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions = response.json()  # the endpoint returns the list directly
        print(f"Fetched {len(questions)} questions.")
    except requests.exceptions.RequestException as e:
        return f"Failed to fetch questions: {e}", pd.DataFrame()

    all_answers = []
    results_for_display = []
    for q_data in questions:
        q_id = q_data.get("task_id")  # GAIA identifies questions by 'task_id'
        q_text = q_data.get("question")
        if not q_id or not q_text:
            print(f"Skipping malformed question data: {q_data}")
            continue
        print(f"\n--- Processing Question ID: {q_id} ---")
        agent_answer = agent(q_text)  # GAIAAgent.__call__ never raises
        # Submission keys must match the GAIA benchmark schema exactly.
        all_answers.append({"task_id": q_id, "submitted_answer": agent_answer})
        results_for_display.append({"Question ID": q_id, "Question": q_text, "Agent Answer": agent_answer})

    results_df = pd.DataFrame(results_for_display)
    submission_data = {
        "answers": all_answers,
        "space_id": space_id,  # lets the leaderboard link back to this Space
        "username": current_username,
        "agent_code": inspect.getsource(GAIAAgent),  # for leaderboard debugging
    }

    try:
        print(f"\nSubmitting {len(all_answers)} answers to {submit_url}...")
        submit_response = requests.post(submit_url, json=submission_data, timeout=120)
        submit_response.raise_for_status()
        submission_result = submit_response.json()
        print("Submission successful!")
        print(f"Submission Result: {submission_result}")
        score = submission_result.get("score", "N/A")
        leaderboard_link = submission_result.get("leaderboard_link", "")
        # BUG FIX: `score` defaults to the string "N/A"; applying the `:.2f`
        # format spec to a str raises ValueError. Only format numeric scores.
        score_text = f"{score:.2f}%" if isinstance(score, (int, float)) else str(score)
        status_message = f"Evaluation complete! Your score: {score_text}\n"
        if leaderboard_link:
            status_message += f"Check the leaderboard: {leaderboard_link}\n"
        else:
            status_message += "No leaderboard link provided."
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        error_message = f"Failed to submit answers: {e}"
        if hasattr(e, 'response') and e.response is not None:
            error_message += f"\nResponse: {e.response.text}"
        print(error_message)
        return error_message, results_df
# (Keep Gradio UI setup as is)
# --- Gradio UI setup ---
with gr.Blocks() as demo:
    gr.Markdown(
        """
# Unit 4: Agentic AI for GAIA Benchmark
This Gradio app allows you to run your agent against the GAIA benchmark questions and submit your answers.
Your goal is to modify the `GAIAAgent` class in `app.py` to achieve a score above 30%.
"""
    )
    # OAuth login supplies the gr.OAuthProfile argument to the click handler.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
if __name__ == "__main__":
    # Startup diagnostics: report which HF Space environment variables are
    # present so misconfiguration is visible in the Space logs.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    host = os.getenv("SPACE_HOST")
    sid = os.getenv("SPACE_ID")
    if host:
        print(f"✅ SPACE_HOST found: {host}")
        print(f" Runtime URL should be: https://{host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    if sid:
        print(f"✅ SPACE_ID found: {sid}")
        print(f" Repo URL: https://huggingface.co/spaces/{sid}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{sid}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found...")
    demo.launch()