Spaces:
Sleeping
Sleeping
File size: 8,553 Bytes
10e9b7d eccf8e4 7d65c66 3c4371f 10e9b7d f3c3f5b cfde807 f3c3f5b cfde807 f3c3f5b e80aab9 3db6293 e80aab9 f3c3f5b 31243f4 f3c3f5b cfde807 f3c3f5b cfde807 f3c3f5b 31243f4 f3c3f5b cfde807 4021bf3 cfde807 31243f4 f3c3f5b 31243f4 7d65c66 f3c3f5b 3c4371f cfde807 7e4a06b cfde807 3c4371f 7e4a06b 31243f4 e80aab9 31243f4 f3c3f5b 31243f4 cfde807 f3c3f5b eccf8e4 f3c3f5b 7d65c66 31243f4 f3c3f5b e80aab9 cfde807 f3c3f5b 31243f4 f3c3f5b cfde807 f3c3f5b cfde807 f3c3f5b 31243f4 f3c3f5b cfde807 f3c3f5b e80aab9 f3c3f5b 31243f4 e80aab9 f3c3f5b e80aab9 f3c3f5b e80aab9 0ee0419 f3c3f5b e80aab9 f3c3f5b 7e4a06b e80aab9 31243f4 e80aab9 9088b99 7d65c66 e80aab9 31243f4 e80aab9 3c4371f f3c3f5b 7d65c66 3c4371f 7d65c66 3c4371f 7d65c66 f3c3f5b 7d65c66 f3c3f5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
import os
import gradio as gr
import requests
import inspect
import pandas as pd
# Import necessary libraries for LangChain Agent
from langchain_huggingface import HuggingFaceEndpoint
from langchain.agents import AgentExecutor, create_react_agent
from langchain import hub
from langchain_community.utilities import SerpAPIWrapper
from langchain.tools import Tool # Moved to top-level import for clarity and consistent access
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- LangChain Agent Definition ---
class GAIAAgent:
    """ReAct-style agent for GAIA benchmark questions.

    Wraps a Mistral-7B-Instruct model served through the Hugging Face
    serverless inference API in a LangChain ``AgentExecutor``, equipped with
    a single SerpAPI web-search tool and the standard ``hwchase17/react``
    prompt pulled from the LangChain hub.
    """

    def __init__(self):
        print("GAIAAgent initialized using LangChain.")
        model_repo = "mistralai/Mistral-7B-Instruct-v0.3"
        # Low temperature keeps the ReAct reasoning trace mostly deterministic.
        self.llm = HuggingFaceEndpoint(
            endpoint_url=f"https://api-inference.huggingface.co/models/{model_repo}",
            temperature=0.1,
            max_new_tokens=512,
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
        )

        # SerpAPIWrapper reads SERPAPI_API_KEY from the environment on its own.
        self.serpapi_tool = SerpAPIWrapper()

        # The description is what the LLM uses to decide when to invoke the tool.
        self.tools = [
            Tool(
                name="Serpapi Search",
                description=(
                    "useful for when you need to answer questions about current"
                    " events or facts. Input should be a search query."
                ),
                func=self.serpapi_tool.run,
            )
        ]

        self.prompt = hub.pull("hwchase17/react")
        self.agent = create_react_agent(self.llm, self.tools, self.prompt)
        # Generous iteration/time budget: GAIA questions often need several
        # search-and-reason cycles before a final answer emerges.
        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=25,
            max_execution_time=180.0,  # three minutes per question
        )

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its final answer text.

        On any executor failure, returns a human-readable error string
        instead of raising, so the caller's per-question loop keeps going.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            result = self.agent_executor.invoke({"input": question})
            answer = result["output"]
            print(f"Agent returning answer: {answer}")
            return answer
        except Exception as e:
            print(f"Error during agent execution: {e}")
            return f"Agent execution failed: {e}. Check tool outputs and LLM reasoning."
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or None when
            running without a Hugging Face login (e.g. locally).

    Returns:
        A ``(status_message, results_dataframe)`` tuple matching the two
        Gradio outputs (status textbox and results table).
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")

    # Robust username handling: fall back to a placeholder so the app still
    # runs locally without a Gradio OAuth login.
    current_username = "anonymous_user"
    if profile and profile.username:
        current_username = profile.username
        print(f"User logged in: {current_username}")
    else:
        print("User not logged in through Gradio OAuth, using default username for submission.")
        # Do NOT return here, allow the app to run locally without HF login

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = GAIAAgent()
    except Exception as e:
        # Agent construction touches remote services (HF hub, endpoint config),
        # so surface a clear message in the UI instead of crashing.
        error_message = f"Failed to initialize agent: {e}. Please ensure all required API keys (HUGGINGFACEHUB_API_TOKEN, SERPAPI_API_KEY, GOOGLE_API_KEY) are set in Hugging Face Space secrets, and model terms accepted."
        print(error_message)
        # Return empty DataFrame with the error message to display in Gradio UI
        return error_message, pd.DataFrame([{"Question ID": "N/A", "Question": "Agent Initialization Failed", "Agent Answer": str(e)}])

    try:
        # Timeout added so a stalled scoring server can't hang the UI forever.
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions = response.json()  # the endpoint returns the question list directly
        print(f"Fetched {len(questions)} questions.")
    except requests.exceptions.RequestException as e:
        return f"Failed to fetch questions: {e}", pd.DataFrame()  # empty DataFrame on fetch error

    all_answers = []
    results_for_display = []
    for q_data in questions:
        q_id = q_data.get("task_id")  # key name per the GAIA data schema
        q_text = q_data.get("question")
        if not q_id or not q_text:
            print(f"Skipping malformed question data: {q_data}")
            continue
        print(f"\n--- Processing Question ID: {q_id} ---")
        # GAIAAgent.__call__ handles its own exceptions and returns a string.
        agent_answer = agent(q_text)
        # Submission keys must match GAIA benchmark expectations exactly.
        all_answers.append({"task_id": q_id, "submitted_answer": agent_answer})
        results_for_display.append({"Question ID": q_id, "Question": q_text, "Agent Answer": agent_answer})

    results_df = pd.DataFrame(results_for_display)
    submission_data = {
        "answers": all_answers,
        "space_id": space_id,  # include SPACE_ID for the leaderboard link
        "username": current_username,
        "agent_code": inspect.getsource(GAIAAgent),  # agent code, for debugging on leaderboard
    }

    try:
        print(f"\nSubmitting {len(all_answers)} answers to {submit_url}...")
        # Generous timeout: server-side scoring of a full answer set can be slow.
        submit_response = requests.post(submit_url, json=submission_data, timeout=120)
        submit_response.raise_for_status()
        submission_result = submit_response.json()
        print("Submission successful!")
        print(f"Submission Result: {submission_result}")
        score = submission_result.get("score", "N/A")
        leaderboard_link = submission_result.get("leaderboard_link", "")
        # BUG FIX: the old f"{score:.2f}" raised an uncaught ValueError when the
        # API omitted a numeric score (default "N/A" is a str). Format only
        # when the value is actually a number.
        score_text = f"{score:.2f}%" if isinstance(score, (int, float)) else str(score)
        status_message = f"Evaluation complete! Your score: {score_text}\n"
        if leaderboard_link:
            status_message += f"Check the leaderboard: {leaderboard_link}\n"
        else:
            status_message += "No leaderboard link provided."
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        error_message = f"Failed to submit answers: {e}"
        # Include the server's response body when available — it usually
        # explains why the submission was rejected.
        if hasattr(e, 'response') and e.response is not None:
            error_message += f"\nResponse: {e.response.text}"
        print(error_message)
        return error_message, results_df
# --- Gradio UI: login, run trigger, and result displays ---
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Unit 4: Agentic AI for GAIA Benchmark
        This Gradio app allows you to run your agent against the GAIA benchmark questions and submit your answers.
        Your goal is to modify the `GAIAAgent` class in `app.py` to achieve a score above 30%.
        """
    )
    # OAuth login button; when logged in, Gradio injects the OAuthProfile
    # into run_and_submit_all's `profile` parameter automatically.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # No explicit `inputs=`: the OAuthProfile-annotated parameter is filled
    # from the login state by Gradio itself.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
if __name__ == "__main__":
    # Startup diagnostics: report which HF Space environment variables are
    # present so deployment issues are visible straight from the logs.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    host = os.getenv("SPACE_HOST")
    repo = os.getenv("SPACE_ID")

    if host:
        print(f"✅ SPACE_HOST found: {host}")
        print(f" Runtime URL should be: https://{host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if repo:
        print(f"✅ SPACE_ID found: {repo}")
        print(f" Repo URL: https://huggingface.co/spaces/{repo}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{repo}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found...")

    demo.launch()