import os
import inspect

import gradio as gr
import pandas as pd
import requests

# LangChain agent machinery
from langchain_huggingface import HuggingFaceEndpoint
from langchain.agents import AgentExecutor, create_react_agent
from langchain import hub
from langchain_community.utilities import SerpAPIWrapper
from langchain.tools import Tool

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Timeout (seconds) for HTTP calls to the scoring server, so a dead or
# unreachable endpoint cannot hang the Gradio UI indefinitely.
REQUEST_TIMEOUT = 60.0


# --- LangChain Agent Definition ---
class GAIAAgent:
    """ReAct-style agent for the GAIA benchmark.

    Wraps a Mistral-7B-Instruct model (served via the Hugging Face
    Inference API) in a LangChain ReAct loop equipped with a single
    SerpAPI web-search tool.
    """

    def __init__(self):
        print("GAIAAgent initialized using LangChain.")
        repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
        self.llm = HuggingFaceEndpoint(
            # Explicit endpoint URL so the inference target is unambiguous.
            endpoint_url=f"https://api-inference.huggingface.co/models/{repo_id}",
            temperature=0.1,
            max_new_tokens=512,
            huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
        )

        # SerpAPIWrapper reads SERPAPI_API_KEY from the environment automatically.
        self.serpapi_tool = SerpAPIWrapper()
        # The description is what the LLM uses to decide when to invoke the tool.
        web_search_tool = Tool(
            name="Serpapi Search",
            description=(
                "useful for when you need to answer questions about current "
                "events or facts. Input should be a search query."
            ),
            func=self.serpapi_tool.run,
        )
        self.tools = [web_search_tool]

        # Standard ReAct prompt pulled from the LangChain hub.
        self.prompt = hub.pull("hwchase17/react")
        self.agent = create_react_agent(self.llm, self.tools, self.prompt)
        self.agent_executor = AgentExecutor(
            agent=self.agent,
            tools=self.tools,
            verbose=True,
            handle_parsing_errors=True,
            max_iterations=25,         # generous thinking budget for complex GAIA questions
            max_execution_time=180.0,  # up to 3 minutes per question
        )

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return its final answer.

        Never raises: execution errors are converted into a diagnostic
        string so a single bad question cannot abort the whole run.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            response = self.agent_executor.invoke({"input": question})
            agent_answer = response["output"]
            print(f"Agent returning answer: {agent_answer}")
            return agent_answer
        except Exception as e:
            print(f"Error during agent execution: {e}")
            return f"Agent execution failed: {e}. Check tool outputs and LLM reasoning."


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, run the GAIAAgent on each, and submit the answers.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or ``None``
            when running locally / not logged in.

    Returns:
        A ``(status_message, results_dataframe)`` tuple bound to the two
        Gradio output components.
    """
    space_id = os.getenv("SPACE_ID")

    # Robust username handling: fall back to a placeholder so local runs
    # (no HF login) still work end to end instead of aborting.
    current_username = "anonymous_user"
    if profile and profile.username:
        current_username = profile.username
        print(f"User logged in: {current_username}")
    else:
        print("User not logged in through Gradio OAuth, using default username for submission.")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # --- Build the agent ---
    try:
        agent = GAIAAgent()
    except Exception as e:
        error_message = (
            f"Failed to initialize agent: {e}. Please ensure all required API keys "
            "(HUGGINGFACEHUB_API_TOKEN, SERPAPI_API_KEY, GOOGLE_API_KEY) are set in "
            "Hugging Face Space secrets, and model terms accepted."
        )
        print(error_message)
        # Surface the failure in the results table as well as the status box.
        return error_message, pd.DataFrame(
            [{"Question ID": "N/A", "Question": "Agent Initialization Failed", "Agent Answer": str(e)}]
        )

    # --- Fetch questions ---
    try:
        response = requests.get(questions_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        questions = response.json()  # the endpoint returns the question list directly
        print(f"Fetched {len(questions)} questions.")
    except requests.exceptions.RequestException as e:
        return f"Failed to fetch questions: {e}", pd.DataFrame()

    if not questions:
        return "Fetched question list is empty; nothing to run.", pd.DataFrame()

    # --- Run the agent over every question ---
    all_answers = []
    results_for_display = []
    for q_data in questions:
        q_id = q_data.get("task_id")  # GAIA identifies questions by 'task_id'
        q_text = q_data.get("question")
        if not q_id or not q_text:
            print(f"Skipping malformed question data: {q_data}")
            continue
        print(f"\n--- Processing Question ID: {q_id} ---")
        agent_answer = agent(q_text)  # GAIAAgent.__call__
        # Submission keys must match the GAIA scoring API exactly.
        all_answers.append({"task_id": q_id, "submitted_answer": agent_answer})
        results_for_display.append(
            {"Question ID": q_id, "Question": q_text, "Agent Answer": agent_answer}
        )

    results_df = pd.DataFrame(results_for_display)

    submission_data = {
        "answers": all_answers,
        "space_id": space_id,                        # used for the leaderboard link
        "username": current_username,
        "agent_code": inspect.getsource(GAIAAgent),  # agent source, for debugging on the leaderboard
    }

    # --- Submit answers ---
    try:
        print(f"\nSubmitting {len(all_answers)} answers to {submit_url}...")
        submit_response = requests.post(submit_url, json=submission_data, timeout=REQUEST_TIMEOUT)
        submit_response.raise_for_status()
        submission_result = submit_response.json()
        print("Submission successful!")
        print(f"Submission Result: {submission_result}")

        score = submission_result.get("score", "N/A")
        # BUG FIX: 'score' defaults to the *string* "N/A"; formatting that with
        # ':.2f' raised ValueError and turned a successful submission into a
        # crash. Apply numeric formatting only to actual numbers.
        score_text = f"{score:.2f}%" if isinstance(score, (int, float)) else str(score)
        leaderboard_link = submission_result.get("leaderboard_link", "")
        status_message = f"Evaluation complete! Your score: {score_text}\n"
        if leaderboard_link:
            status_message += f"Check the leaderboard: {leaderboard_link}\n"
        else:
            status_message += "No leaderboard link provided."
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        error_message = f"Failed to submit answers: {e}"
        # Include the server's response body when available — it usually
        # explains why the submission was rejected.
        if hasattr(e, 'response') and e.response is not None:
            error_message += f"\nResponse: {e.response.text}"
        print(error_message)
        return error_message, results_df


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Unit 4: Agentic AI for GAIA Benchmark
        This Gradio app allows you to run your agent against the GAIA benchmark questions and submit your answers.
        Your goal is to modify the `GAIAAgent` class in `app.py` to achieve a score above 30%.
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # Gradio injects the OAuth profile automatically because
    # run_and_submit_all annotates a gr.OAuthProfile parameter.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")
    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found...")
    demo.launch()