Spaces: Running

Update app.py
Browse files

app.py (CHANGED)

@@ -247,263 +247,957 @@
| 247 | # =================================================================================================
| 248 |
| 249 | #
| 250 | import os
| 251 | - import io
| 252 | - import json
| 253 | import requests
| 254 | import pandas as pd
| 255 | - import gradio as gr
| 256 | - from contextlib import redirect_stdout
| 257 | - from typing import TypedDict, Annotated, List
| 258 | - import operator
| 259 | -
| 260 | - # --- LangChain & LangGraph Imports ---
| 261 | - from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage, AIMessage, SystemMessage
| 262 | - from langchain_core.tools import tool
| 263 | - from langchain_huggingface import HuggingFaceEndpoint
| 264 | - from langgraph.graph import StateGraph, END
| 265 | from tavily import TavilyClient
| 266 | - import pypdf
| 267 |
| 268 | # --- Constants ---
| 269 | DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
| 270 | - FILES_DIR = "./files"
| 271 | - os.makedirs(FILES_DIR, exist_ok=True)
| 272 | -
| 273 | - # --- System Prompt (Updated for Manual JSON Tool Calling) ---
| 274 | - # This prompt instructs the model to generate JSON, a robust method for tool calls.
| 275 | - AGENT_SYSTEM_PROMPT = """You are a world-class AI agent, specialized in solving complex problems from the GAIA benchmark.
| 276 | - Your task is to analyze the user's question, think step-by-step, and use the provided tools to find the correct answer.
| 277 | -
| 278 | - **TOOL USAGE INSTRUCTIONS:**
| 279 | - When you need to use a tool, you MUST respond with a JSON object containing the tool name and its arguments. The JSON object should have two keys: "tool_name" and "parameters".
| 280 | -
| 281 | - Here is an example of how to call the `tavily_search` tool:
| 282 | - ```json
| 283 | - {
| 284 | -   "tool_name": "tavily_search",
| 285 | -   "parameters": {
| 286 | -     "query": "Who won the last FIFA World Cup?"
| 287 | -   }
| 288 | - }
| 289 | - Use code with caution.
| 290 | - Python
| 291 | - CRITICAL FINAL ANSWER INSTRUCTIONS:
| 292 | - Once you have gathered all the necessary information and are absolutely certain of the answer, you MUST provide it directly and concisely.
| 293 | - Your final response must ONLY be the answer itself.
| 294 | - DO NOT wrap the final answer in a JSON object or include any conversational text.
| 295 | - Think, use your tools, and then provide ONLY the final, precise answer.
| 296 | - """
| 297 | - ###===============================================================================================
| 298 | - tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
| 299 | - @tool
| 300 | - def tavily_search(query: str) -> str:
| 301 | -     """Uses the Tavily Search API to find information on the web."""
| 302 | -     print(f"--- Calling Tavily Search Tool with query: {query} ---")
| 303 | -     try:
| 304 | -         result = tavily.search(query=query, search_depth="advanced")
| 305 | -         return f"Search results for '{query}':\n" + "\n".join([f"- {r['content']}" for r in result['results']])
| 306 | -     except Exception as e: return f"Error during Tavily search: {e}"
| 307 | - @tool
| 308 | - def read_file(url: str) -> str:
| 309 | -     """Downloads and reads the content of a file (text or PDF) from a URL."""
| 310 | -     print(f"--- Calling Read File Tool with URL: {url} ---")
| 311 | -     try:
| 312 | -         filename = os.path.join(FILES_DIR, os.path.basename(url))
| 313 | -         response = requests.get(url)
| 314 | -         response.raise_for_status()
| 315 | -         with open(filename, 'wb') as f: f.write(response.content)
| 316 | -         if url.lower().endswith('.pdf'):
| 317 | -             try:
| 318 | -                 pdf_reader = pypdf.PdfReader(filename)
| 319 | -                 return f"Successfully read PDF file '{filename}'. Content:\n\n{''.join(p.extract_text() for p in pdf_reader.pages)}"
| 320 | -             except Exception as e: return f"Error reading PDF file: {e}"
| 321 | -         else:
| 322 | -             try:
| 323 | -                 with open(filename, 'r', encoding='utf-8') as f: return f"Successfully read text file '{filename}'. Content:\n\n{f.read()}"
| 324 | -             except UnicodeDecodeError: return f"Successfully downloaded binary file '{filename}'. Cannot display content as text."
| 325 | -     except requests.exceptions.RequestException as e: return f"Error downloading or reading file: {e}"
| 326 | - @tool
| 327 | - def python_interpreter(code: str) -> str:
| 328 | -     """Executes Python code and returns its stdout."""
| 329 | -     print(f"--- Calling Python Interpreter Tool with code:\n{code} ---")
| 330 | -     output_buffer = io.StringIO()
| 331 | -     try:
| 332 | -         with redirect_stdout(output_buffer): exec(code, globals())
| 333 | -         return f"Code executed successfully. Output:\n{output_buffer.getvalue()}"
| 334 | -     except Exception as e: return f"Error executing Python code: {e}"
| 335 | - ##================================================================================================
| 336 | - #✅ 2. CONFIGURE AND BUILD THE AGENT (with Qwen2 and Manual Tool Calling)
| 337 | - #================================================================================================
| 338 | - class AgentState(TypedDict):
| 339 | -     messages: Annotated[List[BaseMessage], operator.add]
| 340 | - def build_agent_graph():
| 341 | -     """Builds the agent using a manual LangGraph loop with the HuggingFaceEndpoint."""
| 342 | -     tools = [tavily_search, read_file, python_interpreter]
| 343 | -     tool_map = {tool.name: tool for tool in tools}
| 344 | - Generated code
| 345 | -     # Using Qwen2-72B-Instruct model via HuggingFaceEndpoint
| 346 | -     repo_id = "Qwen/Qwen2-72B-Instruct"
| 347 | -     llm = HuggingFaceEndpoint(
| 348 | -         repo_id=repo_id,
| 349 | -         max_new_tokens=1024,
| 350 | -         temperature=0.1,
| 351 | -         huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
| 352 | -     )
| 353 | -
| 354 | -     def call_model(state: AgentState):
| 355 | -         """Invokes the LLM and wraps the response in an AIMessage."""
| 356 | -         # Qwen2 Instruct uses a specific chat template. We build it manually.
| 357 | -         prompt_str = ""
| 358 | -         for msg in state['messages']:
| 359 | -             role = ""
| 360 | -             if isinstance(msg, SystemMessage): role = "system"
| 361 | -             elif isinstance(msg, HumanMessage): role = "user"
| 362 | -             elif isinstance(msg, AIMessage): role = "assistant"
| 363 | -             elif isinstance(msg, ToolMessage): continue  # We'll handle tool results differently
| 364 | -
| 365 | -             if role: prompt_str += f"<|im_start|>{role}\n{msg.content}<|im_end|>\n"
| 366 | -
| 367 | -         # Add results from the last tool call, if any
| 368 | -         if isinstance(state['messages'][-1], ToolMessage):
| 369 | -             prompt_str += f"<|im_start|>user\nTool output:\n{state['messages'][-1].content}<|im_end|>\n"
| 370 | -
| 371 | -         prompt_str += "<|im_start|>assistant\n"
| 372 | -
| 373 | -         response_text = llm.invoke(prompt_str)
| 374 | -         return {"messages": [AIMessage(content=response_text)]}
| 375 | -
| 376 | -     def should_continue(state: AgentState) -> str:
| 377 | -         """Determines whether to call a tool or end the loop."""
| 378 | -         last_message_content = state['messages'][-1].content.strip()
| 379 | -         # A simple check for JSON is a reliable way to detect tool calls.
| 380 | -         if "```json" in last_message_content:
| 381 | -             return "action"
| 382 | -         if last_message_content.startswith('{') and last_message_content.endswith('}'):
| 383 |             try:
| 384 | -                 json.loads(last_message_content)
| 385 | -                 return "action"
| 386 | -             except json.JSONDecodeError:
| 387 | -                 return "end"  # Not valid JSON, must be the final answer
| 388 | -         else:
| 389 | -             return "end"
| 390 |
| 391 | -     def call_tool_node(state: AgentState):
| 392 | -         """Parses the JSON tool call from the LLM and executes it."""
| 393 | -         last_message_content = state['messages'][-1].content.strip()
| 394 |
| 395 | -         # Extract JSON from markdown code block if present
| 396 | -         if "```json" in last_message_content:
| 397 | -             json_str = last_message_content.split("```json").split("```")[0].strip()
| 398 |         else:
| 399 | -             json_str = last_message_content
| 400 |
| 401 |         try:
| 402 | -             tool_call_data = json.loads(json_str)
| 403 | -             tool_name = tool_call_data.get("tool_name")
| 404 | -             parameters = tool_call_data.get("parameters", {})
| 405 | -             if tool_name not in tool_map:
| 406 | -                 return {"messages": [ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id="error")]}
| 407 | -
| 408 | -             selected_tool = tool_map[tool_name]
| 409 | -             tool_output = selected_tool.invoke(parameters)
| 410 | -             return {"messages": [ToolMessage(content=str(tool_output), tool_call_id=tool_name)]}
| 411 |         except Exception as e:
| 412 | -             return {"messages": [ToolMessage(content=f"Error parsing tool call: {e}. Content: '{last_message_content}'", tool_call_id="error")]}
| 413 | -
| 414 | -     workflow = StateGraph(AgentState)
| 415 | -     workflow.add_node("agent", call_model)
| 416 | -     workflow.add_node("action", call_tool_node)
| 417 | -     workflow.set_entry_point("agent")
| 418 | -     workflow.add_conditional_edges("agent", should_continue, {"action": "action", "end": END})
| 419 | -     workflow.add_edge('action', 'agent')
| 420 | -     return workflow.compile()
| 421 | - Use code with caution.
| 422 | - #================================================================================================
| 423 | - #✅ 3. AGENT CLASS AND EVALUATION LOGIC
| 424 | - #================================================================================================
| 425 | - class GaiaAgent:
| 426 | -     def init(self):
| 427 | -         print("GaiaAgent initialized. Building agent with Qwen/Qwen2-72B-Instruct...")
| 428 | -         self.agent_app = build_agent_graph()
| 429 | - Generated code
| 430 | -     def __call__(self, question: str) -> str:
| 431 | -         print(f"\n{'='*60}\nAgent received question: {question[:100]}...\n{'='*60}")
| 432 | -         try:
| 433 | -             initial_input = {"messages": [SystemMessage(content=AGENT_SYSTEM_PROMPT), HumanMessage(content=question)]}
| 434 | -             final_state = None
| 435 | -             for step in self.agent_app.stream(initial_input, {"recursion_limit": 15}):
| 436 | -                 final_state = list(step.values())[0]
| 437 |
| 438 | -             final_answer = final_state['messages'][-1].content
| 439 | -             return str(final_answer).strip()
| 440 |         except Exception as e:
| 441 | -             print(f"An error occurred during agent execution: {e}")
| 442 | -             return f"AGENT_EXECUTION_ERROR: {e}"
| 443 | - Use code with caution.
| 444 | - --- The rest of the file is unchanged ---
| 445 | - def run_and_submit_all( profile: gr.OAuthProfile | None):
| 446 | -     space_id = os.getenv("SPACE_ID")
| 447 | -     if not profile: return "Please Login to Hugging Face with the button.", None
| 448 | -     username = f"{profile.username}"
| 449 | -     print(f"User logged in: {username}")
| 450 | -     api_url = DEFAULT_API_URL
| 451 | -     questions_url = f"{api_url}/questions"
| 452 | -     submit_url = f"{api_url}/submit"
| 453 | -     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
| 454 | - Generated code
| 455 | -     try:
| 456 | -         response = requests.get(questions_url, timeout=15)
| 457 | -         response.raise_for_status()
| 458 | -         questions_data = response.json()
| 459 | -     except Exception as e: return f"An unexpected error occurred fetching questions: {e}", None
| 460 | -
| 461 | -     results_log, answers_payload = [], []
| 462 | -     agent_instance = GaiaAgent()
| 463 | -
| 464 | -     for item in questions_data:
| 465 | -         task_id, question_text = item.get("task_id"), item.get("question")
| 466 | -         if not task_id or question_text is None: continue
| 467 |         try:
| 468 | -             submitted_answer = agent_instance(question_text)
| 469 | -             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
| 470 | -             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
| 471 |         except Exception as e:
| 472 | -             print(f"Error running agent on task {task_id}: {e}")
| 473 | -             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
| 474 | -
| 475 | -     if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
| 476 | -     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
| 477 | -
| 478 | -     try:
| 479 | -         response = requests.post(submit_url, json=submission_data, timeout=90)
| 480 | -         response.raise_for_status()
| 481 | -         result_data = response.json()
| 482 | -         final_status = (
| 483 | -             f"Submission Successful!\n"
| 484 | -             f"User: {result_data.get('username')}\n"
| 485 | -             f"Overall Score: {result_data.get('score', 'N/A')}% "
| 486 | -             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
| 487 | -             f"Message: {result_data.get('message', 'No message received.')}"
| 488 | -         )
| 489 | -         return final_status, pd.DataFrame(results_log)
| 490 | -     except Exception as e: return f"An unexpected error in submission: {e}", pd.DataFrame(results_log)
| 491 | - Use code with caution.
| 492 | with gr.Blocks() as demo:
| 493 | -     gr.Markdown("# GAIA Agent Final Assessment (Qwen2-72B-Instruct)")
| 494 | -     gr.Markdown(
| 495 | -         """
| 496 | -         Instructor's Note: This version uses the powerful Qwen/Qwen2-72B-Instruct model from the Hugging Face Hub.
| 497 | -         It relies on a robust manual LangGraph loop to handle tool calls by instructing the model to generate JSON.
| 498 | -         1. Ensure you have a HUGGINGFACEHUB_API_TOKEN and TAVILY_API_KEY set in your secrets.
| 499 | -         2. Ensure your requirements.txt is updated. Good luck!
| 500 | -         """
| 501 | -     )
| 502 | -     gr.LoginButton()
| 503 | -     run_button = gr.Button("Run Evaluation & Submit All Answers")
| 504 | -     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
| 505 | -     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
| 506 | -     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
| 507 | - if name == "main":
| 508 | -     print("\n" + "-"*30 + " App Starting " + "-"*30)
| 509 | -     demo.launch(debug=True, share=False, ssr_mode=False)
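Aside: the core of the removed implementation above is the manual ChatML prompt that `call_model` builds for Qwen2. A minimal sketch of that assembly, assuming plain `(role, content)` tuples as stand-ins for the LangChain message classes:

```python
# Sketch of the ChatML prompt assembly performed by call_model above.
# (role, content) tuples are hypothetical stand-ins for LangChain's message objects.
def build_chatml_prompt(messages: list[tuple[str, str]]) -> str:
    """Render messages in Qwen2's <|im_start|>role ... <|im_end|> format."""
    prompt = ""
    for role, content in messages:
        prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"
    # The trailing open assistant turn cues the model to generate its reply.
    prompt += "<|im_start|>assistant\n"
    return prompt

if __name__ == "__main__":
    print(build_chatml_prompt([
        ("system", "You are a helpful agent."),
        ("user", "What is 2 + 2?"),
    ]))
```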
|
| 247 |
# =================================================================================================
|
| 248 |
|
| 249 |
#
|
| 250 |
+
# import os
|
| 251 |
+
# import io
|
| 252 |
+
# import json
|
| 253 |
+
# import requests
|
| 254 |
+
# import pandas as pd
|
| 255 |
+
# import gradio as gr
|
| 256 |
+
# from contextlib import redirect_stdout
|
| 257 |
+
# from typing import TypedDict, Annotated, List
|
| 258 |
+
# import operator
|
| 259 |
+
|
| 260 |
+
# # --- LangChain & LangGraph Imports ---
|
| 261 |
+
# from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage, AIMessage, SystemMessage
|
| 262 |
+
# from langchain_core.tools import tool
|
| 263 |
+
# from langchain_huggingface import HuggingFaceEndpoint
|
| 264 |
+
# from langgraph.graph import StateGraph, END
|
| 265 |
+
# from tavily import TavilyClient
|
| 266 |
+
# import pypdf
|
| 267 |
+
|
| 268 |
+
# # --- Constants ---
|
| 269 |
+
# DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 270 |
+
# FILES_DIR = "./files"
|
| 271 |
+
# os.makedirs(FILES_DIR, exist_ok=True)
|
| 272 |
+
|
| 273 |
+
# # --- System Prompt (Updated for Manual JSON Tool Calling) ---
|
| 274 |
+
# # This prompt instructs the model to generate JSON, a robust method for tool calls.
|
| 275 |
+
# AGENT_SYSTEM_PROMPT = """You are a world-class AI agent, specialized in solving complex problems from the GAIA benchmark.
|
| 276 |
+
# Your task is to analyze the user's question, think step-by-step, and use the provided tools to find the correct answer.
|
| 277 |
+
|
| 278 |
+
# **TOOL USAGE INSTRUCTIONS:**
|
| 279 |
+
# When you need to use a tool, you MUST respond with a JSON object containing the tool name and its arguments. The JSON object should have two keys: "tool_name" and "parameters".
|
| 280 |
+
|
| 281 |
+
# Here is an example of how to call the `tavily_search` tool:
|
| 282 |
+
# ```json
|
| 283 |
+
# {
|
| 284 |
+
# "tool_name": "tavily_search",
|
| 285 |
+
# "parameters": {
|
| 286 |
+
# "query": "Who won the last FIFA World Cup?"
|
| 287 |
+
# }
|
| 288 |
+
# }
|
| 289 |
+
# Use code with caution.
|
| 290 |
+
# Python
|
| 291 |
+
# CRITICAL FINAL ANSWER INSTRUCTIONS:
|
| 292 |
+
# Once you have gathered all the necessary information and are absolutely certain of the answer, you MUST provide it directly and concisely.
|
| 293 |
+
# Your final response must ONLY be the answer itself.
|
| 294 |
+
# DO NOT wrap the final answer in a JSON object or include any conversational text.
|
| 295 |
+
# Think, use your tools, and then provide ONLY the final, precise answer.
|
| 296 |
+
# """
|
| 297 |
+
# ###===============================================================================================
|
| 298 |
+
# tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
|
| 299 |
+
# @tool
|
| 300 |
+
# def tavily_search(query: str) -> str:
|
| 301 |
+
# """Uses the Tavily Search API to find information on the web."""
|
| 302 |
+
# print(f"--- Calling Tavily Search Tool with query: {query} ---")
|
| 303 |
+
# try:
|
| 304 |
+
# result = tavily.search(query=query, search_depth="advanced")
|
| 305 |
+
# return f"Search results for '{query}':\n" + "\n".join([f"- {r['content']}" for r in result['results']])
|
| 306 |
+
# except Exception as e: return f"Error during Tavily search: {e}"
|
| 307 |
+
# @tool
|
| 308 |
+
# def read_file(url: str) -> str:
|
| 309 |
+
# """Downloads and reads the content of a file (text or PDF) from a URL."""
|
| 310 |
+
# print(f"--- Calling Read File Tool with URL: {url} ---")
|
| 311 |
+
# try:
|
| 312 |
+
# filename = os.path.join(FILES_DIR, os.path.basename(url))
|
| 313 |
+
# response = requests.get(url)
|
| 314 |
+
# response.raise_for_status()
|
| 315 |
+
# with open(filename, 'wb') as f: f.write(response.content)
|
| 316 |
+
# if url.lower().endswith('.pdf'):
|
| 317 |
+
# try:
|
| 318 |
+
# pdf_reader = pypdf.PdfReader(filename)
|
| 319 |
+
# return f"Successfully read PDF file '{filename}'. Content:\n\n{''.join(p.extract_text() for p in pdf_reader.pages)}"
|
| 320 |
+
# except Exception as e: return f"Error reading PDF file: {e}"
|
| 321 |
+
# else:
|
| 322 |
+
# try:
|
| 323 |
+
# with open(filename, 'r', encoding='utf-8') as f: return f"Successfully read text file '{filename}'. Content:\n\n{f.read()}"
|
| 324 |
+
# except UnicodeDecodeError: return f"Successfully downloaded binary file '{filename}'. Cannot display content as text."
|
| 325 |
+
# except requests.exceptions.RequestException as e: return f"Error downloading or reading file: {e}"
|
| 326 |
+
# @tool
|
| 327 |
+
# def python_interpreter(code: str) -> str:
|
| 328 |
+
# """Executes Python code and returns its stdout."""
|
| 329 |
+
# print(f"--- Calling Python Interpreter Tool with code:\n{code} ---")
|
| 330 |
+
# output_buffer = io.StringIO()
|
| 331 |
+
# try:
|
| 332 |
+
# with redirect_stdout(output_buffer): exec(code, globals())
|
| 333 |
+
# return f"Code executed successfully. Output:\n{output_buffer.getvalue()}"
|
| 334 |
+
# except Exception as e: return f"Error executing Python code: {e}"
|
| 335 |
+
# ##================================================================================================
|
| 336 |
+
# #✅ 2. CONFIGURE AND BUILD THE AGENT (with Qwen2 and Manual Tool Calling)
|
| 337 |
+
# #================================================================================================
|
| 338 |
+
# class AgentState(TypedDict):
|
| 339 |
+
# messages: Annotated[List[BaseMessage], operator.add]
|
| 340 |
+
# def build_agent_graph():
|
| 341 |
+
# """Builds the agent using a manual LangGraph loop with the HuggingFaceEndpoint."""
|
| 342 |
+
# tools = [tavily_search, read_file, python_interpreter]
|
| 343 |
+
# tool_map = {tool.name: tool for tool in tools}
|
| 344 |
+
# Generated code
|
| 345 |
+
# # Using Qwen2-72B-Instruct model via HuggingFaceEndpoint
|
| 346 |
+
# repo_id = "Qwen/Qwen2-72B-Instruct"
|
| 347 |
+
# llm = HuggingFaceEndpoint(
|
| 348 |
+
# repo_id=repo_id,
|
| 349 |
+
# max_new_tokens=1024,
|
| 350 |
+
# temperature=0.1,
|
| 351 |
+
# huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
| 352 |
+
# )
|
| 353 |
+
|
| 354 |
+
# def call_model(state: AgentState):
|
| 355 |
+
# """Invokes the LLM and wraps the response in an AIMessage."""
|
| 356 |
+
# # Qwen2 Instruct uses a specific chat template. We build it manually.
|
| 357 |
+
# prompt_str = ""
|
| 358 |
+
# for msg in state['messages']:
|
| 359 |
+
# role = ""
|
| 360 |
+
# if isinstance(msg, SystemMessage): role = "system"
|
| 361 |
+
# elif isinstance(msg, HumanMessage): role = "user"
|
| 362 |
+
# elif isinstance(msg, AIMessage): role = "assistant"
|
| 363 |
+
# elif isinstance(msg, ToolMessage): continue # We'll handle tool results differently
|
| 364 |
+
|
| 365 |
+
# if role: prompt_str += f"<|im_start|>{role}\n{msg.content}<|im_end|>\n"
|
| 366 |
+
|
| 367 |
+
# # Add results from the last tool call, if any
|
| 368 |
+
# if isinstance(state['messages'][-1], ToolMessage):
|
| 369 |
+
# prompt_str += f"<|im_start|>user\nTool output:\n{state['messages'][-1].content}<|im_end|>\n"
|
| 370 |
+
|
| 371 |
+
# prompt_str += "<|im_start|>assistant\n"
|
| 372 |
+
|
| 373 |
+
# response_text = llm.invoke(prompt_str)
|
| 374 |
+
# return {"messages": [AIMessage(content=response_text)]}
|
| 375 |
+
|
| 376 |
+
# def should_continue(state: AgentState) -> str:
|
| 377 |
+
# """Determines whether to call a tool or end the loop."""
|
| 378 |
+
# last_message_content = state['messages'][-1].content.strip()
|
| 379 |
+
# # A simple check for JSON is a reliable way to detect tool calls.
|
| 380 |
+
# if "```json" in last_message_content:
|
| 381 |
+
# return "action"
|
| 382 |
+
# if last_message_content.startswith('{') and last_message_content.endswith('}'):
|
| 383 |
+
# try:
|
| 384 |
+
# json.loads(last_message_content)
|
| 385 |
+
# return "action"
|
| 386 |
+
# except json.JSONDecodeError:
|
| 387 |
+
# return "end" # Not valid JSON, must be the final answer
|
| 388 |
+
# else:
|
| 389 |
+
# return "end"
|
| 390 |
+
|
| 391 |
+
# def call_tool_node(state: AgentState):
|
| 392 |
+
# """Parses the JSON tool call from the LLM and executes it."""
|
| 393 |
+
# last_message_content = state['messages'][-1].content.strip()
|
| 394 |
+
|
| 395 |
+
# # Extract JSON from markdown code block if present
|
| 396 |
+
# if "```json" in last_message_content:
|
| 397 |
+
# json_str = last_message_content.split("```json").split("```")[0].strip()
|
| 398 |
+
# else:
|
| 399 |
+
# json_str = last_message_content
|
| 400 |
+
|
| 401 |
+
# try:
|
| 402 |
+
# tool_call_data = json.loads(json_str)
|
| 403 |
+
# tool_name = tool_call_data.get("tool_name")
|
| 404 |
+
# parameters = tool_call_data.get("parameters", {})
|
| 405 |
+
# if tool_name not in tool_map:
|
| 406 |
+
# return {"messages": [ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id="error")]}
|
| 407 |
+
|
| 408 |
+
# selected_tool = tool_map[tool_name]
|
| 409 |
+
# tool_output = selected_tool.invoke(parameters)
|
| 410 |
+
# return {"messages": [ToolMessage(content=str(tool_output), tool_call_id=tool_name)]}
|
| 411 |
+
# except Exception as e:
|
| 412 |
+
# return {"messages": [ToolMessage(content=f"Error parsing tool call: {e}. Content: '{last_message_content}'", tool_call_id="error")]}
|
| 413 |
+
|
| 414 |
+
# workflow = StateGraph(AgentState)
|
| 415 |
+
# workflow.add_node("agent", call_model)
|
| 416 |
+
# workflow.add_node("action", call_tool_node)
|
| 417 |
+
# workflow.set_entry_point("agent")
|
| 418 |
+
# workflow.add_conditional_edges("agent", should_continue, {"action": "action", "end": END})
|
| 419 |
+
# workflow.add_edge('action', 'agent')
|
| 420 |
+
# return workflow.compile()
|
| 421 |
+
# Use code with caution.
|
| 422 |
+
# #================================================================================================
|
| 423 |
+
# #✅ 3. AGENT CLASS AND EVALUATION LOGIC
|
| 424 |
+
# #================================================================================================
|
| 425 |
+
# class GaiaAgent:
|
| 426 |
+
# def init(self):
|
| 427 |
+
# print("GaiaAgent initialized. Building agent with Qwen/Qwen2-72B-Instruct...")
|
| 428 |
+
# self.agent_app = build_agent_graph()
|
| 429 |
+
# Generated code
|
| 430 |
+
# def __call__(self, question: str) -> str:
|
| 431 |
+
# print(f"\n{'='*60}\nAgent received question: {question[:100]}...\n{'='*60}")
|
| 432 |
+
# try:
|
| 433 |
+
# initial_input = {"messages": [SystemMessage(content=AGENT_SYSTEM_PROMPT), HumanMessage(content=question)]}
|
| 434 |
+
# final_state = None
|
| 435 |
+
# for step in self.agent_app.stream(initial_input, {"recursion_limit": 15}):
|
| 436 |
+
# final_state = list(step.values())[0]
|
| 437 |
+
|
| 438 |
+
# final_answer = final_state['messages'][-1].content
|
| 439 |
+
# return str(final_answer).strip()
|
| 440 |
+
# except Exception as e:
|
| 441 |
+
# print(f"An error occurred during agent execution: {e}")
|
| 442 |
+
# return f"AGENT_EXECUTION_ERROR: {e}"
|
| 443 |
+
# Use code with caution.
|
| 444 |
+
# --- The rest of the file is unchanged ---
|
| 445 |
+
# def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 446 |
+
# space_id = os.getenv("SPACE_ID")
|
| 447 |
+
# if not profile: return "Please Login to Hugging Face with the button.", None
|
| 448 |
+
# username = f"{profile.username}"
|
| 449 |
+
# print(f"User logged in: {username}")
|
| 450 |
+
# api_url = DEFAULT_API_URL
|
| 451 |
+
# questions_url = f"{api_url}/questions"
|
| 452 |
+
# submit_url = f"{api_url}/submit"
|
| 453 |
+
# agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 454 |
+
# Generated code
|
| 455 |
+
# try:
|
| 456 |
+
# response = requests.get(questions_url, timeout=15)
|
| 457 |
+
# response.raise_for_status()
|
| 458 |
+
# questions_data = response.json()
|
| 459 |
+
# except Exception as e: return f"An unexpected error occurred fetching questions: {e}", None
|
| 460 |
+
|
| 461 |
+
# results_log, answers_payload = [], []
|
| 462 |
+
# agent_instance = GaiaAgent()
|
| 463 |
+
|
| 464 |
+
# for item in questions_data:
|
| 465 |
+
# task_id, question_text = item.get("task_id"), item.get("question")
|
| 466 |
+
# if not task_id or question_text is None: continue
|
| 467 |
+
# try:
|
| 468 |
+
# submitted_answer = agent_instance(question_text)
|
| 469 |
+
# answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 470 |
+
# results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 471 |
+
# except Exception as e:
|
| 472 |
+
# print(f"Error running agent on task {task_id}: {e}")
|
| 473 |
+
# results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
| 474 |
+
|
| 475 |
+
# if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 476 |
+
# submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 477 |
+
|
| 478 |
+
# try:
|
| 479 |
+
# response = requests.post(submit_url, json=submission_data, timeout=90)
|
| 480 |
+
# response.raise_for_status()
|
| 481 |
+
# result_data = response.json()
|
| 482 |
+
# final_status = (
|
| 483 |
+
# f"Submission Successful!\n"
|
| 484 |
+
# f"User: {result_data.get('username')}\n"
|
| 485 |
+
# f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 486 |
+
# f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 487 |
+
# f"Message: {result_data.get('message', 'No message received.')}"
|
| 488 |
+
# )
|
| 489 |
+
# return final_status, pd.DataFrame(results_log)
|
| 490 |
+
# except Exception as e: return f"An unexpected error in submission: {e}", pd.DataFrame(results_log)
|
| 491 |
+
# Use code with caution.
|
| 492 |
+
# with gr.Blocks() as demo:
|
| 493 |
+
# gr.Markdown("# GAIA Agent Final Assessment (Qwen2-72B-Instruct)")
|
| 494 |
+
# gr.Markdown(
|
| 495 |
+
# """
|
| 496 |
+
# Instructor's Note: This version uses the powerful Qwen/Qwen2-72B-Instruct model from the Hugging Face Hub.
|
| 497 |
+
# It relies on a robust manual LangGraph loop to handle tool calls by instructing the model to generate JSON.
|
| 498 |
+
# 1. Ensure you have a HUGGINGFACEHUB_API_TOKEN and TAVILY_API_KEY set in your secrets.
|
| 499 |
+
# 2. Ensure your requirements.txt is updated. Good luck!
|
| 500 |
+
# """
|
| 501 |
+
# )
|
| 502 |
+
# gr.LoginButton()
|
| 503 |
+
# run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 504 |
+
# status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 505 |
+
# results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 506 |
+
# run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
| 507 |
+
# if name == "main":
|
| 508 |
+
# print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 509 |
+
# demo.launch(debug=True, share=False, ssr_mode=False)
|
| 510 |
+
|
| 511 |
+
|
| 512 |
+
#########################
|
| 513 |
import os
|
| 514 |
+
import gradio as gr
|
| 515 |
import requests
|
| 516 |
+
import inspect
|
| 517 |
import pandas as pd
|
| 518 |
+
import json
|
| 519 |
+
import re
|
| 520 |
+
from typing import Dict, Any, List, Optional
|
| 521 |
+
from dataclasses import dataclass
|
| 522 |
+
import logging
|
| 523 |
+
from datetime import datetime
|
| 524 |
+
import traceback
|
| 525 |
+
|
| 526 |
+
# Third-party imports for the agent
|
| 527 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 528 |
+
import torch
|
| 529 |
from tavily import TavilyClient
|
| 530 |
+
import tempfile
|
| 531 |
+
import subprocess
|
| 532 |
+
import sys
|
| 533 |
+
|
| 534 |
+
# Configure logging
|
| 535 |
+
logging.basicConfig(level=logging.INFO)
|
| 536 |
+
logger = logging.getLogger(__name__)
|
| 537 |
|
| 538 |
# --- Constants ---
|
| 539 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 540 |
+
|
| 541 |
+
# Agent System Prompt
|
| 542 |
+
AGENT_SYSTEM_PROMPT = """You are a world-class AI agent, specialized in solving complex problems from the GAIA benchmark. Your task is to analyze the user's question, think step-by-step, and use the provided tools to find the correct answer.
|
| 543 |
+
|
| 544 |
+
CRITICAL INSTRUCTIONS:
|
| 545 |
+
1. **Analyze the Goal:** First, understand what the user is asking for.
|
| 546 |
+
2. **Plan & Execute:** Formulate a plan and use the available tools (`tavily_search`, `read_file`, `python_interpreter`) to gather information.
|
| 547 |
+
3. **Final Answer Format:** Once you are absolutely certain of the answer, you MUST provide it directly and concisely.
|
| 548 |
+
- DO NOT include your reasoning, thoughts, or any conversational text like 'The answer is...', 'Here is the result:', or 'Based on my search...'.
|
| 549 |
+
- Your final response must ONLY be the answer itself.
|
| 550 |
+
|
| 551 |
+
EXAMPLES OF CORRECT FINAL ANSWERS:
|
| 552 |
+
- If the question asks for a year: `2023`
|
| 553 |
+
- If it asks for a name: `John Doe`
|
| 554 |
+
- If it asks for a number: `42`
|
| 555 |
+
- If it asks for a comma-separated list: `item1, item2, item3`
|
| 556 |
+
|
| 557 |
+
Think, use your tools, and then provide ONLY the final, precise answer."""
|
| 558 |
+
|
| 559 |
+
@dataclass
|
| 560 |
+
class ToolResult:
|
| 561 |
+
"""Result from a tool execution"""
|
| 562 |
+
success: bool
|
| 563 |
+
result: Any
|
| 564 |
+
error: Optional[str] = None
|
| 565 |
+
|
| 566 |
+
class ToolExecutor:
|
| 567 |
+
"""Handles tool execution for the agent"""
|
| 568 |
+
|
| 569 |
+
def __init__(self):
|
| 570 |
+
self.tavily_client = None
|
| 571 |
+
self.setup_tavily()
|
| 572 |
+
|
| 573 |
+
def setup_tavily(self):
|
| 574 |
+
"""Initialize Tavily search client"""
|
| 575 |
try:
|
| 576 |
+
tavily_api_key = os.getenv("TAVILY_API_KEY")
|
| 577 |
+
if tavily_api_key:
|
| 578 |
+
self.tavily_client = TavilyClient(api_key=tavily_api_key)
|
| 579 |
+
logger.info("Tavily client initialized successfully")
|
| 580 |
+
else:
|
| 581 |
+
logger.warning("TAVILY_API_KEY not found in environment variables")
|
| 582 |
+
except Exception as e:
|
| 583 |
+
logger.error(f"Failed to initialize Tavily client: {e}")
|
| 584 |
+
|
| 585 |
+
def tavily_search(self, query: str, max_results: int = 5) -> ToolResult:
|
| 586 |
+
"""Search the web using Tavily"""
|
| 587 |
+
try:
|
| 588 |
+
if not self.tavily_client:
|
| 589 |
+
return ToolResult(success=False, error="Tavily client not initialized")
|
| 590 |
+
|
| 591 |
+
response = self.tavily_client.search(
|
| 592 |
+
query=query,
|
| 593 |
+
search_depth="advanced",
|
| 594 |
+
max_results=max_results,
|
| 595 |
+
include_answer=True,
|
| 596 |
+
include_raw_content=True
|
| 597 |
+
)
|
| 598 |
+
|
| 599 |
+
# Extract relevant information
|
| 600 |
+
results = []
|
| 601 |
+
if response.get('results'):
|
| 602 |
+
for result in response['results']:
|
| 603 |
+
results.append({
|
| 604 |
+
'title': result.get('title', ''),
|
| 605 |
+
'content': result.get('content', ''),
|
| 606 |
+
'url': result.get('url', ''),
|
| 607 |
+
'score': result.get('score', 0)
|
| 608 |
+
})
|
| 609 |
+
|
| 610 |
+
search_result = {
|
| 611 |
+
'answer': response.get('answer', ''),
|
| 612 |
+
'results': results,
|
| 613 |
+
'query': query
|
| 614 |
+
}
|
| 615 |
+
|
| 616 |
+
return ToolResult(success=True, result=search_result)
|
| 617 |
+
|
| 618 |
+
except Exception as e:
|
| 619 |
+
logger.error(f"Tavily search error: {e}")
|
| 620 |
+
return ToolResult(success=False, error=str(e))
|
| 621 |
+
|
| 622 |
+
def python_interpreter(self, code: str) -> ToolResult:
|
| 623 |
+
"""Execute Python code safely"""
|
| 624 |
+
try:
|
| 625 |
+
# Create a temporary file for the code
|
| 626 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
|
| 627 |
+
f.write(code)
|
| 628 |
+
temp_file = f.name
|
| 629 |
+
|
| 630 |
+
# Execute the code and capture output
|
| 631 |
+
result = subprocess.run(
|
| 632 |
+
[sys.executable, temp_file],
|
| 633 |
+
capture_output=True,
|
| 634 |
+
text=True,
|
| 635 |
+
timeout=30 # 30 seconds timeout
|
| 636 |
+
)
|
| 637 |
+
|
| 638 |
+
# Clean up
|
| 639 |
+
os.unlink(temp_file)
|
| 640 |
+
|
| 641 |
+
if result.returncode == 0:
|
| 642 |
+
return ToolResult(success=True, result=result.stdout.strip())
|
| 643 |
+
else:
|
| 644 |
+
return ToolResult(success=False, error=result.stderr.strip())
|
| 645 |
+
|
| 646 |
+
except subprocess.TimeoutExpired:
|
| 647 |
+
return ToolResult(success=False, error="Code execution timed out")
|
| 648 |
+
except Exception as e:
|
| 649 |
+
logger.error(f"Python interpreter error: {e}")
|
| 650 |
+
return ToolResult(success=False, error=str(e))
|
| 651 |
+
|
| 652 |
+
def read_file(self, file_path: str) -> ToolResult:
|
| 653 |
+
"""Read a file and return its contents"""
|
| 654 |
+
try:
|
| 655 |
+
if not os.path.exists(file_path):
|
| 656 |
+
return ToolResult(success=False, error=f"File not found: {file_path}")
|
| 657 |
+
|
| 658 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 659 |
+
content = f.read()
|
| 660 |
+
|
| 661 |
+
return ToolResult(success=True, result=content)
|
| 662 |
+
|
| 663 |
+
except Exception as e:
|
| 664 |
+
logger.error(f"File reading error: {e}")
|
| 665 |
+
return ToolResult(success=False, error=str(e))
|
| 666 |
|
| 667 |
+
class GAIAAgent:
|
| 668 |
+
"""Advanced GAIA benchmark agent using Qwen model with tool integration"""
|
| 669 |
+
|
| 670 |
+
def __init__(self, model_name: str = "Qwen/Qwen2.5-7B-Instruct"):
|
| 671 |
+
self.model_name = model_name
|
| 672 |
+
self.tool_executor = ToolExecutor()
|
| 673 |
+
self.tokenizer = None
|
| 674 |
+
self.model = None
|
| 675 |
+
self.pipeline = None
|
| 676 |
+
self.setup_model()
|
| 677 |
+
logger.info(f"GAIAAgent initialized with model: {model_name}")
|
| 678 |
+
|
| 679 |
+
def setup_model(self):
|
| 680 |
+
"""Initialize the Qwen model and tokenizer"""
|
| 681 |
+
try:
|
| 682 |
+
# Check if CUDA is available
|
| 683 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 684 |
+
logger.info(f"Using device: {device}")
|
| 685 |
+
|
| 686 |
+
# Load tokenizer and model
|
| 687 |
+
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 688 |
+
self.model_name,
|
| 689 |
+
trust_remote_code=True
|
| 690 |
+
)
|
| 691 |
+
|
| 692 |
+
# Use pipeline for easier inference
|
| 693 |
+
self.pipeline = pipeline(
|
| 694 |
+
"text-generation",
|
| 695 |
+
model=self.model_name,
|
| 696 |
+
tokenizer=self.tokenizer,
|
| 697 |
+
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
| 698 |
+
device_map="auto" if device == "cuda" else None,
|
| 699 |
+
trust_remote_code=True
|
| 700 |
+
)
|
| 701 |
+
|
| 702 |
+
logger.info("Model loaded successfully")
|
| 703 |
+
|
| 704 |
+
except Exception as e:
|
| 705 |
+
logger.error(f"Failed to load model: {e}")
|
| 706 |
+
# Fallback to a simpler approach
|
| 707 |
+
self.setup_fallback_model()
|
| 708 |
|
| 709 |
+
def setup_fallback_model(self):
|
| 710 |
+
"""Setup a fallback model if main model fails"""
|
| 711 |
+
try:
|
| 712 |
+
# Try a smaller model
|
| 713 |
+
fallback_model = "microsoft/DialoGPT-medium"
|
| 714 |
+
self.pipeline = pipeline(
|
| 715 |
+
"text-generation",
|
| 716 |
+
model=fallback_model,
|
| 717 |
+
tokenizer=fallback_model
|
| 718 |
+
)
|
| 719 |
+
logger.info(f"Fallback model loaded: {fallback_model}")
|
| 720 |
+
except Exception as e:
|
| 721 |
+
logger.error(f"Fallback model also failed: {e}")
|
| 722 |
+
self.pipeline = None
|
| 723 |
+
|
| 724 |
+
def extract_tool_calls(self, text: str) -> List[Dict[str, Any]]:
|
| 725 |
+
"""Extract tool calls from the model's response"""
|
| 726 |
+
tool_calls = []
|
| 727 |
+
|
| 728 |
+
# Pattern to match tool calls like: <tool_call>tavily_search("query")</tool_call>
|
| 729 |
+
pattern = r'<tool_call>(\w+)\(([^)]+)\)</tool_call>'
|
| 730 |
+
matches = re.findall(pattern, text)
|
| 731 |
+
|
| 732 |
+
for tool_name, args_str in matches:
|
| 733 |
+
try:
|
| 734 |
+
# Simple argument parsing (assumes string arguments)
|
| 735 |
+
args = args_str.strip().strip('"\'')
|
| 736 |
+
tool_calls.append({
|
| 737 |
+
'tool': tool_name,
|
| 738 |
+
'args': args
|
| 739 |
+
})
|
| 740 |
+
except Exception as e:
|
| 741 |
+
logger.error(f"Failed to parse tool call: {e}")
|
| 742 |
+
|
| 743 |
+
return tool_calls
|
| 744 |
+
|
| 745 |
+
def execute_tools(self, tool_calls: List[Dict[str, Any]]) -> str:
|
| 746 |
+
"""Execute tool calls and return results"""
|
| 747 |
+
results = []
|
| 748 |
+
|
| 749 |
+
for call in tool_calls:
|
| 750 |
+
tool_name = call['tool']
|
| 751 |
+
args = call['args']
|
| 752 |
+
|
| 753 |
+
if tool_name == 'tavily_search':
|
| 754 |
+
result = self.tool_executor.tavily_search(args)
|
| 755 |
+
elif tool_name == 'python_interpreter':
|
| 756 |
+
result = self.tool_executor.python_interpreter(args)
|
| 757 |
+
elif tool_name == 'read_file':
|
| 758 |
+
result = self.tool_executor.read_file(args)
|
| 759 |
+
else:
|
| 760 |
+
result = ToolResult(success=False, error=f"Unknown tool: {tool_name}")
|
| 761 |
+
|
| 762 |
+
if result.success:
|
| 763 |
+
results.append(f"Tool {tool_name} result: {result.result}")
|
| 764 |
+
else:
|
| 765 |
+
results.append(f"Tool {tool_name} error: {result.error}")
|
| 766 |
+
|
| 767 |
+
return "\n".join(results)
|
| 768 |
+
|
| 769 |
+
def generate_response(self, prompt: str, max_length: int = 1000) -> str:
|
| 770 |
+
"""Generate response using the model"""
|
| 771 |
+
try:
|
| 772 |
+
if not self.pipeline:
|
| 773 |
+
return "Model not available"
|
| 774 |
+
|
| 775 |
+
# Generate response
|
| 776 |
+
outputs = self.pipeline(
|
| 777 |
+
prompt,
|
| 778 |
+
max_length=max_length,
|
| 779 |
+
do_sample=True,
|
| 780 |
+
temperature=0.7,
|
| 781 |
+
top_p=0.9,
|
| 782 |
+
pad_token_id=self.tokenizer.eos_token_id if self.tokenizer else None
|
| 783 |
+
)
|
| 784 |
+
|
| 785 |
+
# Extract the generated text
|
| 786 |
+
generated_text = outputs[0]['generated_text']
|
| 787 |
+
|
| 788 |
+
# Remove the input prompt from the output
|
| 789 |
+
if generated_text.startswith(prompt):
|
| 790 |
+
generated_text = generated_text[len(prompt):].strip()
|
| 791 |
+
|
| 792 |
+
return generated_text
|
| 793 |
+
|
| 794 |
+
except Exception as e:
|
| 795 |
+
logger.error(f"Generation error: {e}")
|
| 796 |
+
return f"Generation failed: {str(e)}"
|
| 797 |
+
|
| 798 |
+
def solve_with_reasoning(self, question: str) -> str:
|
| 799 |
+
"""Solve question with step-by-step reasoning and tool usage"""
|
| 800 |
+
try:
|
| 801 |
+
# Create initial prompt
|
| 802 |
+
reasoning_prompt = f"""
|
| 803 |
+
{AGENT_SYSTEM_PROMPT}
|
| 804 |
+
|
| 805 |
+
Question: {question}
|
| 806 |
+
|
| 807 |
+
Let me think through this step by step:
|
| 808 |
+
|
| 809 |
+
1. First, I need to understand what this question is asking for.
|
| 810 |
+
2. Then I'll determine what tools I need to use.
|
| 811 |
+
3. I'll gather information using the appropriate tools.
|
| 812 |
+
4. Finally, I'll provide the precise answer.
|
| 813 |
+
|
| 814 |
+
Let me start by analyzing the question:
|
| 815 |
+
"""
|
| 816 |
+
|
| 817 |
+
# Generate initial reasoning
|
| 818 |
+
response = self.generate_response(reasoning_prompt)
|
| 819 |
+
|
| 820 |
+
# Check if we need to use tools
|
| 821 |
+
if self.should_use_search(question, response):
|
| 822 |
+
search_result = self.tool_executor.tavily_search(question)
|
| 823 |
+
if search_result.success:
|
| 824 |
+
# Incorporate search results
|
| 825 |
+
search_info = search_result.result
|
| 826 |
+
enhanced_prompt = f"""
|
| 827 |
+
{reasoning_prompt}
|
| 828 |
+
|
| 829 |
+
Based on my analysis, I need to search for information. Here are the search results:
|
| 830 |
+
|
| 831 |
+
Search Query: {question}
|
| 832 |
+
Answer: {search_info.get('answer', 'No direct answer found')}
|
| 833 |
+
|
| 834 |
+
Top Results:
|
| 835 |
+
"""
|
| 836 |
+
for i, result in enumerate(search_info.get('results', [])[:3]):
|
| 837 |
+
enhanced_prompt += f"Result {i+1}: {result.get('title', '')}\n{result.get('content', '')[:200]}...\n\n"
|
| 838 |
+
|
| 839 |
+
enhanced_prompt += "\nBased on this information, the answer is:"
|
| 840 |
+
|
| 841 |
+
final_response = self.generate_response(enhanced_prompt, max_length=500)
|
| 842 |
+
return self.extract_final_answer(final_response)
|
| 843 |
+
|
| 844 |
+
# Check if we need Python computation
|
| 845 |
+
if self.should_use_python(question, response):
|
| 846 |
+
# Generate Python code
|
| 847 |
+
code_prompt = f"""
|
| 848 |
+
Question: {question}
|
| 849 |
+
|
| 850 |
+
I need to solve this using Python. Let me write the code:
|
| 851 |
+
|
| 852 |
+
```python
|
| 853 |
+
"""
|
| 854 |
+
code_response = self.generate_response(code_prompt, max_length=300)
|
| 855 |
+
|
| 856 |
+
# Extract Python code
|
| 857 |
+
python_code = self.extract_python_code(code_response)
|
| 858 |
+
if python_code:
|
| 859 |
+
exec_result = self.tool_executor.python_interpreter(python_code)
|
| 860 |
+
if exec_result.success:
|
| 861 |
+
return str(exec_result.result).strip()
|
| 862 |
+
|
| 863 |
+
# If no tools needed, extract answer from reasoning
|
| 864 |
+
return self.extract_final_answer(response)
|
| 865 |
+
|
| 866 |
+
except Exception as e:
|
| 867 |
+
logger.error(f"Error in solve_with_reasoning: {e}")
|
| 868 |
+
return self.fallback_solve(question)
|
| 869 |
+
|
| 870 |
+
def should_use_search(self, question: str, response: str) -> bool:
|
| 871 |
+
"""Determine if we should use web search"""
|
| 872 |
+
search_indicators = [
|
| 873 |
+
"current", "recent", "latest", "news", "today", "now",
|
| 874 |
+
"who is", "what is", "when did", "where is",
|
| 875 |
+
"population", "capital", "president", "CEO",
|
| 876 |
+
"founded", "established", "released", "launched"
|
| 877 |
+
]
|
| 878 |
+
|
| 879 |
+
question_lower = question.lower()
|
| 880 |
+
return any(indicator in question_lower for indicator in search_indicators)
|
| 881 |
+
|
| 882 |
+
def should_use_python(self, question: str, response: str) -> bool:
|
| 883 |
+
"""Determine if we should use Python computation"""
|
| 884 |
+
python_indicators = [
|
| 885 |
+
"calculate", "compute", "solve", "equation", "formula",
|
| 886 |
+
"sum", "average", "total", "percentage", "rate",
|
| 887 |
+
"graph", "plot", "data", "analysis", "statistics"
|
| 888 |
+
]
|
| 889 |
+
|
| 890 |
+
question_lower = question.lower()
|
| 891 |
+
return any(indicator in question_lower for indicator in python_indicators)
|
| 892 |
+
|
| 893 |
+
def extract_python_code(self, text: str) -> str:
|
| 894 |
+
"""Extract Python code from generated text"""
|
| 895 |
+
# Look for code blocks
|
| 896 |
+
code_pattern = r'```python\n(.*?)\n```'
|
| 897 |
+
matches = re.findall(code_pattern, text, re.DOTALL)
|
| 898 |
+
|
| 899 |
+
if matches:
|
| 900 |
+
return matches[0].strip()
|
| 901 |
+
|
| 902 |
+
# Look for simple code after "python" keyword
|
| 903 |
+
lines = text.split('\n')
|
| 904 |
+
code_lines = []
|
| 905 |
+
in_code = False
|
| 906 |
+
|
| 907 |
+
for line in lines:
|
| 908 |
+
if 'python' in line.lower() or in_code:
|
| 909 |
+
in_code = True
|
| 910 |
+
if line.strip() and not line.strip().startswith('#'):
|
| 911 |
+
code_lines.append(line)
|
| 912 |
+
|
| 913 |
+
return '\n'.join(code_lines) if code_lines else ""
|
| 914 |
+
|
| 915 |
+
def extract_final_answer(self, text: str) -> str:
|
| 916 |
+
"""Extract the final answer from generated text"""
|
| 917 |
+
# Look for common answer patterns
|
| 918 |
+
answer_patterns = [
|
| 919 |
+
r'(?:the answer is|answer:|final answer:)\s*(.+?)(?:\n|$)',
|
| 920 |
+
r'(?:therefore|thus|so|hence),?\s*(.+?)(?:\n|$)',
|
| 921 |
+
r'(?:result|conclusion):\s*(.+?)(?:\n|$)',
|
| 922 |
+
]
|
| 923 |
+
|
| 924 |
+
for pattern in answer_patterns:
|
| 925 |
+
matches = re.findall(pattern, text, re.IGNORECASE)
|
| 926 |
+
if matches:
|
| 927 |
+
answer = matches[-1].strip()
|
| 928 |
+
# Clean up the answer
|
| 929 |
+
answer = re.sub(r'^["\']|["\']$', '', answer) # Remove quotes
|
| 930 |
+
answer = answer.strip('.,!?') # Remove trailing punctuation
|
| 931 |
+
return answer
|
| 932 |
+
|
| 933 |
+
# If no pattern found, return the last meaningful line
|
| 934 |
+
lines = [line.strip() for line in text.split('\n') if line.strip()]
|
| 935 |
+
if lines:
|
| 936 |
+
return lines[-1]
|
| 937 |
+
|
| 938 |
+
return text.strip()
|
| 939 |
+
|
| 940 |
+
def fallback_solve(self, question: str) -> str:
|
| 941 |
+
"""Simple fallback solution method"""
|
| 942 |
+
try:
|
| 943 |
+
# Try direct search first
|
| 944 |
+
search_result = self.tool_executor.tavily_search(question)
|
| 945 |
+
if search_result.success and search_result.result.get('answer'):
|
| 946 |
+
return search_result.result['answer']
|
| 947 |
+
|
| 948 |
+
# If search fails, try basic pattern matching
|
| 949 |
+
question_lower = question.lower()
|
| 950 |
+
|
| 951 |
+
# Handle year questions
|
| 952 |
+
if 'year' in question_lower or 'when' in question_lower:
|
| 953 |
+
# Look for 4-digit years in search results
|
| 954 |
+
if search_result.success:
|
| 955 |
+
text = str(search_result.result)
|
| 956 |
+
years = re.findall(r'\b(?:19|20)\d{2}\b', text)  # non-capturing group so findall returns full years
|
| 957 |
+
if years:
|
| 958 |
+
return years[0]
|
| 959 |
+
|
| 960 |
+
# Handle number questions
|
| 961 |
+
if any(word in question_lower for word in ['how many', 'number', 'count']):
|
| 962 |
+
if search_result.success:
|
| 963 |
+
text = str(search_result.result)
|
| 964 |
+
numbers = re.findall(r'\b\d+\b', text)
|
| 965 |
+
if numbers:
|
| 966 |
+
return numbers[0]
|
| 967 |
+
|
| 968 |
+
# Default fallback
|
| 969 |
+
return "Unable to determine answer"
|
| 970 |
+
|
| 971 |
+
except Exception as e:
|
| 972 |
+
logger.error(f"Fallback solve error: {e}")
|
| 973 |
+
return "Error processing question"
|
| 974 |
+
|
| 975 |
+
def __call__(self, question: str) -> str:
|
| 976 |
+
"""Main entry point for the agent"""
|
| 977 |
+
logger.info(f"Processing question: {question[:100]}...")
|
| 978 |
+
|
| 979 |
+
try:
|
| 980 |
+
# Solve the question
|
| 981 |
+
answer = self.solve_with_reasoning(question)
|
| 982 |
+
|
| 983 |
+
# Clean and validate answer
|
| 984 |
+
final_answer = answer.strip()
|
| 985 |
+
if not final_answer:
|
| 986 |
+
final_answer = self.fallback_solve(question)
|
| 987 |
+
|
| 988 |
+
logger.info(f"Generated answer: {final_answer}")
|
| 989 |
+
return final_answer
|
| 990 |
+
|
| 991 |
+
except Exception as e:
|
| 992 |
+
logger.error(f"Error in agent call: {e}")
|
| 993 |
+
logger.error(traceback.format_exc())
|
| 994 |
+
return self.fallback_solve(question)
|
| 995 |
+
|
| 996 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 997 |
+
"""
|
| 998 |
+
Fetches all questions, runs the GAIAAgent on them, submits all answers,
|
| 999 |
+
and displays the results.
|
| 1000 |
+
"""
|
| 1001 |
+
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 1002 |
+
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 1003 |
+
|
| 1004 |
+
if profile:
|
| 1005 |
+
username = f"{profile.username}"
|
| 1006 |
+
print(f"User logged in: {username}")
|
| 1007 |
else:
|
| 1008 |
+
print("User not logged in.")
|
| 1009 |
+
return "Please Login to Hugging Face with the button.", None
|
| 1010 |
+
|
| 1011 |
+
api_url = DEFAULT_API_URL
|
| 1012 |
+
questions_url = f"{api_url}/questions"
|
| 1013 |
+
submit_url = f"{api_url}/submit"
|
| 1014 |
|
| 1015 |
+
# 1. Instantiate Agent
|
| 1016 |
try:
|
| 1017 |
+
agent = GAIAAgent()
|
| 1018 |
except Exception as e:
|
| 1019 |
+
print(f"Error instantiating agent: {e}")
|
| 1020 |
+
return f"Error initializing agent: {e}", None
|
| 1021 |
+
|
| 1022 |
+
# In the case of an app running as a Hugging Face space, this link points toward your codebase
|
| 1023 |
+
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 1024 |
+
print(agent_code)
|
| 1025 |
|
| 1026 |
+
# 2. Fetch Questions
|
| 1027 |
+
print(f"Fetching questions from: {questions_url}")
|
| 1028 |
+
try:
|
| 1029 |
+
response = requests.get(questions_url, timeout=15)
|
| 1030 |
+
response.raise_for_status()
|
| 1031 |
+
questions_data = response.json()
|
| 1032 |
+
if not questions_data:
|
| 1033 |
+
print("Fetched questions list is empty.")
|
| 1034 |
+
return "Fetched questions list is empty or invalid format.", None
|
| 1035 |
+
print(f"Fetched {len(questions_data)} questions.")
|
| 1036 |
+
except requests.exceptions.RequestException as e:
|
| 1037 |
+
print(f"Error fetching questions: {e}")
|
| 1038 |
+
return f"Error fetching questions: {e}", None
|
| 1039 |
+
except requests.exceptions.JSONDecodeError as e:
|
| 1040 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 1041 |
+
print(f"Response text: {response.text[:500]}")
|
| 1042 |
+
return f"Error decoding server response for questions: {e}", None
|
| 1043 |
except Exception as e:
|
| 1044 |
+
print(f"An unexpected error occurred fetching questions: {e}")
|
| 1045 |
+
return f"An unexpected error occurred fetching questions: {e}", None
|
| 1046 |
+
|
| 1047 |
+
# 3. Run your Agent
|
| 1048 |
+
results_log = []
|
| 1049 |
+
answers_payload = []
|
| 1050 |
+
print(f"Running agent on {len(questions_data)} questions...")
|
| 1051 |
+
|
| 1052 |
+
for i, item in enumerate(questions_data):
|
| 1053 |
+
task_id = item.get("task_id")
|
| 1054 |
+
question_text = item.get("question")
|
| 1055 |
+
if not task_id or question_text is None:
|
| 1056 |
+
print(f"Skipping item with missing task_id or question: {item}")
|
| 1057 |
+
continue
|
| 1058 |
+
|
| 1059 |
+
print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
|
| 1060 |
+
try:
|
| 1061 |
+
submitted_answer = agent(question_text)
|
| 1062 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 1063 |
+
results_log.append({
|
| 1064 |
+
"Task ID": task_id,
|
| 1065 |
+
"Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
|
| 1066 |
+
"Submitted Answer": submitted_answer
|
| 1067 |
+
})
|
| 1068 |
+
print(f"Answer for {task_id}: {submitted_answer}")
|
| 1069 |
+
except Exception as e:
|
| 1070 |
+
print(f"Error running agent on task {task_id}: {e}")
|
| 1071 |
+
error_msg = f"AGENT ERROR: {e}"
|
| 1072 |
+
answers_payload.append({"task_id": task_id, "submitted_answer": error_msg})
|
| 1073 |
+
results_log.append({
|
| 1074 |
+
"Task ID": task_id,
|
| 1075 |
+
"Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
|
| 1076 |
+
"Submitted Answer": error_msg
|
| 1077 |
+
})
|
| 1078 |
+
|
| 1079 |
+
if not answers_payload:
|
| 1080 |
+
print("Agent did not produce any answers to submit.")
|
| 1081 |
+
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 1082 |
+
|
| 1083 |
+
# 4. Prepare Submission
|
| 1084 |
+
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 1085 |
+
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 1086 |
+
print(status_update)
|
| 1087 |
+
|
| 1088 |
+
# 5. Submit
|
| 1089 |
+
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 1090 |
try:
|
| 1091 |
+
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 1092 |
+
response.raise_for_status()
|
| 1093 |
+
result_data = response.json()
|
| 1094 |
+
final_status = (
|
| 1095 |
+
f"Submission Successful!\n"
|
| 1096 |
+
f"User: {result_data.get('username')}\n"
|
| 1097 |
+
f"Overall Score: {result_data.get('score', 'N/A')}% "
|
| 1098 |
+
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 1099 |
+
f"Message: {result_data.get('message', 'No message received.')}"
|
| 1100 |
+
)
|
| 1101 |
+
print("Submission successful.")
|
| 1102 |
+
results_df = pd.DataFrame(results_log)
|
| 1103 |
+
return final_status, results_df
|
| 1104 |
+
except requests.exceptions.HTTPError as e:
|
| 1105 |
+
error_detail = f"Server responded with status {e.response.status_code}."
|
| 1106 |
+
try:
|
| 1107 |
+
error_json = e.response.json()
|
| 1108 |
+
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
| 1109 |
+
except requests.exceptions.JSONDecodeError:
|
| 1110 |
+
error_detail += f" Response: {e.response.text[:500]}"
|
| 1111 |
+
status_message = f"Submission Failed: {error_detail}"
|
| 1112 |
+
print(status_message)
|
| 1113 |
+
results_df = pd.DataFrame(results_log)
|
| 1114 |
+
return status_message, results_df
|
| 1115 |
+
except requests.exceptions.Timeout:
|
| 1116 |
+
status_message = "Submission Failed: The request timed out."
|
| 1117 |
+
print(status_message)
|
| 1118 |
+
results_df = pd.DataFrame(results_log)
|
| 1119 |
+
return status_message, results_df
|
| 1120 |
+
except requests.exceptions.RequestException as e:
|
| 1121 |
+
status_message = f"Submission Failed: Network error - {e}"
|
| 1122 |
+
print(status_message)
|
| 1123 |
+
results_df = pd.DataFrame(results_log)
|
| 1124 |
+
return status_message, results_df
|
| 1125 |
except Exception as e:
|
| 1126 |
+
status_message = f"An unexpected error occurred during submission: {e}"
|
| 1127 |
+
print(status_message)
|
| 1128 |
+
results_df = pd.DataFrame(results_log)
|
| 1129 |
+
return status_message, results_df
|
| 1130 |
+
|
| 1131 |
+
# --- Build Gradio Interface using Blocks ---
|
| 1132 |
with gr.Blocks() as demo:
|
| 1133 |
+
gr.Markdown("# GAIA Benchmark AI Agent")
|
| 1134 |
+
gr.Markdown(
|
| 1135 |
+
"""
|
| 1136 |
+
**Advanced AI Agent for GAIA Benchmark**
|
| 1137 |
+
|
| 1138 |
+
This agent uses:
|
| 1139 |
+
- **Qwen 2.5-7B-Instruct** for reasoning and planning
|
| 1140 |
+
- **Tavily Search** for real-time information retrieval
|
| 1141 |
+
- **Python Interpreter** for computational tasks
|
| 1142 |
+
- **File Reading** capabilities for document analysis
|
| 1143 |
+
|
| 1144 |
+
**Instructions:**
|
| 1145 |
+
1. Clone this space and set up your environment variables:
|
| 1146 |
+
- `TAVILY_API_KEY`: Your Tavily API key for web search
|
| 1147 |
+
- `HF_TOKEN`: Your Hugging Face token (if needed)
|
| 1148 |
+
2. Log in to your Hugging Face account using the button below
|
| 1149 |
+
3. Click 'Run Evaluation & Submit All Answers' to start the evaluation
|
| 1150 |
+
|
| 1151 |
+
**Expected Performance:** This agent is designed to score >30% on the GAIA benchmark.
|
| 1152 |
+
"""
|
| 1153 |
+
)
|
| 1154 |
+
|
| 1155 |
+
gr.LoginButton()
|
| 1156 |
+
|
| 1157 |
+
run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
|
| 1158 |
+
|
| 1159 |
+
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 1160 |
+
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 1161 |
+
|
| 1162 |
+
run_button.click(
|
| 1163 |
+
fn=run_and_submit_all,
|
| 1164 |
+
outputs=[status_output, results_table]
|
| 1165 |
+
)
|
| 1166 |
+
|
| 1167 |
+
if __name__ == "__main__":
|
| 1168 |
+
print("\n" + "-"*50 + " GAIA Agent Starting " + "-"*50)
|
| 1169 |
+
|
| 1170 |
+
# Check for required environment variables
|
| 1171 |
+
required_vars = ["TAVILY_API_KEY"]
|
| 1172 |
+
missing_vars = []
|
| 1173 |
+
|
| 1174 |
+
for var in required_vars:
|
| 1175 |
+
if not os.getenv(var):
|
| 1176 |
+
missing_vars.append(var)
|
| 1177 |
+
|
| 1178 |
+
if missing_vars:
|
| 1179 |
+
print(f"⚠️ Missing environment variables: {', '.join(missing_vars)}")
|
| 1180 |
+
print(" Please set these variables for optimal performance.")
|
| 1181 |
+
else:
|
| 1182 |
+
print("✅ All required environment variables found.")
|
| 1183 |
+
|
| 1184 |
+
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 1185 |
+
space_host_startup = os.getenv("SPACE_HOST")
|
| 1186 |
+
space_id_startup = os.getenv("SPACE_ID")
|
| 1187 |
+
|
| 1188 |
+
if space_host_startup:
|
| 1189 |
+
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
| 1190 |
+
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
| 1191 |
+
else:
|
| 1192 |
+
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 1193 |
+
|
| 1194 |
+
if space_id_startup:
|
| 1195 |
+
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 1196 |
+
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 1197 |
+
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 1198 |
+
else:
|
| 1199 |
+
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 1200 |
+
|
| 1201 |
+
print("-"*120 + "\n")
|
| 1202 |
+
print("🚀 Launching GAIA Benchmark AI Agent...")
|
| 1203 |
+
demo.launch(debug=True, share=False)
|
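The `<tool_call>` round trip implemented by `extract_tool_calls` and `execute_tools` in the new code can be exercised in isolation. A minimal sketch, where `fake_search` and `DISPATCH` are hypothetical stand-ins for `ToolExecutor.tavily_search` and the tool dispatch in `execute_tools`:

```python
import re
from typing import Any, Dict, List

# Same single-string-argument convention as GAIAAgent.extract_tool_calls above.
TOOL_CALL_PATTERN = re.compile(r'<tool_call>(\w+)\(([^)]+)\)</tool_call>')

def extract_tool_calls(text: str) -> List[Dict[str, Any]]:
    """Pull (tool, args) pairs out of model output, stripping surrounding quotes."""
    return [
        {"tool": name, "args": args.strip().strip('"\'')}
        for name, args in TOOL_CALL_PATTERN.findall(text)
    ]

def fake_search(query: str) -> str:
    # Hypothetical stand-in for ToolExecutor.tavily_search.
    return f"results for: {query}"

DISPATCH = {"tavily_search": fake_search}

if __name__ == "__main__":
    reply = 'Let me check. <tool_call>tavily_search("capital of France")</tool_call>'
    for call in extract_tool_calls(reply):
        handler = DISPATCH.get(call["tool"])
        print(handler(call["args"]) if handler else f"Unknown tool: {call['tool']}")
        # -> results for: capital of France
```

Because the regex stops at the first `)`, arguments containing parentheses or multiple parameters would need a richer grammar; the JSON convention used by the commented-out LangGraph version is one such alternative.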