Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -249,6 +249,7 @@
|
|
| 249 |
#
|
| 250 |
import os
|
| 251 |
import io
|
|
|
|
| 252 |
import requests
|
| 253 |
import pandas as pd
|
| 254 |
import gradio as gr
|
|
@@ -256,12 +257,11 @@ from contextlib import redirect_stdout
|
|
| 256 |
from typing import TypedDict, Annotated, List
|
| 257 |
import operator
|
| 258 |
|
| 259 |
-
# --- LangChain & LangGraph Imports
|
| 260 |
-
from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage
|
| 261 |
from langchain_core.tools import tool
|
| 262 |
-
from
|
| 263 |
from langgraph.graph import StateGraph, END
|
| 264 |
-
from langgraph.prebuilt import ToolNode
|
| 265 |
from tavily import TavilyClient
|
| 266 |
import pypdf
|
| 267 |
|
|
@@ -270,198 +270,240 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 270 |
FILES_DIR = "./files"
|
| 271 |
os.makedirs(FILES_DIR, exist_ok=True)
|
| 272 |
|
| 273 |
-
# --- System Prompt (
|
|
|
|
| 274 |
AGENT_SYSTEM_PROMPT = """You are a world-class AI agent, specialized in solving complex problems from the GAIA benchmark.
|
| 275 |
Your task is to analyze the user's question, think step-by-step, and use the provided tools to find the correct answer.
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
Think, use your tools, and then provide ONLY the final, precise answer.
|
| 288 |
"""
|
| 289 |
-
|
| 290 |
-
#
|
| 291 |
-
# ================================================================================================
|
| 292 |
-
# ✅ 1. DEFINE THE AGENT'S TOOLS (Unchanged)
|
| 293 |
-
# ================================================================================================
|
| 294 |
-
#
|
| 295 |
tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
|
| 296 |
-
|
| 297 |
@tool
|
| 298 |
def tavily_search(query: str) -> str:
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
@tool
|
| 307 |
def read_file(url: str) -> str:
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
@tool
|
| 327 |
def python_interpreter(code: str) -> str:
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
# ✅ 2. CONFIGURE AND BUILD THE AGENT (Stable LangGraph Method)
|
| 339 |
-
# ================================================================================================
|
| 340 |
-
#
|
| 341 |
class AgentState(TypedDict):
|
| 342 |
-
|
| 343 |
-
|
| 344 |
def build_agent_graph():
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
return "action"
|
|
|
|
|
|
|
|
|
|
| 362 |
return "end"
|
| 363 |
|
| 364 |
-
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
class GaiaAgent:
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
try:
|
| 388 |
-
initial_input = {"messages": [HumanMessage(content=f"{AGENT_SYSTEM_PROMPT}\n\nUSER QUESTION: {question}")]}
|
| 389 |
-
final_state = None
|
| 390 |
-
for step in self.agent_app.stream(initial_input, {"recursion_limit": 15}):
|
| 391 |
-
final_state = step
|
| 392 |
-
|
| 393 |
-
# The final answer is in the last 'agent' step's AIMessage
|
| 394 |
-
final_answer = final_state['agent']['messages'][-1].content
|
| 395 |
-
print(f"\n--- Agent finished. Final Answer: {final_answer} ---\n")
|
| 396 |
-
return str(final_answer).strip()
|
| 397 |
-
except Exception as e:
|
| 398 |
-
print(f"An error occurred during agent execution: {e}")
|
| 399 |
-
return f"AGENT_EXECUTION_ERROR: {e}"
|
| 400 |
-
|
| 401 |
-
# --- The rest of the file is unchanged ---
|
| 402 |
-
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 403 |
-
space_id = os.getenv("SPACE_ID")
|
| 404 |
-
if not profile: return "Please Login to Hugging Face with the button.", None
|
| 405 |
-
username = f"{profile.username}"
|
| 406 |
-
print(f"User logged in: {username}")
|
| 407 |
-
api_url = DEFAULT_API_URL
|
| 408 |
-
questions_url = f"{api_url}/questions"
|
| 409 |
-
submit_url = f"{api_url}/submit"
|
| 410 |
-
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 411 |
-
|
| 412 |
try:
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
try:
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
""
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
2. Ensure you have a **`COHERE_API_KEY`** and a **`TAVILY_API_KEY`** set in your Space secrets.
|
| 457 |
-
"""
|
| 458 |
)
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
#
|
| 250 |
import os
|
| 251 |
import io
|
| 252 |
+
import json
|
| 253 |
import requests
|
| 254 |
import pandas as pd
|
| 255 |
import gradio as gr
|
|
|
|
| 257 |
from typing import TypedDict, Annotated, List
|
| 258 |
import operator
|
| 259 |
|
| 260 |
+
# --- LangChain & LangGraph Imports ---
|
| 261 |
+
from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage, AIMessage, SystemMessage
|
| 262 |
from langchain_core.tools import tool
|
| 263 |
+
from langchain_huggingface import HuggingFaceEndpoint
|
| 264 |
from langgraph.graph import StateGraph, END
|
|
|
|
| 265 |
from tavily import TavilyClient
|
| 266 |
import pypdf
|
| 267 |
|
|
|
|
| 270 |
FILES_DIR = "./files"
os.makedirs(FILES_DIR, exist_ok=True)

# --- System Prompt (Updated for Manual JSON Tool Calling) ---
# This prompt instructs the model to generate JSON, a robust method for tool calls.
# NOTE(review): stray "Use code with caution." / "Python" copy-paste artifacts that had
# leaked INTO this prompt string were removed, and the JSON example's closing fence restored.
AGENT_SYSTEM_PROMPT = """You are a world-class AI agent, specialized in solving complex problems from the GAIA benchmark.
Your task is to analyze the user's question, think step-by-step, and use the provided tools to find the correct answer.

**TOOL USAGE INSTRUCTIONS:**
When you need to use a tool, you MUST respond with a JSON object containing the tool name and its arguments. The JSON object should have two keys: "tool_name" and "parameters".

Here is an example of how to call the `tavily_search` tool:
```json
{
    "tool_name": "tavily_search",
    "parameters": {
        "query": "Who won the last FIFA World Cup?"
    }
}
```

CRITICAL FINAL ANSWER INSTRUCTIONS:
Once you have gathered all the necessary information and are absolutely certain of the answer, you MUST provide it directly and concisely.
Your final response must ONLY be the answer itself.
DO NOT wrap the final answer in a JSON object or include any conversational text.
Think, use your tools, and then provide ONLY the final, precise answer.
"""
# ================================================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

@tool
def tavily_search(query: str) -> str:
    """Uses the Tavily Search API to find information on the web."""
    print(f"--- Calling Tavily Search Tool with query: {query} ---")
    try:
        result = tavily.search(query=query, search_depth="advanced")
        # One bullet per result snippet, prefixed by a header naming the query.
        snippets = "\n".join(f"- {hit['content']}" for hit in result['results'])
        return f"Search results for '{query}':\n" + snippets
    except Exception as e:
        return f"Error during Tavily search: {e}"
|
|
|
|
| 307 |
@tool
def read_file(url: str) -> str:
    """Downloads and reads the content of a file (text or PDF) from a URL.

    The file is saved under FILES_DIR using the URL's basename, then read back:
    PDFs via pypdf, everything else as UTF-8 text (binary files are reported,
    not dumped).
    """
    print(f"--- Calling Read File Tool with URL: {url} ---")
    try:
        filename = os.path.join(FILES_DIR, os.path.basename(url))
        response = requests.get(url)
        response.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(response.content)
        if url.lower().endswith('.pdf'):
            try:
                pdf_reader = pypdf.PdfReader(filename)
                # BUG FIX: the messages contained the literal placeholder "(unknown)"
                # where the downloaded file's name belonged; interpolate it instead.
                return f"Successfully read PDF file '{os.path.basename(filename)}'. Content:\n\n{''.join(p.extract_text() for p in pdf_reader.pages)}"
            except Exception as e:
                return f"Error reading PDF file: {e}"
        else:
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    return f"Successfully read text file '{os.path.basename(filename)}'. Content:\n\n{f.read()}"
            except UnicodeDecodeError:
                return f"Successfully downloaded binary file '{os.path.basename(filename)}'. Cannot display content as text."
    except requests.exceptions.RequestException as e:
        return f"Error downloading or reading file: {e}"
|
|
|
|
| 326 |
@tool
def python_interpreter(code: str) -> str:
    """Executes Python code and returns its captured stdout.

    SECURITY: `exec` on model-generated code is arbitrary code execution, and
    running it against `globals()` lets it mutate this module's state. Acceptable
    only in this sandboxed benchmark Space; do not reuse elsewhere.
    """
    print(f"--- Calling Python Interpreter Tool with code:\n{code} ---")
    output_buffer = io.StringIO()
    try:
        # Capture anything the snippet prints so it can be fed back to the LLM.
        with redirect_stdout(output_buffer):
            exec(code, globals())
        return f"Code executed successfully. Output:\n{output_buffer.getvalue()}"
    except Exception as e:
        return f"Error executing Python code: {e}"
|
| 335 |
+
##================================================================================================
|
| 336 |
+
#✅ 2. CONFIGURE AND BUILD THE AGENT (with Qwen2 and Manual Tool Calling)
|
| 337 |
+
#================================================================================================
|
|
|
|
|
|
|
|
|
|
| 338 |
class AgentState(TypedDict):
    """State threaded through the LangGraph nodes: the running message history."""

    # operator.add as the reducer makes the graph APPEND each node's returned
    # messages to the history instead of replacing it.
    messages: Annotated[List[BaseMessage], operator.add]
|
|
|
|
| 340 |
def build_agent_graph():
    """Builds the agent as a manual LangGraph loop around a HuggingFaceEndpoint LLM.

    The model is instructed (via the system prompt) to emit JSON tool calls;
    `should_continue` routes JSON responses to the tool node and plain text to END.
    """
    tools = [tavily_search, read_file, python_interpreter]
    # Renamed loop variable: the original `{tool.name: tool for tool in tools}`
    # shadowed the imported @tool decorator.
    tool_map = {t.name: t for t in tools}

    # Using Qwen2-72B-Instruct model via HuggingFaceEndpoint.
    repo_id = "Qwen/Qwen2-72B-Instruct"
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        max_new_tokens=1024,
        temperature=0.1,
        huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
    )

    def call_model(state: AgentState):
        """Invokes the LLM and wraps the response in an AIMessage."""
        # Qwen2-Instruct uses the ChatML template; build it manually.
        prompt_str = ""
        for msg in state['messages']:
            role = ""
            if isinstance(msg, SystemMessage):
                role = "system"
            elif isinstance(msg, HumanMessage):
                role = "user"
            elif isinstance(msg, AIMessage):
                role = "assistant"
            elif isinstance(msg, ToolMessage):
                continue  # tool results are appended separately below
            if role:
                prompt_str += f"<|im_start|>{role}\n{msg.content}<|im_end|>\n"

        # Surface the most recent tool output, if any, as a user turn.
        if isinstance(state['messages'][-1], ToolMessage):
            prompt_str += f"<|im_start|>user\nTool output:\n{state['messages'][-1].content}<|im_end|>\n"

        prompt_str += "<|im_start|>assistant\n"

        response_text = llm.invoke(prompt_str)
        return {"messages": [AIMessage(content=response_text)]}

    def should_continue(state: AgentState) -> str:
        """Routes to 'action' when the last message looks like a JSON tool call, else 'end'."""
        last_message_content = state['messages'][-1].content.strip()
        # A fenced ```json block is always treated as a tool call.
        if "```json" in last_message_content:
            return "action"
        if last_message_content.startswith('{') and last_message_content.endswith('}'):
            try:
                json.loads(last_message_content)
                return "action"
            except json.JSONDecodeError:
                return "end"  # Not valid JSON, must be the final answer
        else:
            return "end"

    def call_tool_node(state: AgentState):
        """Parses the JSON tool call from the LLM and executes the selected tool."""
        last_message_content = state['messages'][-1].content.strip()

        # Extract JSON from a markdown code block if present.
        if "```json" in last_message_content:
            # BUG FIX: the original chained .split("```json").split("```"), which
            # raises AttributeError (str.split returns a list). Take the segment
            # after the opening fence, then everything before the closing fence.
            json_str = last_message_content.split("```json")[1].split("```")[0].strip()
        else:
            json_str = last_message_content

        try:
            tool_call_data = json.loads(json_str)
            tool_name = tool_call_data.get("tool_name")
            parameters = tool_call_data.get("parameters", {})
            if tool_name not in tool_map:
                return {"messages": [ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id="error")]}

            selected_tool = tool_map[tool_name]
            tool_output = selected_tool.invoke(parameters)
            return {"messages": [ToolMessage(content=str(tool_output), tool_call_id=tool_name)]}
        except Exception as e:
            return {"messages": [ToolMessage(content=f"Error parsing tool call: {e}. Content: '{last_message_content}'", tool_call_id="error")]}

    workflow = StateGraph(AgentState)
    workflow.add_node("agent", call_model)
    workflow.add_node("action", call_tool_node)
    workflow.set_entry_point("agent")
    workflow.add_conditional_edges("agent", should_continue, {"action": "action", "end": END})
    workflow.add_edge('action', 'agent')
    return workflow.compile()
|
| 422 |
+
#================================================================================================
|
| 423 |
+
#✅ 3. AGENT CLASS AND EVALUATION LOGIC
|
| 424 |
+
#================================================================================================
|
| 425 |
class GaiaAgent:
    """Callable wrapper: builds the LangGraph app once, then answers questions."""

    # BUG FIX: was `def init(self)` — a markdown paste stripped the dunder
    # underscores, so the constructor never ran and agent_app was never set.
    def __init__(self):
        print("GaiaAgent initialized. Building agent with Qwen/Qwen2-72B-Instruct...")
        self.agent_app = build_agent_graph()

    def __call__(self, question: str) -> str:
        """Runs the graph on one question and returns the final answer (or an error string)."""
        print(f"\n{'='*60}\nAgent received question: {question[:100]}...\n{'='*60}")
        try:
            initial_input = {"messages": [SystemMessage(content=AGENT_SYSTEM_PROMPT), HumanMessage(content=question)]}
            final_state = None
            # Stream the graph; keep only the latest node's state update.
            for step in self.agent_app.stream(initial_input, {"recursion_limit": 15}):
                final_state = list(step.values())[0]

            final_answer = final_state['messages'][-1].content
            return str(final_answer).strip()
        except Exception as e:
            print(f"An error occurred during agent execution: {e}")
            return f"AGENT_EXECUTION_ERROR: {e}"
|
| 444 |
+
# --- The rest of the file is unchanged ---
|
| 445 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetches all GAIA questions, runs the agent on each, and submits the answers.

    Returns a (status_message, results_dataframe) pair for the Gradio outputs.
    Requires a logged-in Hugging Face profile.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch the question set; abort early on any failure.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"An unexpected error occurred fetching questions: {e}", None

    results_log, answers_payload = [], []
    agent_instance = GaiaAgent()

    for item in questions_data:
        task_id, question_text = item.get("task_id"), item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent_instance(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # One failing task must not abort the whole run; record it and continue.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}

    try:
        response = requests.post(submit_url, json=submission_data, timeout=90)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"An unexpected error in submission: {e}", pd.DataFrame(results_log)
|
| 492 |
+
# Gradio UI: login, run button, and result displays.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Final Assessment (Qwen2-72B-Instruct)")
    gr.Markdown(
        """
        Instructor's Note: This version uses the powerful Qwen/Qwen2-72B-Instruct model from the Hugging Face Hub.
        It relies on a robust manual LangGraph loop to handle tool calls by instructing the model to generate JSON.
        1. Ensure you have a HUGGINGFACEHUB_API_TOKEN and TAVILY_API_KEY set in your secrets.
        2. Ensure your requirements.txt is updated. Good luck!
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # LoginButton's OAuth profile is injected automatically as the fn's argument.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

# BUG FIX: was `if name == "main":` — markdown stripped the dunder underscores,
# which would raise NameError and never launch the app.
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    demo.launch(debug=True, share=False, ssr_mode=False)
|