Update app.py

app.py CHANGED
@@ -250,6 +250,7 @@
 
 import os
 import io
+import json
 import requests
 import pandas as pd
 import gradio as gr
@@ -261,11 +262,7 @@ import operator
 from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage, AIMessage, SystemMessage
 from langchain_core.tools import tool
 from langchain_huggingface import HuggingFaceEndpoint
-# <<<--- CHANGE 1: Import new components for building the agent --->>>
-from langchain.agents import AgentExecutor, create_tool_calling_agent
-from langchain_core.prompts import ChatPromptTemplate
 from langgraph.graph import StateGraph, END
-from langgraph.prebuilt import ToolNode
 from tavily import TavilyClient
 import pypdf
 
@@ -274,21 +271,32 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 FILES_DIR = "./files"
 os.makedirs(FILES_DIR, exist_ok=True)
 
-# --- System Prompt (
+# --- System Prompt (Updated for Manual Tool Calling) ---
 AGENT_SYSTEM_PROMPT = """You are a world-class AI agent, specialized in solving complex problems from the GAIA benchmark.
 Your task is to analyze the user's question, think step-by-step, and use the provided tools to find the correct answer.
-
-
-
-
-
-
+
+**TOOL USAGE INSTRUCTIONS:**
+When you need to use a tool, you MUST respond with a JSON object containing the tool name and its arguments. The JSON object should have two keys: "tool_name" and "parameters".
+
+Here is an example of how to call the `tavily_search` tool:
+```json
+{
+  "tool_name": "tavily_search",
+  "parameters": {
+    "query": "What was the score of the 2023 FIFA Women's World Cup final?"
+  }
+}```
+
+**CRITICAL FINAL ANSWER INSTRUCTIONS:**
+Once you have gathered all the necessary information and are absolutely certain of the answer, you MUST provide it directly and concisely.
+- Your final response must ONLY be the answer itself.
+- DO NOT wrap the final answer in a JSON object or include any conversational text like 'The answer is...'.
+
 EXAMPLES OF CORRECT FINAL ANSWERS:
--
--
--
--
-Think, use your tools, and then provide ONLY the final, precise answer.
+- `2023`
+- `John Doe`
+- `42`
+- `broccoli, celery, lettuce, sweet potatoes`
 """
 
 #
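A note on the contract this prompt establishes: the model's reply is either a single JSON object (a tool call) or bare text (the final answer). Below is a minimal, self-contained sketch of how such a reply can be classified, mirroring the `should_continue`/`call_tool_node` logic added further down; the helper name `parse_reply` is hypothetical, not part of the app.

```python
import json

def parse_reply(reply: str):
    """Classify a reply under the prompt's protocol: JSON object = tool call, anything else = final answer."""
    reply = reply.strip()
    if reply.startswith("{") and reply.endswith("}"):
        try:
            data = json.loads(reply)
            return ("tool_call", data.get("tool_name"), data.get("parameters", {}))
        except json.JSONDecodeError:
            pass  # Not valid JSON after all; treat it as a final answer.
    return ("final_answer", reply, None)

print(parse_reply('{"tool_name": "tavily_search", "parameters": {"query": "2023 FIFA final score"}}'))
# ('tool_call', 'tavily_search', {'query': '2023 FIFA final score'})
print(parse_reply("Paris"))
# ('final_answer', 'Paris', None)
```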
@@ -317,7 +325,7 @@ def read_file(url: str) -> str:
     response = requests.get(url)
     response.raise_for_status()
     with open(filename, 'wb') as f: f.write(response.content)
-
+
     if url.lower().endswith('.pdf'):
         try:
            pdf_reader = pypdf.PdfReader(filename)
@@ -341,44 +349,85 @@ def python_interpreter(code: str) -> str:
 
 #
 # ================================================================================================
-# ✅ 2. CONFIGURE AND BUILD THE AGENT GRAPH (
+# ✅ 2. CONFIGURE AND BUILD THE AGENT GRAPH (MANUAL LANGGRAPH IMPLEMENTATION)
 # ================================================================================================
 #
 class AgentState(TypedDict):
-
-
-    agent_outcome: dict | None
-
+    messages: Annotated[List[BaseMessage], operator.add]
+
 def build_agent_graph():
     """Builds the LangGraph agent."""
     tools = [tavily_search, read_file, python_interpreter]
-
+    tool_map = {tool.name: tool for tool in tools}
+
     repo_id = "CohereForAI/c4ai-command-r-plus"
     llm = HuggingFaceEndpoint(
-        repo_id=repo_id, max_new_tokens=1024, temperature=0
+        repo_id=repo_id, max_new_tokens=1024, temperature=0,
         huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
     )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    def call_model(state: AgentState):
+        """Invokes the LLM and wraps the response in an AIMessage."""
+        messages = state['messages']
+        # We combine the system prompt with the rest of the conversation.
+        prompt_str = ""
+        for msg in messages:
+            if isinstance(msg, SystemMessage):
+                prompt_str += f"<|SYSTEM|>\n{msg.content}\n"
+            elif isinstance(msg, HumanMessage):
+                prompt_str += f"<|USER|>\n{msg.content}\n"
+            elif isinstance(msg, AIMessage):
+                prompt_str += f"<|ASSISTANT|>\n{msg.content}\n"
+            elif isinstance(msg, ToolMessage):
+                prompt_str += f"<|TOOL_RESULT|>\n{msg.content}\n"
+
+        prompt_str += "<|ASSISTANT|>"
+
+        response_text = llm.invoke(prompt_str)
+        return {"messages": [AIMessage(content=response_text)]}
+
+    def should_continue(state: AgentState) -> str:
+        """Determines whether to call a tool or end the loop."""
+        last_message_content = state['messages'][-1].content.strip()
+
+        # A simple check: if the response looks like a JSON object, it's a tool call.
+        if last_message_content.startswith('{') and last_message_content.endswith('}'):
+            # More robust check for JSON tool call
+            try:
+                json.loads(last_message_content)
+                return "action"
+            except json.JSONDecodeError:
+                return "end"  # It's not valid JSON, so it must be the final answer
+        else:
+            return "end"
+
+    def call_tool_node(state: AgentState):
+        """Parses the tool call from the LLM output and executes it."""
+        last_message_content = state['messages'][-1].content.strip()
+        try:
+            tool_call_data = json.loads(last_message_content)
+            tool_name = tool_call_data.get("tool_name")
+            parameters = tool_call_data.get("parameters", {})
+
+            if tool_name not in tool_map:
+                error_message = f"Error: Tool '{tool_name}' not found."
+                return {"messages": [ToolMessage(content=error_message, tool_call_id="error")]}
+
+            selected_tool = tool_map[tool_name]
+            tool_output = selected_tool.invoke(parameters)
+            return {"messages": [ToolMessage(content=str(tool_output), tool_call_id=tool_name)]}
+
+        except Exception as e:
+            error_message = f"Error processing tool call: {e}. Content: '{last_message_content}'"
+            return {"messages": [ToolMessage(content=error_message, tool_call_id="error")]}
+
+    workflow = StateGraph(AgentState)
+    workflow.add_node("agent", call_model)
+    workflow.add_node("action", call_tool_node)
+    workflow.set_entry_point("agent")
+    workflow.add_conditional_edges("agent", should_continue, {"action": "action", "end": END})
+    workflow.add_edge('action', 'agent')
+    return workflow.compile()
 
 #
 # ================================================================================================
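For readers new to LangGraph, here is a runnable toy version of the same two-node loop with the LLM stubbed out. It only demonstrates how the conditional edge cycles agent → action → agent until the reply stops looking like JSON; the stub node behavior is illustrative, not the app's real model output.

```python
import operator
from typing import Annotated, List, TypedDict

from langgraph.graph import END, StateGraph

class ToyState(TypedDict):
    messages: Annotated[List[str], operator.add]

def agent(state: ToyState):
    # Stub LLM: ask for one tool call on the first turn, then answer.
    if len(state["messages"]) == 1:
        return {"messages": ['{"tool_name": "echo", "parameters": {}}']}
    return {"messages": ["final answer"]}

def action(state: ToyState):
    # Stub tool execution.
    return {"messages": ["tool result"]}

def route(state: ToyState) -> str:
    # Same JSON-shaped check as should_continue above.
    return "action" if state["messages"][-1].startswith("{") else "end"

g = StateGraph(ToyState)
g.add_node("agent", agent)
g.add_node("action", action)
g.set_entry_point("agent")
g.add_conditional_edges("agent", route, {"action": "action", "end": END})
g.add_edge("action", "agent")
app = g.compile()

print(app.invoke({"messages": ["question"]})["messages"])
# ['question', '{"tool_name": "echo", "parameters": {}}', 'tool result', 'final answer']
```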
@@ -387,24 +436,28 @@ def build_agent_graph():
 #
 class GaiaAgent:
     def __init__(self):
-        print("GaiaAgent initialized. Building Command R+ agent with
-        # The agent_app is now the fully-formed AgentExecutor
+        print("GaiaAgent initialized. Building Command R+ agent with manual LangGraph loop...")
         self.agent_app = build_agent_graph()
 
     def __call__(self, question: str) -> str:
         print(f"\n{'='*60}\nAgent received question: {question[:100]}...\n{'='*60}")
-
-
-
-
-
-
-
-
-
-        print(f"
-
-
+        initial_input = {
+            "messages": [
+                SystemMessage(content=AGENT_SYSTEM_PROMPT),
+                HumanMessage(content=question)
+            ]
+        }
+        final_state = None
+        for i, step in enumerate(self.agent_app.stream(initial_input, {"recursion_limit": 15})):
+            if i == 0: print("--- Starting Agentic Loop ---")
+            print(f"--- Step {i+1} ---")
+            print(step)
+            final_state = list(step.values())[0]  # Get the state from the graph step
+
+        final_answer_message = final_state['messages'][-1]
+        final_answer = str(final_answer_message.content).strip()
+        print(f"\n--- Agent finished. Final Answer: {final_answer} ---\n")
+        return final_answer
 
 # --- The rest of the file (run_and_submit_all, Gradio UI) remains the same ---
 def run_and_submit_all( profile: gr.OAuthProfile | None):
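One detail in `__call__` worth spelling out: assuming LangGraph's default update-style streaming (which this loop relies on), each item yielded by `stream()` maps the node that just ran to only the state delta that node returned, so `final_state` ends up holding the last node's update rather than the full accumulated state. A sketch using the toy graph from the previous example:

```python
# Each streamed step has the shape {node_name: update}; the final
# update's last message is what __call__ extracts as the answer.
for step in app.stream({"messages": ["question"]}):
    print(step)
# Expected shape (values depend on the stub logic above):
# {'agent': {'messages': ['{"tool_name": "echo", "parameters": {}}']}}
# {'action': {'messages': ['tool result']}}
# {'agent': {'messages': ['final answer']}}
```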
@@ -431,15 +484,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
 
-    # Instantiate the agent once
-    agent_instance = GaiaAgent()
+    agent_instance = GaiaAgent()  # Instantiate the agent once
 
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None: continue
         try:
-            # Reuse the same agent instance
             submitted_answer = agent_instance(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
@@ -471,14 +522,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     return status_message, results_df
 
 with gr.Blocks() as demo:
-    gr.Markdown("# GAIA Agent Final Assessment (Open Source: Command R+ -
+    gr.Markdown("# GAIA Agent Final Assessment (Open Source: Command R+ - Final)")
     gr.Markdown(
         """
-        **Instructor's Note:** This version
-        It
-        1. Ensure you have a **`HUGGINGFACEHUB_API_TOKEN`** and a **`TAVILY_API_KEY`** set in your Space secrets.
-        2. Your `requirements.txt` should include `langchain`, `langchain-huggingface`, and `langchain-core`.
-        3. Let's run the evaluation again!
+        **Instructor's Note:** This version uses a robust, manual LangGraph loop to handle tool calls for the `HuggingFaceEndpoint`.
+        It explicitly tells the model to generate JSON for tool calls and parses this JSON from the text output. This is the correct, fundamental way to build agents with models that don't support modern tool-binding abstractions.
         """
     )
     gr.LoginButton()