HF_Agents_Course_GAIA_Agent

Sleeping

App Files Files Community

agercas committed on Jun 7, 2025

Commit

b6088cd

1 Parent(s): 283931c

refactor code

Browse files

Files changed (4) hide show

src/agent.py +0 -0
src/agents/langgraph_agent.py +303 -0
src/agents/smolagents_agent.py +240 -0
src/{tools.py → tools/custom_tools.py} +0 -0

src/agent.py DELETED Viewed

File without changes

src/agents/langgraph_agent.py ADDED Viewed

	@@ -0,0 +1,303 @@

+from typing import Annotated, Sequence, TypedDict, Literal
+from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage, ToolMessage
+from langchain_core.runnables import RunnableConfig
+from langgraph.graph.message import add_messages
+from langgraph.graph import StateGraph, END
+from pydantic import BaseModel, Field
+from langchain.chat_models import init_chat_model
+import json
+# Import tools
+from langchain_community.tools import DuckDuckGoSearchRun
+from langchain_community.tools.pubmed.tool import PubmedQueryRun
+from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun
+from langchain_community.tools.arxiv import ArxivQueryRun
+from langchain_community.tools.wikidata.tool import WikidataQueryRun
+from langchain_community.tools import WikipediaQueryRun
+from langchain_community.utilities import WikipediaAPIWrapper
+from langchain_experimental.utilities import PythonREPL
+from langchain_core.tools import Tool
+# Set up tools
+python_repl = PythonREPL()
+repl_tool = Tool(
+    name="python_repl",
+    description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
+    func=python_repl.run,
+)
+# Initialize all tools
+tools = [
+    DuckDuckGoSearchRun(),
+    PubmedQueryRun(),
+    SemanticScholarQueryRun(),
+    ArxivQueryRun(),
+    WikidataQueryRun(),
+    WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
+    repl_tool
+]
+# Initialize Gemini model
+model = init_chat_model("gemini-2.0-flash", model_provider="google_genai")
+model_with_tools = model.bind_tools(tools)
+# Create tools lookup
+tools_by_name = {tool.name: tool for tool in tools}
+# Pydantic models for structured output
+class ToolSufficiencyResponse(BaseModel):
+    """Response for tool sufficiency check"""
+    sufficient: bool = Field(description="Whether the available tools are sufficient to answer the question")
+    reasoning: str = Field(description="Brief reasoning for the decision")
+class FinalAnswer(BaseModel):
+    """Final answer structure"""
+    answer: str = Field(description="The comprehensive answer to the user's question")
+    confidence: Literal["high", "medium", "low"] = Field(description="Confidence level in the answer")
+    sources_used: list[str] = Field(description="List of tools/sources that were used to generate the answer")
+# Define graph state
+class AgentState(TypedDict):
+    """The state of the agent."""
+    messages: Annotated[Sequence[BaseMessage], add_messages]  # conversation history; add_messages is attached as annotation metadata
+    llm_call_count: int  # LLM calls made so far; call_model returns this value plus one
+    max_llm_calls: int  # cap compared against llm_call_count in should_continue_react
+# Node functions
+def check_tool_sufficiency(state: AgentState, config: RunnableConfig):
+    """Check if available tools are sufficient to answer the question"""
+    # Get the user's question
+    user_message = None
+    for msg in state["messages"]:
+        if msg.type == "human":
+            user_message = msg.content
+            break
+    # Create system prompt for sufficiency check
+    available_tools_desc = "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])
+    system_prompt = f"""You are an AI assistant that needs to determine if the available tools are sufficient to answer a user's question.
+Available tools:
+{available_tools_desc}
+Your task is to analyze the user's question and determine if these tools provide sufficient capability to answer it comprehensively.
+Consider:
+- Can the question be answered with web search, academic papers, or computational tools?
+- Does the question require real-time data, personal information, or capabilities not available through these tools?
+- Can you break down the question into parts that these tools can handle?
+Be generous in your assessment - if there's a reasonable path to answer the question using these tools, respond with sufficient=True."""
+    # Use structured output for sufficiency check
+    structured_model = model.with_structured_output(ToolSufficiencyResponse)
+    messages = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=f"Question to analyze: {user_message}")
+    ]
+    response = structured_model.invoke(messages, config)
+    # Add response to messages for context
+    response_message = SystemMessage(
+        content=f"Tool sufficiency check: {'Sufficient' if response.sufficient else 'Insufficient'}. Reasoning: {response.reasoning}"
+    )
+    return {
+        "messages": [response_message],
+        "tool_sufficiency": response.sufficient
+    }
+def call_model(state: AgentState, config: RunnableConfig):
+    """Call the model (ReAct agent LLM node)"""
+    system_prompt = SystemMessage(
+        content="""You are a helpful AI assistant with access to various tools. Use the tools available to you to answer the user's question comprehensively.
+Think step by step:
+1. Analyze what information you need
+2. Use appropriate tools to gather that information
+3. Synthesize the information to provide a complete answer
+Be thorough but efficient with your tool usage."""
+    )
+    response = model_with_tools.invoke([system_prompt] + state["messages"], config)
+    # Increment LLM call count
+    new_count = state.get("llm_call_count", 0) + 1
+    return {
+        "messages": [response],
+        "llm_call_count": new_count
+    }
+def tool_node(state: AgentState):
+    """Execute tools based on the last message's tool calls"""
+    outputs = []
+    last_message = state["messages"][-1]
+    for tool_call in last_message.tool_calls:
+        try:
+            tool_result = tools_by_name[tool_call["name"]].invoke(tool_call["args"])
+            outputs.append(
+                ToolMessage(
+                    content=str(tool_result),
+                    name=tool_call["name"],
+                    tool_call_id=tool_call["id"],
+                )
+            )
+        except Exception as e:
+            outputs.append(
+                ToolMessage(
+                    content=f"Error executing tool {tool_call['name']}: {str(e)}",
+                    name=tool_call["name"],
+                    tool_call_id=tool_call["id"],
+                )
+            )
+    return {"messages": outputs}
+def final_answer_node(state: AgentState, config: RunnableConfig):
+    """Generate final structured answer based on conversation history"""
+    system_prompt = SystemMessage(
+        content="""You are tasked with providing a final, comprehensive answer based on the conversation history and tool usage.
+Analyze all the information gathered from the tools and provide:
+1. A clear, comprehensive answer to the original question
+2. Your confidence level in this answer
+3. The sources/tools that were used
+Be honest about limitations and indicate your confidence level appropriately."""
+    )
+    # Get the original user question
+    user_question = None
+    for msg in state["messages"]:
+        if msg.type == "human":
+            user_question = msg.content
+            break
+    # Create structured output model
+    structured_model = model.with_structured_output(FinalAnswer)
+    messages = [
+        system_prompt,
+        HumanMessage(content=f"Original question: {user_question}"),
+        SystemMessage(content="Based on the following conversation history, provide your final answer:")
+    ] + state["messages"]
+    response = structured_model.invoke(messages, config)
+    return {
+        "messages": [SystemMessage(content=f"Final Answer: {response.answer}")],
+        "final_answer": response
+    }
+# Edge functions
+def should_continue_sufficiency(state: AgentState):
+    """Decide whether tools are sufficient"""
+    # Check if we have a tool sufficiency result
+    for msg in reversed(state["messages"]):
+        if "Tool sufficiency check: Sufficient" in msg.content:
+            return "sufficient"
+        elif "Tool sufficiency check: Insufficient" in msg.content:
+            return "insufficient"
+    return "insufficient"  # Default to insufficient if unclear
+def should_continue_react(state: AgentState):
+    """Decide whether to continue with ReAct loop or move to final answer"""
+    messages = state["messages"]
+    last_message = messages[-1]
+    llm_call_count = state.get("llm_call_count", 0)
+    max_calls = state.get("max_llm_calls", 4)
+    # If we've reached the maximum number of LLM calls, force stop
+    if llm_call_count >= max_calls:
+        return "final_answer"
+    # If there are no tool calls, we're done with ReAct loop
+    if not hasattr(last_message, 'tool_calls') or not last_message.tool_calls:
+        return "final_answer"
+    # Otherwise continue with tools
+    return "continue"
+# Build the graph
+def create_react_agent_graph():
+    """Create and return the compiled ReAct agent graph"""
+    workflow = StateGraph(AgentState)
+    # Add nodes
+    workflow.add_node("check_sufficiency", check_tool_sufficiency)
+    workflow.add_node("agent", call_model)
+    workflow.add_node("tools", tool_node)
+    workflow.add_node("final_answer", final_answer_node)
+    # Set entry point
+    workflow.set_entry_point("check_sufficiency")
+    # Add conditional edge from sufficiency check
+    workflow.add_conditional_edges(
+        "check_sufficiency",
+        should_continue_sufficiency,
+        {
+            "sufficient": "agent",
+            "insufficient": END
+        }
+    )
+    # Add conditional edge from agent
+    workflow.add_conditional_edges(
+        "agent",
+        should_continue_react,
+        {
+            "continue": "tools",
+            "final_answer": "final_answer"
+        }
+    )
+    # Add edge from tools back to agent
+    workflow.add_edge("tools", "agent")
+    # Add edge from final_answer to END
+    workflow.add_edge("final_answer", END)
+    return workflow.compile()
+# Helper function for running the agent
+def run_agent(question: str, max_llm_calls: int = 4):
+    """Run the ReAct agent with a question"""
+    graph = create_react_agent_graph()
+    initial_state = {
+        "messages": [HumanMessage(content=question)],
+        "llm_call_count": 0,
+        "max_llm_calls": max_llm_calls
+    }
+    # Stream the execution
+    print(f"Question: {question}")
+    print("=" * 50)
+    for step in graph.stream(initial_state):
+        for node, output in step.items():
+            print(f"\n--- {node.upper()} ---")
+            if "messages" in output and output["messages"]:
+                for msg in output["messages"]:
+                    if hasattr(msg, 'content'):
+                        print(f"{msg.__class__.__name__}: {msg.content}")
+                    elif hasattr(msg, 'tool_calls') and msg.tool_calls:
+                        print(f"Tool calls: {[tc['name'] for tc in msg.tool_calls]}")
+            if "final_answer" in output:
+                print(f"\nFINAL STRUCTURED ANSWER:")
+                print(f"Answer: {output['final_answer'].answer}")
+                print(f"Confidence: {output['final_answer'].confidence}")
+                print(f"Sources: {output['final_answer'].sources_used}")

src/agents/smolagents_agent.py ADDED Viewed

	@@ -0,0 +1,240 @@

+"""
+Multi-Agent System for GAIA Benchmark using smolagents
+Architecture: Coordinator -> Specialized Agents
+"""
+from typing import Any
+from smolagents import CodeAgent, HfApiModel
+from src.tools import all_tools
+class GAIAMultiAgentSystem:
+    """
+    Multi-agent system designed for GAIA benchmark tasks.
+    Uses a coordinator agent that delegates to specialized agents.
+    """
+    def __init__(self, model_config: dict[str, Any] | None = None):
+        """
+        Initialize the multi-agent system.
+        Args:
+            model_config: Configuration for the language model
+                         e.g., {"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", "provider": "together"}
+        """
+        model_config = model_config or {}
+        self.model = HfApiModel(**model_config)
+        # self.model = InferenceClientModel(**model_config)
+        self.agents = {}
+        self._setup_agents()
+        self._setup_coordinator()
+    def _setup_agents(self):
+        """Set up all specialized agents with their respective tools."""
+        # Search Agent - Information retrieval
+        search_tools = [
+            # Assuming these are your actual tool instances
+            # Replace with actual tool references from all_tools
+            "wikipedia_search",
+            "wikipedia_search_tool",
+            "duckduckgo_search",
+            "web_search_duckduckgo",
+            "arxiv_search",
+            "fetch_webpage_content",
+        ]
+        self.agents["search_agent"] = CodeAgent(
+            model=self.model,
+            tools=[tool for tool in all_tools if tool.name in search_tools],
+            name="search_agent",
+            description="Retrieves factual information and background data from various sources including Wikipedia, web search, and academic papers",
+            verbosity_level=1,
+            max_steps=10,
+        )
+        # Document Agent - Document processing
+        document_tools = ["load_csv_file", "load_excel_file", "read_text_file", "transcribe_audio_file"]
+        self.agents["document_agent"] = CodeAgent(
+            model=self.model,
+            tools=[tool for tool in all_tools if tool.name in document_tools],
+            name="document_agent",
+            description="Loads and processes structured and unstructured documents including CSV, Excel, text files, and audio transcriptions",
+            verbosity_level=1,
+            max_steps=8,
+        )
+        # Vision Agent - Image processing
+        vision_tools = ["ocr_tool", "image_captioning_tool", "visual_qa_tool"]
+        self.agents["vision_agent"] = CodeAgent(
+            model=self.model,
+            tools=[tool for tool in all_tools if tool.name in vision_tools],
+            name="vision_agent",
+            description="Extracts text and meaning from images using OCR, captioning, and visual question answering",
+            verbosity_level=1,
+            max_steps=6,
+        )
+        # Reasoning Agent - Logic and analysis
+        reasoning_tools = ["analyze_chess_position", "analyze_table_commutativity", "count_items_in_list"]
+        self.agents["reasoning_agent"] = CodeAgent(
+            model=self.model,
+            tools=[tool for tool in all_tools if tool.name in reasoning_tools],
+            name="reasoning_agent",
+            description="Performs symbolic reasoning, logical pattern recognition, and analytical tasks",
+            verbosity_level=1,
+            max_steps=8,
+        )
+        # Language Agent - Text processing
+        language_tools = ["reverse_string", "reverse_words_in_string"]
+        # Note: Language agent might need additional string manipulation tools
+        self.agents["language_agent"] = CodeAgent(
+            model=self.model,
+            tools=[tool for tool in all_tools if tool.name in language_tools],
+            name="language_agent",
+            description="Handles low-level text transformations and string manipulations",
+            verbosity_level=1,
+            max_steps=5,
+        )
+        # Coding Agent - Python execution and logic
+        self.agents["coding_agent"] = CodeAgent(
+            model=self.model,
+            tools=[],  # Uses implicit code execution capabilities
+            name="coding_agent",
+            description="Executes Python code and performs computational logic through code interpretation",
+            additional_authorized_imports=[
+                "pandas",
+                "numpy",
+                "matplotlib",
+                "json",
+                "re",
+                "datetime",
+                "math",
+                "statistics",
+                "itertools",
+            ],
+            verbosity_level=1,
+            max_steps=10,
+        )
+    def _setup_coordinator(self):
+        """Set up the coordinator agent that manages other agents."""
+        # Collect all managed agents
+        managed_agents = list(self.agents.values())
+        self.coordinator = CodeAgent(
+            model=self.model,
+            tools=[],  # Coordinator has no direct tools
+            managed_agents=managed_agents,
+            name="coordinator",
+            description="Coordinates and delegates tasks to specialized agents based on task requirements",
+            planning_interval=3,  # Plan every 3 steps
+            verbosity_level=2,
+            max_steps=20,
+        )
+    def analyze_task(self, task: str) -> dict[str, Any]:
+        """
+        Analyze a GAIA task to determine which agents might be needed.
+        Args:
+            task: The task description
+        Returns:
+            Dictionary with task analysis
+        """
+        analysis_prompt = f"""
+        Analyze this GAIA benchmark task and determine which types of agents would be most useful:
+        Task: {task}
+        Available agent types:
+        - search_agent: For finding factual information online
+        - document_agent: For processing files (CSV, Excel, text, audio)
+        - vision_agent: For analyzing images
+        - reasoning_agent: For logical analysis and pattern recognition
+        - language_agent: For text transformations
+        - coding_agent: For computational tasks and data processing
+        Provide a brief analysis of what agents would be needed and why.
+        """
+        # Use the coordinator's model for analysis
+        response = self.model([{"role": "user", "content": analysis_prompt}])
+        return {"analysis": response.content, "task": task}
+    def solve_task(self, task: str, context: str | None = None) -> Any:
+        """
+        Solve a GAIA benchmark task using the multi-agent system.
+        Args:
+            task: The task to solve
+            context: Optional additional context
+        Returns:
+            The result from the coordinator agent
+        """
+        # Prepare the enhanced prompt for the coordinator
+        enhanced_task = f"""
+        You are coordinating a team of specialized agents to solve this GAIA benchmark task.
+        TASK: {task}
+        {f"CONTEXT: {context}" if context else ""}
+        Available agents and their capabilities:
+        - search_agent: Retrieves information from Wikipedia, web search, ArXiv
+        - document_agent: Processes CSV, Excel, text files, and audio transcriptions
+        - vision_agent: Analyzes images with OCR, captioning, and visual QA
+        - reasoning_agent: Performs logical analysis and pattern recognition
+        - language_agent: Handles text transformations and string operations
+        - coding_agent: Executes Python code for computational tasks
+        Strategy:
+        1. Analyze what type of information or processing is needed
+        2. Delegate to appropriate specialized agents
+        3. Combine results from multiple agents if needed
+        4. Provide a final comprehensive answer
+        Be systematic and thorough. Use multiple agents when the task requires different types of expertise.
+        """
+        return self.coordinator.run(enhanced_task)
+    def get_agent_info(self) -> dict[str, dict]:
+        """Get information about all agents in the system."""
+        info = {}
+        for name, agent in self.agents.items():
+            info[name] = {
+                "description": agent.description,
+                "tools": [tool.name for tool in agent.tools] if hasattr(agent, "tools") else [],
+                "max_steps": agent.max_steps,
+            }
+        info["coordinator"] = {
+            "description": self.coordinator.description,
+            "managed_agents": [agent.name for agent in self.coordinator.managed_agents],
+            "max_steps": self.coordinator.max_steps,
+        }
+        return info
+    def visualize_system(self):
+        """Visualize the multi-agent system structure."""
+        if hasattr(self.coordinator, "visualize"):
+            return self.coordinator.visualize()
+        else:
+            print("System Structure:")
+            print("Coordinator")
+            for agent_name in self.agents.keys():
+                print(f"  └── {agent_name}")

src/{tools.py → tools/custom_tools.py} RENAMED Viewed

File without changes