HF_Agents_Course_GAIA_Agent

Sleeping

App Files Files Community

agercas committed on Aug 12, 2025

Commit

ee85a4b

1 Parent(s): 1ffaf53

add tools and prompts

Browse files

Files changed (5) hide show

src/agents/models.py +31 -0
src/agents/prompts.py +172 -0
src/agents/tools.py +28 -0
src/tools/custom_tools.py +5 -0
src/tools/custom_wikipedia_tool.py +103 -0

src/agents/models.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# Pydantic models
+from pydantic import BaseModel, Field
+# Structured result of the feasibility check: a boolean verdict plus the reasoning for it.
+# NOTE(review): the docstring and Field descriptions are presumably surfaced to the model
+# as schema text for structured output — confirm against the caller before rewording them.
+class FeasibilityCheck(BaseModel):
+    """The result of the feasibility check"""
+    feasible: bool = Field(description="Whether the question is feasible to answer with the available tools")
+    reasoning: str = Field(description="The reasoning for the feasibility check")
+# One planned step: what to do, which tools (by name) to use, and whether it is the last step.
+# NOTE(review): `tools` holds tool names as strings; presumably these must match the names of
+# the registered tool objects — verify against the code that consumes this model.
+class NextStep(BaseModel):
+    """The next step in the plan"""
+    step: str = Field(description="Description of the next step to take")
+    tools: list[str] = Field(description="List of tool names to use for this step")
+    is_final: bool = Field(description="Whether this is the final step")
+# Executor-side wrap-up: a conclusion plus a summary of whatever partial results exist.
+# NOTE(review): presumably produced when the executor stops on its iteration limit — confirm
+# against the caller.
+class FinalConclusion(BaseModel):
+    """A final conclusion from the executor"""
+    conclusion: str = Field(description="The conclusion based on the work completed so far")
+    partial_results: str = Field(description="Summary of partial results obtained")
+# Final output for a question: the answer text and the reasoning that supports it.
+class FinalAnswer(BaseModel):
+    """The final answer to the question"""
+    answer: str = Field(description="The comprehensive final answer to the question")
+    reasoning: str = Field(description="The reasoning behind the final answer")

src/agents/prompts.py ADDED Viewed

	@@ -0,0 +1,172 @@

+class GAIAPrompts:
+    """Centralized prompts for the GAIA benchmark multi-agent system.
+
+    Every method is a static prompt builder that returns the prompt text as a
+    string; the class holds no state.
+    """
+    @staticmethod
+    def _describe_tools(available_tools: list) -> str:
+        """Render one "- name: description" bullet per tool, joined by newlines."""
+        return "\n".join(f"- {tool.name}: {tool.description}" for tool in available_tools)
+    @staticmethod
+    def get_feasibility_check_prompt(available_tools: list) -> str:
+        """Build the system prompt for feasibility checking.
+
+        Args:
+            available_tools: Tool objects exposing ``name`` and ``description``.
+
+        Returns:
+            The prompt text with the tool list embedded.
+        """
+        tools_desc = GAIAPrompts._describe_tools(available_tools)
+        return f"""You are a feasibility assessor for the GAIA benchmark, which tests real-world assistant capabilities.
+Your task is to determine if a question can be answered using the available tools and capabilities.
+Available tools and capabilities:
+{tools_desc}
+Consider these factors when assessing feasibility:
+1. Information availability: Can the required information be found through the available tools?
+2. Computational requirements: Can any necessary calculations be performed with Python?
+3. Multi-step reasoning: Can the question be broken down into manageable sub-tasks?
+4. Time constraints: Is this a question that can be reasonably answered (not requiring real-time data beyond our capabilities)?
+For GAIA questions, be optimistic about feasibility if:
+- The question requires factual research that can be done with search tools
+- Mathematical/computational work that can be done with Python
+- Multi-step reasoning combining information from different sources
+- Analysis of data that can be obtained through available tools
+Be pessimistic only if:
+- The question requires real-time data we cannot access
+- Requires tools/capabilities we don't have
+- Asks for subjective opinions without factual basis
+- Requires interaction with external systems we cannot access
+Provide a clear and direct assessment: "Feasible" or "Not Feasible", followed by a concise reason. Do NOT include any conversational or exploratory thinking. Get straight to the point.
+"""
+    @staticmethod
+    def get_coordinator_system_prompt(available_tools: list) -> str:
+        """Build the system prompt for the coordinator agent.
+
+        Args:
+            available_tools: Tool objects exposing ``name`` and ``description``.
+
+        Returns:
+            The prompt text with the tool list embedded.
+        """
+        tools_desc = GAIAPrompts._describe_tools(available_tools)
+        return f"""You are a strategic coordinator for solving GAIA benchmark questions.
+Your role is to break down complex questions into specific, actionable subtasks that an executor agent can complete using available tools.
+Key principles for GAIA questions:
+1. **Decomposition**: Break complex questions into logical, sequential steps
+2. **Tool Selection**: Choose the most appropriate tools for each subtask
+3. **Information Flow**: Ensure each step builds on previous results
+4. **Verification**: Include steps to verify and cross-check important findings
+5. **Synthesis**: Plan how individual results will combine into a final answer
+When defining subtasks:
+- Be specific about what information is needed
+- Specify which tools are most appropriate for each task
+- Only reference tools that are available in the list below
+- Consider the logical dependencies between steps
+- Include verification steps for critical information
+- Think about how to handle potential failures or missing information
+Available tools and capabilities:
+{tools_desc}
+Your output should be a direct plan of subtasks. Do NOT include any conversational preamble, self-correction, or extended reasoning. Just the plan. Mark is_final=True only when you have enough information to provide a complete, accurate answer to the original question.
+"""
+    @staticmethod
+    def get_coordinator_context_prompt(question: str) -> str:
+        """Build the per-iteration context message for the coordinator.
+
+        Args:
+            question: The original question being solved.
+        """
+        return f"""Original Question: {question}
+=== YOUR TASK ===
+Review the original question and all previous work. Determine if we can now provide a complete answer, or if we need additional information. If additional work is needed, define a specific, actionable next step. Be direct; do not include conversational text or detailed reasoning
+"""
+    @staticmethod
+    def get_coordinator_max_iterations_prompt(question: str) -> str:
+        """Build the prompt used when the coordinator reaches max iterations.
+
+        Args:
+            question: The original question, repeated in the prompt as a reminder.
+        """
+        return f"""You have reached the maximum number of planning iterations
+Based on all the work completed so far, you must now provide the best possible final answer to the original question using the information gathered.
+Review all previous subtasks and their results. Synthesize the information to provide:
+1. A comprehensive answer based on available evidence
+2. Clear reasoning showing how you arrived at this conclusion
+3. Acknowledgment of any limitations or uncertainties
+4. A confidence assessment of your answer
+Even if the investigation is incomplete, provide the most accurate answer possible based on the evidence collected.
+As a reminder, the current question is: {question}
+Provide your answer directly and concisely, without any extra conversational text or extensive self-reflection
+"""
+    @staticmethod
+    def get_executor_system_prompt(available_tools: list) -> str:
+        """Build the system prompt for the executor agent.
+
+        Args:
+            available_tools: Tool names (``str``) or tool objects exposing
+                ``name``; both forms may be mixed.
+
+        Returns:
+            The prompt text with a comma-separated list of tool names.
+        """
+        # The sibling prompt builders receive tool objects, while a plain
+        # str.join only works on strings. Accept both so passing the same
+        # tool list everywhere does not raise TypeError.
+        tools_list = ", ".join(getattr(tool, "name", tool) for tool in available_tools)
+        return f"""You are an executor agent specialized in completing specific research and analysis tasks for GAIA benchmark questions.
+Available Tools: {tools_list}
+Your approach should be:
+1. **Understand the Task**: Carefully analyze what specific information or result is needed.
+2. **Plan Your Approach**: Determine which tools to use and in what order.
+3. **Execute Systematically**: Use tools methodically to gather information.
+4. **Verify Results**: Cross-check important findings when possible.
+5. **Summarize Clearly**: Provide clear, concise results for the coordinator.
+Best practices:
+- Start with the most reliable sources for factual information.
+- Use multiple sources to verify critical facts.
+- For calculations, show your work and double-check results.
+- If information is conflicting, note the discrepancies.
+- If you encounter errors or limitations, document them clearly.
+Be thorough but efficient. Focus on getting accurate, complete information for your specific task rather than exploring broadly
+Your output should directly be the tool call or the factual result/summary for the task. Do NOT include conversational text, elaborate reasoning, or step-by-step thinking processes. Get straight to the action or the answer
+"""
+    @staticmethod
+    def get_executor_task_prompt(current_step: str) -> str:
+        """Build the task message handed to the executor.
+
+        Args:
+            current_step: Description of the step the executor should perform.
+        """
+        return f"Current Task: {current_step}\n"
+    @staticmethod
+    def get_executor_max_iterations_prompt(current_step: str) -> str:
+        """Build the prompt used when the executor reaches max iterations.
+
+        Args:
+            current_step: The task description, repeated in the prompt as a reminder.
+        """
+        return f"""You have reached the maximum number of execution steps for this task.
+Provide a concise conclusion based on the work you've completed:
+1. Summarize what you accomplished
+2. Present any findings or results obtained
+3. Note any limitations or incomplete aspects
+4. Assess the reliability of your findings
+Even if the task is not fully complete, provide the best possible summary of your work and findings directly, without any conversational preamble or unnecessary explanation
+As a reminder, the current task is: {current_step}"""
+    @staticmethod
+    def get_finalizer_prompt() -> str:
+        """Return the system prompt for generating the final answer."""
+        return """You are responsible for generating the final answer to a GAIA benchmark question.
+GAIA questions are complex, multi-step problems that require:
+- Factual accuracy based on reliable sources
+- Clear logical reasoning
+- Integration of information from multiple sources
+- Appropriate confidence assessment
+Your task is to:
+1. **Synthesize Information**: Combine all findings from the research process
+2. **Reason Clearly**: Show how the evidence leads to your conclusion
+3. **Address the Question**: Directly answer what was asked
+4. **Assess Confidence**: Provide an honest assessment of answer reliability
+5. **Note Limitations**: Acknowledge any gaps or uncertainties
+Quality standards:
+- Base conclusions on evidence, not assumptions
+- Distinguish between facts and inferences
+- If information is incomplete, state what is known vs. unknown
+- Provide specific, actionable answers when possible
+- Use appropriate precision for numerical answers
+The final answer should be comprehensive enough to fully address the original question while being concise and well-organized. Provide the answer directly and clearly, avoiding any self-reflection or conversational lead-ins
+"""

src/agents/tools.py ADDED Viewed

	@@ -0,0 +1,28 @@

+# Set up tools
+from langchain_community.tools import DuckDuckGoSearchRun
+from langchain_community.tools.arxiv import ArxivQueryRun
+from langchain_community.tools.pubmed.tool import PubmedQueryRun
+from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun
+from langchain_core.tools import Tool
+from langchain_experimental.utilities import PythonREPL
+# The module added in this commit is src/tools/custom_wikipedia_tool.py; the
+# previous spelling ("custome_") pointed at a module that does not exist.
+from tools.custom_wikipedia_tool import wikipedia_tool
+# NOTE(review): this tool runs whatever code the model sends through
+# python_repl.run; confirm the deployment sandboxes it appropriately.
+python_repl = PythonREPL()
+repl_tool = Tool(
+    name="python_repl",
+    description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
+    func=python_repl.run,
+)
+# Initialize all tools: web search, literature search (PubMed, Semantic
+# Scholar, arXiv), the custom Wikipedia tool, and the Python REPL.
+tools = [
+    DuckDuckGoSearchRun(),
+    PubmedQueryRun(),
+    wikipedia_tool,
+    SemanticScholarQueryRun(),
+    ArxivQueryRun(),
+    repl_tool,
+]

src/tools/custom_tools.py CHANGED Viewed

@@ -15,11 +15,16 @@ import whisper
 import wikipedia
 from bs4 import BeautifulSoup
 from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
 from langchain_community.utilities import ArxivAPIWrapper, WikipediaAPIWrapper
 from openpyxl import load_workbook
 from smolagents import tool
 # === SEARCH AND WEB TOOLS ===
 @tool

 import wikipedia
 from bs4 import BeautifulSoup
 from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
+from langchain_community.tools.wikidata.tool import WikidataAPIWrapper, WikidataQueryRun
 from langchain_community.utilities import ArxivAPIWrapper, WikipediaAPIWrapper
 from openpyxl import load_workbook
 from smolagents import tool
 # === SEARCH AND WEB TOOLS ===
+# Extra LangChain query tools (Wikidata and Wikipedia), each built with a
+# default-constructed API wrapper. Instantiated once at module import time.
+additional_tools = [
+    WikidataQueryRun(api_wrapper=WikidataAPIWrapper()),
+    WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
+]
 @tool

src/tools/custom_wikipedia_tool.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import re
+import wikipedia
+from langchain_core.tools import tool
+@tool
+def wikipedia_tool(
+    title: str, action: str = "summary", section_name: str | None = None, sentences: int = 3
+) -> str | list[str]:
+    """
+    Retrieve information from Wikipedia pages with flexible content extraction.
+    This tool provides four main operations for Wikipedia content:
+    - Extract summaries of varying lengths
+    - Retrieve complete page content including all sections
+    - List all section titles to understand page structure
+    - Extract specific sections by name (case-insensitive; an exact title match
+      is preferred, otherwise the first title containing the name is used)
+    The tool handles section parsing by recognizing Wikipedia's markup format
+    (== Section ==, === Subsection ===, etc.). A section's content includes its
+    nested subsections. If no section matches, the available section titles are
+    listed in the returned message.
+    Args:
+        title: Wikipedia page title (supports auto-suggestion for typos)
+        action: Operation type - "summary", "full", "sections", or "section"
+        section_name: Name of specific section (required when action="section")
+        sentences: Number of sentences for summary (default: 3)
+    Returns:
+        - "summary": Summary text (str)
+        - "full": Full page content including all sections (str)
+        - "sections": List of all section titles (List[str])
+        - "section": Content of the matching section, or a message listing the
+          available sections if none matches (str)
+    Raises:
+        ValueError: If action is not one of the four supported values, or if
+            action="section" is used without section_name.
+    Examples:
+        wikipedia_tool("Python programming language", "summary")
+        wikipedia_tool("Albert Einstein", "full")
+        wikipedia_tool("Climate change", "sections")
+        wikipedia_tool("Machine learning", "section", section_name="History")
+    """
+    def parse_sections(content: str) -> list[tuple]:
+        """Split page text into (title, body) pairs based on == markers.
+
+        A section's body runs until the next header of the same or a higher
+        level, so a parent section keeps its subsections instead of coming
+        back empty when a subsection header follows it directly.
+        """
+        # The backreference \1 requires the closing marker to have the same
+        # number of "=" as the opening one, so len(group(1)) is the level.
+        header_pattern = r"^(={2,})\s*([^=]+?)\s*\1\s*$"
+        headers = list(re.finditer(header_pattern, content, re.MULTILINE))
+        if not headers:
+            return [("Full Content", content.strip())]
+        sections = []
+        for index, header in enumerate(headers):
+            level = len(header.group(1))
+            # End at the first later header that is not nested below this one.
+            end_pos = next(
+                (later.start() for later in headers[index + 1 :] if len(later.group(1)) <= level),
+                len(content),
+            )
+            sections.append((header.group(2).strip(), content[header.end() : end_pos].strip()))
+        return sections
+    # Configure wikipedia settings
+    wikipedia.set_lang("en")
+    wikipedia.set_rate_limiting(True)
+    if action == "summary":
+        return wikipedia.summary(title, sentences=sentences, auto_suggest=True)
+    if action == "full":
+        return wikipedia.page(title, auto_suggest=True).content
+    if action == "sections":
+        page = wikipedia.page(title, auto_suggest=True)
+        return [section_title for section_title, _ in parse_sections(page.content)]
+    if action == "section":
+        if not section_name:
+            raise ValueError("section_name is required when action='section'")
+        page = wikipedia.page(title, auto_suggest=True)
+        sections = parse_sections(page.content)
+        wanted = section_name.lower()
+        # Prefer an exact title match so that asking for "History" does not
+        # return an earlier section such as "Prehistory".
+        for section_title, section_content in sections:
+            if section_title.lower() == wanted:
+                return section_content
+        # Otherwise fall back to the first title that contains the name.
+        for section_title, section_content in sections:
+            if wanted in section_title.lower():
+                return section_content
+        # No match: tell the caller which sections exist so it can retry.
+        return "No matching section found. Available sections: " + ", ".join(
+            [section_title for section_title, _ in sections]
+        )
+    raise ValueError(f"Invalid action: {action}. Must be one of: summary, full, sections, section")