agercas committed on
Commit
ee85a4b
·
1 Parent(s): 1ffaf53

add tools and prompts

Browse files
src/agents/models.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pydantic models
2
+ from pydantic import BaseModel, Field
3
+
4
+
5
class FeasibilityCheck(BaseModel):
    """The result of the feasibility check"""

    # NOTE: the class docstring and the field descriptions are surfaced by
    # pydantic in the generated schema, so their text is kept verbatim.

    # Required boolean verdict.
    feasible: bool = Field(
        ...,
        description="Whether the question is feasible to answer with the available tools",
    )
    # Required free-text justification for the verdict.
    reasoning: str = Field(
        ...,
        description="The reasoning for the feasibility check",
    )
10
+
11
+
12
class NextStep(BaseModel):
    """The next step in the plan"""

    # NOTE: the class docstring and the field descriptions are surfaced by
    # pydantic in the generated schema, so their text is kept verbatim.

    # Required description of the work to perform next.
    step: str = Field(
        ...,
        description="Description of the next step to take",
    )
    # Required list of tool names (plain strings) for this step.
    tools: list[str] = Field(
        ...,
        description="List of tool names to use for this step",
    )
    # Required flag marking the last step of the plan.
    is_final: bool = Field(
        ...,
        description="Whether this is the final step",
    )
18
+
19
+
20
class FinalConclusion(BaseModel):
    """A final conclusion from the executor"""

    # NOTE: the class docstring and the field descriptions are surfaced by
    # pydantic in the generated schema, so their text is kept verbatim.

    # Required conclusion text.
    conclusion: str = Field(
        ...,
        description="The conclusion based on the work completed so far",
    )
    # Required summary of whatever partial results exist.
    partial_results: str = Field(
        ...,
        description="Summary of partial results obtained",
    )
25
+
26
+
27
class FinalAnswer(BaseModel):
    """The final answer to the question"""

    # NOTE: the class docstring and the field descriptions are surfaced by
    # pydantic in the generated schema, so their text is kept verbatim.

    # Required answer text.
    answer: str = Field(
        ...,
        description="The comprehensive final answer to the question",
    )
    # Required explanation supporting the answer.
    reasoning: str = Field(
        ...,
        description="The reasoning behind the final answer",
    )
src/agents/prompts.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class GAIAPrompts:
    """Centralized prompts for the GAIA benchmark multi-agent system.

    Every method is a ``@staticmethod`` that returns a prompt string; the class
    only serves as a namespace and holds no state.
    """

    @staticmethod
    def _describe_tools(available_tools: list) -> str:
        """Render one ``- name: description`` bullet per tool, newline-separated.

        Each item must expose ``name`` and ``description`` attributes.
        """
        return "\n".join(f"- {tool.name}: {tool.description}" for tool in available_tools)

    @staticmethod
    def get_feasibility_check_prompt(available_tools: list) -> str:
        """Build the system prompt for the feasibility check.

        Args:
            available_tools: Tool objects exposing ``name`` and ``description``.

        Returns:
            The prompt text with the tool list embedded.
        """
        tools_desc = GAIAPrompts._describe_tools(available_tools)

        return f"""You are a feasibility assessor for the GAIA benchmark, which tests real-world assistant capabilities.

Your task is to determine if a question can be answered using the available tools and capabilities.

Available tools and capabilities:
{tools_desc}

Consider these factors when assessing feasibility:
1. Information availability: Can the required information be found through the available tools?
2. Computational requirements: Can any necessary calculations be performed with Python?
3. Multi-step reasoning: Can the question be broken down into manageable sub-tasks?
4. Time constraints: Is this a question that can be reasonably answered (not requiring real-time data beyond our capabilities)?

For GAIA questions, be optimistic about feasibility if:
- The question requires factual research that can be done with search tools
- Mathematical/computational work that can be done with Python
- Multi-step reasoning combining information from different sources
- Analysis of data that can be obtained through available tools

Be pessimistic only if:
- The question requires real-time data we cannot access
- Requires tools/capabilities we don't have
- Asks for subjective opinions without factual basis
- Requires interaction with external systems we cannot access

Provide a clear and direct assessment: "Feasible" or "Not Feasible", followed by a concise reason. Do NOT include any conversational or exploratory thinking. Get straight to the point.
"""

    @staticmethod
    def get_coordinator_system_prompt(available_tools: list) -> str:
        """Build the system prompt for the coordinator agent.

        Args:
            available_tools: Tool objects exposing ``name`` and ``description``.

        Returns:
            The prompt text with the tool list embedded.
        """
        tools_desc = GAIAPrompts._describe_tools(available_tools)

        return f"""You are a strategic coordinator for solving GAIA benchmark questions.

Your role is to break down complex questions into specific, actionable subtasks that an executor agent can complete using available tools.

Key principles for GAIA questions:
1. **Decomposition**: Break complex questions into logical, sequential steps
2. **Tool Selection**: Choose the most appropriate tools for each subtask
3. **Information Flow**: Ensure each step builds on previous results
4. **Verification**: Include steps to verify and cross-check important findings
5. **Synthesis**: Plan how individual results will combine into a final answer

When defining subtasks:
- Be specific about what information is needed
- Specify which tools are most appropriate for each task
- Only reference tools that are available in the list below
- Consider the logical dependencies between steps
- Include verification steps for critical information
- Think about how to handle potential failures or missing information

Available tools and capabilities:
{tools_desc}

Your output should be a direct plan of subtasks. Do NOT include any conversational preamble, self-correction, or extended reasoning. Just the plan. Mark is_final=True only when you have enough information to provide a complete, accurate answer to the original question.
"""

    @staticmethod
    def get_coordinator_context_prompt(question: str) -> str:
        """Build the per-turn coordinator prompt that restates the question.

        Args:
            question: The original question text.

        Returns:
            The prompt text with the question embedded.
        """
        return f"""Original Question: {question}

=== YOUR TASK ===
Review the original question and all previous work. Determine if we can now provide a complete answer, or if we need additional information. If additional work is needed, define a specific, actionable next step. Be direct; do not include conversational text or detailed reasoning
"""

    @staticmethod
    def get_coordinator_max_iterations_prompt(question: str) -> str:
        """Build the prompt used once the coordinator hits its iteration cap.

        Args:
            question: The original question text.

        Returns:
            The prompt text asking for a best-effort final answer.
        """
        return f"""You have reached the maximum number of planning iterations

Based on all the work completed so far, you must now provide the best possible final answer to the original question using the information gathered.

Review all previous subtasks and their results. Synthesize the information to provide:
1. A comprehensive answer based on available evidence
2. Clear reasoning showing how you arrived at this conclusion
3. Acknowledgment of any limitations or uncertainties
4. A confidence assessment of your answer

Even if the investigation is incomplete, provide the most accurate answer possible based on the evidence collected.
As a reminder, the current question is: {question}

Provide your answer directly and concisely, without any extra conversational text or extensive self-reflection
"""

    @staticmethod
    def get_executor_system_prompt(available_tools: list) -> str:
        """Build the system prompt for the executor agent.

        Args:
            available_tools: Tool names (``str``) and/or tool objects exposing a
                ``name`` attribute. Plain names were the only accepted form
                before; objects are now accepted too so this method takes the
                same input as the other ``available_tools`` prompts.

        Returns:
            The prompt text with a comma-separated list of tool names embedded.
        """
        # Strings are used as-is; anything else contributes its ``name``.
        tools_list = ", ".join(
            tool if isinstance(tool, str) else tool.name for tool in available_tools
        )

        return f"""You are an executor agent specialized in completing specific research and analysis tasks for GAIA benchmark questions.

Available Tools: {tools_list}

Your approach should be:
1. **Understand the Task**: Carefully analyze what specific information or result is needed.
2. **Plan Your Approach**: Determine which tools to use and in what order.
3. **Execute Systematically**: Use tools methodically to gather information.
4. **Verify Results**: Cross-check important findings when possible.
5. **Summarize Clearly**: Provide clear, concise results for the coordinator.

Best practices:
- Start with the most reliable sources for factual information.
- Use multiple sources to verify critical facts.
- For calculations, show your work and double-check results.
- If information is conflicting, note the discrepancies.
- If you encounter errors or limitations, document them clearly.

Be thorough but efficient. Focus on getting accurate, complete information for your specific task rather than exploring broadly

Your output should directly be the tool call or the factual result/summary for the task. Do NOT include conversational text, elaborate reasoning, or step-by-step thinking processes. Get straight to the action or the answer
"""

    @staticmethod
    def get_executor_task_prompt(current_step: str) -> str:
        """Build the one-line task prompt handed to the executor.

        Args:
            current_step: Description of the step to execute.

        Returns:
            ``"Current Task: <current_step>"`` followed by a newline.
        """
        return f"Current Task: {current_step}\n"

    @staticmethod
    def get_executor_max_iterations_prompt(current_step: str) -> str:
        """Build the prompt used once the executor hits its step cap.

        Args:
            current_step: Description of the step that was being executed.

        Returns:
            The prompt text asking for a best-effort summary (no trailing newline).
        """
        return f"""You have reached the maximum number of execution steps for this task.

Provide a concise conclusion based on the work you've completed:
1. Summarize what you accomplished
2. Present any findings or results obtained
3. Note any limitations or incomplete aspects
4. Assess the reliability of your findings

Even if the task is not fully complete, provide the best possible summary of your work and findings directly, without any conversational preamble or unnecessary explanation

As a reminder, the current task is: {current_step}"""

    @staticmethod
    def get_finalizer_prompt() -> str:
        """Return the fixed system prompt for generating the final answer."""
        return """You are responsible for generating the final answer to a GAIA benchmark question.

GAIA questions are complex, multi-step problems that require:
- Factual accuracy based on reliable sources
- Clear logical reasoning
- Integration of information from multiple sources
- Appropriate confidence assessment

Your task is to:
1. **Synthesize Information**: Combine all findings from the research process
2. **Reason Clearly**: Show how the evidence leads to your conclusion
3. **Address the Question**: Directly answer what was asked
4. **Assess Confidence**: Provide an honest assessment of answer reliability
5. **Note Limitations**: Acknowledge any gaps or uncertainties

Quality standards:
- Base conclusions on evidence, not assumptions
- Distinguish between facts and inferences
- If information is incomplete, state what is known vs. unknown
- Provide specific, actionable answers when possible
- Use appropriate precision for numerical answers

The final answer should be comprehensive enough to fully address the original question while being concise and well-organized. Provide the answer directly and clearly, avoiding any self-reflection or conversational lead-ins
"""
src/agents/tools.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Set up tools

from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools.arxiv import ArxivQueryRun
from langchain_community.tools.pubmed.tool import PubmedQueryRun
from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun
from langchain_core.tools import Tool
from langchain_experimental.utilities import PythonREPL

# Module name fixed: the file is custom_wikipedia_tool.py (was misspelled
# "custome_wikipedia_tool", which would fail at import time).
from tools.custom_wikipedia_tool import wikipedia_tool

# NOTE(review): this tool executes whatever Python the agent sends it.
# Confirm it only ever runs in a sandboxed/isolated environment.
python_repl = PythonREPL()
repl_tool = Tool(
    name="python_repl",
    description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
    func=python_repl.run,
)


# Initialize all tools
# Registry of every tool instance made available to the agents.
tools = [
    DuckDuckGoSearchRun(),
    PubmedQueryRun(),
    wikipedia_tool,
    SemanticScholarQueryRun(),
    ArxivQueryRun(),
    repl_tool,
]
src/tools/custom_tools.py CHANGED
@@ -15,11 +15,16 @@ import whisper
15
  import wikipedia
16
  from bs4 import BeautifulSoup
17
  from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
 
18
  from langchain_community.utilities import ArxivAPIWrapper, WikipediaAPIWrapper
19
  from openpyxl import load_workbook
20
  from smolagents import tool
21
 
22
  # === SEARCH AND WEB TOOLS ===
 
 
 
 
23
 
24
 
25
  @tool
 
15
  import wikipedia
16
  from bs4 import BeautifulSoup
17
  from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
18
+ from langchain_community.tools.wikidata.tool import WikidataAPIWrapper, WikidataQueryRun
19
  from langchain_community.utilities import ArxivAPIWrapper, WikipediaAPIWrapper
20
  from openpyxl import load_workbook
21
  from smolagents import tool
22
 
23
  # === SEARCH AND WEB TOOLS ===
24
+ additional_tools = [
25
+ WikidataQueryRun(api_wrapper=WikidataAPIWrapper()),
26
+ WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
27
+ ]
28
 
29
 
30
  @tool
src/tools/custom_wikipedia_tool.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ import wikipedia
4
+ from langchain_core.tools import tool
5
+
6
+
7
@tool
def wikipedia_tool(
    title: str, action: str = "summary", section_name: str | None = None, sentences: int = 3
) -> str | list[str]:
    """
    Retrieve information from Wikipedia pages with flexible content extraction.

    This tool provides four main operations for Wikipedia content:
    - Extract summaries of varying lengths
    - Retrieve complete page content including all sections
    - List all section titles to understand page structure
    - Extract specific sections by name with fuzzy matching

    The tool handles section parsing by recognizing Wikipedia's markup format
    (== Section ==, === Subsection ===, etc.) and automatically suggests similar
    sections if exact matches aren't found.

    Args:
        title: Wikipedia page title (supports auto-suggestion for typos)
        action: Operation type - "summary", "full", "sections", or "section"
        section_name: Name of specific section (required when action="section")
        sentences: Number of sentences for summary (default: 3)

    Returns:
        - "summary": Summary text (str)
        - "full": Full page content including all sections (str)
        - "sections": List of all section titles (List[str])
        - "section": Content of the matching section, or a message listing the
          available sections if none matches (str)

    Raises:
        ValueError: If action is not one of the four supported values, or if
            action="section" is used without a section_name.

    Examples:
        wikipedia_tool("Python programming language", "summary")
        wikipedia_tool("Albert Einstein", "full")
        wikipedia_tool("Climate change", "sections")
        wikipedia_tool("Machine learning", "section", section_name="History")
    """

    def parse_sections(content: str) -> list[tuple]:
        """Split page text into (title, body) pairs based on == markers.

        Text before the first header is not returned. When the text has no
        headers at all, a single ("Full Content", text) pair is returned.
        """
        # The backreference \1 requires the closing run of '=' to match the
        # opening run, so "== A ==" and "=== B ===" are both headers.
        section_pattern = r"^(={2,})\s*([^=]+?)\s*\1\s*$"
        matches = list(re.finditer(section_pattern, content, re.MULTILINE))

        if not matches:
            return [("Full Content", content.strip())]

        sections = []
        for i, match in enumerate(matches):
            # A body runs from the end of its header to the start of the next
            # header (of any level), or to the end of the text.
            # NOTE(review): a parent section immediately followed by a
            # subsection header therefore has an empty body.
            end_pos = matches[i + 1].start() if i + 1 < len(matches) else len(content)
            heading = match.group(2).strip()
            sections.append((heading, content[match.end():end_pos].strip()))

        return sections

    # Configure wikipedia settings
    wikipedia.set_lang("en")
    wikipedia.set_rate_limiting(True)

    if action == "summary":
        return wikipedia.summary(title, sentences=sentences, auto_suggest=True)

    if action == "full":
        return wikipedia.page(title, auto_suggest=True).content

    if action == "sections":
        page = wikipedia.page(title, auto_suggest=True)
        return [section_title for section_title, _ in parse_sections(page.content)]

    if action == "section":
        if not section_name:
            raise ValueError("section_name is required when action='section'")

        page = wikipedia.page(title, auto_suggest=True)
        sections = parse_sections(page.content)
        wanted = section_name.lower()

        # Prefer an exact (case-insensitive) title so that e.g. "History" is
        # not shadowed by an earlier "Early history" section.
        for section_title, section_content in sections:
            if section_title.lower() == wanted:
                return section_content

        # Fall back to the first title that contains the requested name.
        for section_title, section_content in sections:
            if wanted in section_title.lower():
                return section_content

        # No match: tell the caller which sections exist.
        return "No matching section found. Available sections: " + ", ".join(
            [section_title for section_title, _ in sections]
        )

    raise ValueError(f"Invalid action: {action}. Must be one of: summary, full, sections, section")