lwant committed on
Commit
470fd47
·
1 Parent(s): 7eff6fa

Add Wikipedia tool and enable embedding model configuration for tools in `tools.py`

Browse files
src/gaia_solving_agent/agent.py CHANGED
@@ -6,8 +6,6 @@ from llama_index.core.prompts import RichPromptTemplate
6
  from llama_index.llms.nebius import NebiusLLM
7
  from llama_index.llms.mistralai import MistralAI
8
  from llama_index.llms.openai import OpenAI
9
- from llama_index.tools.requests import RequestsToolSpec
10
- from llama_index.tools.wikipedia import WikipediaToolSpec
11
  from workflows import Workflow, step, Context
12
  from workflows.events import StartEvent, Event, StopEvent
13
 
@@ -15,13 +13,13 @@ from gaia_solving_agent import NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
15
  from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
16
  from gaia_solving_agent.tools import (
17
  tavily_search_web,
18
- load_and_search_tools_from_toolspec,
19
  simple_web_page_reader_toolspec,
20
  vllm_ask_image_tool,
21
  youtube_transcript_reader_toolspec,
22
  text_content_analysis,
23
  research_paper_reader_toolspec,
24
  get_text_representation_of_additional_file,
 
25
  )
26
  from gaia_solving_agent.utils import extract_pattern
27
 
@@ -171,7 +169,7 @@ gaia_solving_agent = FunctionAgent(
171
  get_text_representation_of_additional_file,
172
  vllm_ask_image_tool,
173
  tavily_search_web,
174
- *load_and_search_tools_from_toolspec(WikipediaToolSpec()),
175
  *simple_web_page_reader_toolspec.to_tool_list(),
176
  *youtube_transcript_reader_toolspec.to_tool_list(),
177
  *research_paper_reader_toolspec.to_tool_list(),
 
6
  from llama_index.llms.nebius import NebiusLLM
7
  from llama_index.llms.mistralai import MistralAI
8
  from llama_index.llms.openai import OpenAI
 
 
9
  from workflows import Workflow, step, Context
10
  from workflows.events import StartEvent, Event, StopEvent
11
 
 
13
  from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
14
  from gaia_solving_agent.tools import (
15
  tavily_search_web,
 
16
  simple_web_page_reader_toolspec,
17
  vllm_ask_image_tool,
18
  youtube_transcript_reader_toolspec,
19
  text_content_analysis,
20
  research_paper_reader_toolspec,
21
  get_text_representation_of_additional_file,
22
+ wikipedia_toolspec,
23
  )
24
  from gaia_solving_agent.utils import extract_pattern
25
 
 
169
  get_text_representation_of_additional_file,
170
  vllm_ask_image_tool,
171
  tavily_search_web,
172
+ *wikipedia_toolspec,
173
  *simple_web_page_reader_toolspec.to_tool_list(),
174
  *youtube_transcript_reader_toolspec.to_tool_list(),
175
  *research_paper_reader_toolspec.to_tool_list(),
src/gaia_solving_agent/tools.py CHANGED
@@ -9,16 +9,33 @@ from llama_index.llms.openai import OpenAI
9
  from llama_index.readers.web import SimpleWebPageReader
10
  from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
11
  from llama_index.readers.papers import ArxivReader
 
12
  from tavily import AsyncTavilyClient
13
  from workflows import Context
14
 
15
  from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
16
 
 
17
 
18
- def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  tools_list = []
20
  for tool in tool_spec.to_tool_list():
21
- tools_list.extend(LoadAndSearchToolSpec.from_defaults(tool).to_tool_list())
22
  return tools_list
23
 
24
 
@@ -144,6 +161,7 @@ async def vllm_ask_image(query: str, images: ImageDocument | list[ImageDocument]
144
 
145
  simple_web_page_reader_tool = OnDemandLoaderTool.from_defaults(
146
  SimpleWebPageReader(html_to_text=True),
 
147
  name="simple_web_page_reader_tool",
148
  description="""
149
  Tool for loading content from a web page and return it as text.
@@ -159,10 +177,14 @@ Do not use this tool for:
159
  => You have specialized tools for those needs.
160
  """,
161
  )
162
- simple_web_page_reader_toolspec = LoadAndSearchToolSpec.from_defaults(simple_web_page_reader_tool)
 
 
 
163
 
164
  youtube_transcript_reader_tool = OnDemandLoaderTool.from_defaults(
165
  YoutubeTranscriptReader(),
 
166
  name="youtube_transcript_reader_tool",
167
  description=r"""
168
  Tool for loading the audio transcript from a youtube video and return it as text.
@@ -176,14 +198,26 @@ Supported formats include:
176
  If you are provided with a youtube link in the wrong format, make it fit one the supported format.
177
  """,
178
  )
179
- youtube_transcript_reader_toolspec = LoadAndSearchToolSpec.from_defaults(youtube_transcript_reader_tool)
 
 
 
180
 
181
 
182
  research_paper_reader_tool = OnDemandLoaderTool.from_defaults(
183
  ArxivReader(),
 
184
  name="research_paper_reader_tool",
185
  description=r"""
186
  Gets a search query, return a list of Documents of the top corresponding scientific papers on Arxiv.
187
  """,
188
  )
189
- research_paper_reader_toolspec = LoadAndSearchToolSpec.from_defaults(research_paper_reader_tool)
 
 
 
 
 
 
 
 
 
9
  from llama_index.readers.web import SimpleWebPageReader
10
  from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
11
  from llama_index.readers.papers import ArxivReader
12
+ from llama_index.tools.wikipedia import WikipediaToolSpec
13
  from tavily import AsyncTavilyClient
14
  from workflows import Context
15
 
16
  from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
17
 
18
+ embedding_model = "text-embedding-3-small"
19
 
20
+
21
+ def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec, **kwargs) -> list[FunctionTool]:
22
+ """
23
+ Loads and searches tools based on the provided tool specification, allowing
24
+ enhanced flexibility and customization through additional parameters.
25
+
26
+ Args:
27
+ tool_spec (BaseToolSpec): The tool specification derived from BaseToolSpec
28
+ which defines the source tools to be processed.
29
+ **kwargs: Arbitrary keyword arguments for customizing the tool processing
30
+ and search behavior.
31
+
32
+ Returns:
33
+ list[FunctionTool]: A list of processed and searched tools derived from
34
+ the input tool_spec and based on the provided custom parameters.
35
+ """
36
  tools_list = []
37
  for tool in tool_spec.to_tool_list():
38
+ tools_list.extend(LoadAndSearchToolSpec.from_defaults(tool, **kwargs).to_tool_list())
39
  return tools_list
40
 
41
 
 
161
 
162
  simple_web_page_reader_tool = OnDemandLoaderTool.from_defaults(
163
  SimpleWebPageReader(html_to_text=True),
164
+ index_kwargs={"embed_model": embedding_model},
165
  name="simple_web_page_reader_tool",
166
  description="""
167
  Tool for loading content from a web page and return it as text.
 
177
  => You have specialized tools for those needs.
178
  """,
179
  )
180
+ simple_web_page_reader_toolspec = LoadAndSearchToolSpec.from_defaults(
181
+ simple_web_page_reader_tool,
182
+ index_kwargs={"embed_model": embedding_model},
183
+ )
184
 
185
  youtube_transcript_reader_tool = OnDemandLoaderTool.from_defaults(
186
  YoutubeTranscriptReader(),
187
+ index_kwargs={"embed_model": embedding_model},
188
  name="youtube_transcript_reader_tool",
189
  description=r"""
190
  Tool for loading the audio transcript from a youtube video and return it as text.
 
198
  If you are provided with a youtube link in the wrong format, make it fit one the supported format.
199
  """,
200
  )
201
+ youtube_transcript_reader_toolspec = LoadAndSearchToolSpec.from_defaults(
202
+ youtube_transcript_reader_tool,
203
+ index_kwargs={"embed_model": embedding_model},
204
+ )
205
 
206
 
207
  research_paper_reader_tool = OnDemandLoaderTool.from_defaults(
208
  ArxivReader(),
209
+ index_kwargs={"embed_model": embedding_model},
210
  name="research_paper_reader_tool",
211
  description=r"""
212
  Gets a search query, return a list of Documents of the top corresponding scientific papers on Arxiv.
213
  """,
214
  )
215
+ research_paper_reader_toolspec = LoadAndSearchToolSpec.from_defaults(
216
+ research_paper_reader_tool,
217
+ index_kwargs={"embed_model": embedding_model},
218
+ )
219
+
220
+ wikipedia_toolspec = load_and_search_tools_from_toolspec(
221
+ WikipediaToolSpec(),
222
+ index_kwargs={"embed_model": embedding_model},
223
+ )