lwant committed on
Commit
449333b
Β·
1 Parent(s): 7e0251d

Add OpenAI integration and enhance toolset in `agent.py` and `tools.py`

Browse files
src/gaia_solving_agent/agent.py CHANGED
@@ -4,12 +4,14 @@ from typing import Literal
4
  from llama_index.core.agent.workflow import FunctionAgent
5
  from llama_index.core.prompts import RichPromptTemplate
6
  from llama_index.llms.nebius import NebiusLLM
 
 
7
  from llama_index.tools.requests import RequestsToolSpec
8
  from llama_index.tools.wikipedia import WikipediaToolSpec
9
  from workflows import Workflow, step, Context
10
  from workflows.events import StartEvent, Event, StopEvent
11
 
12
- from gaia_solving_agent import NEBIUS_API_KEY
13
  from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
14
  from gaia_solving_agent.tools import (
15
  tavily_search_web,
@@ -17,6 +19,9 @@ from gaia_solving_agent.tools import (
17
  simple_web_page_reader_toolspec,
18
  vllm_ask_image_tool,
19
  youtube_transcript_reader_toolspec,
 
 
 
20
  )
21
  from gaia_solving_agent.utils import extract_pattern
22
 
@@ -26,6 +31,19 @@ light_model_name = "Qwen/Qwen2.5-32B-Instruct"
26
  balanced_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
27
  reasoning_model_name = "deepseek-ai/DeepSeek-R1-0528"
28
  vlm_model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" # For VLM needs
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def get_llm(model_name=cheap_model_name):
31
  return NebiusLLM(
@@ -192,26 +210,18 @@ You are a helpful assistant that searches Wikipedia and visit Wikipedia pages.
192
  )
193
 
194
 
195
- # gaia_solving_agent = AgentWorkflow(
196
- # agents = [tavily_search_engine, visit_web_page_agent, wikipedia_agent],
197
- # initial_state = dict(),
198
- # root_agent = tavily_search_engine.name,
199
- # handoff_prompt = None,
200
- # handoff_output_prompt = None,
201
- # state_prompt = None,
202
- # num_concurrent_runs=1,
203
- # )
204
-
205
-
206
  gaia_solving_agent = FunctionAgent(
207
  tools = [
 
208
  vllm_ask_image_tool,
209
  tavily_search_web,
210
  *load_and_search_tools_from_toolspec(WikipediaToolSpec()),
211
  *simple_web_page_reader_toolspec.to_tool_list(),
212
  *youtube_transcript_reader_toolspec.to_tool_list(),
 
 
213
  ],
214
- llm=get_llm(balanced_model_name),
215
  system_prompt="""
216
  You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
217
 
@@ -221,10 +231,10 @@ gaia_solving_agent = FunctionAgent(
221
  - Analyse or compute : specialized tools to provide a specific analysis or computation.
222
 
223
  Try to get resources before querying them.
 
 
224
  If the analysis require a new external resource get it first.(e.g. a set of rules or a process)
225
 
226
- When calling a tool, the inputs should be in a valid JSON format.
227
-
228
  You will be provided a question, some known facts summarizing the user provided context and some sub-tasks to complete.
229
  You should follow the order of the sub-tasks.
230
  """,
 
4
  from llama_index.core.agent.workflow import FunctionAgent
5
  from llama_index.core.prompts import RichPromptTemplate
6
  from llama_index.llms.nebius import NebiusLLM
7
+ from llama_index.llms.mistralai import MistralAI
8
+ from llama_index.llms.openai import OpenAI
9
  from llama_index.tools.requests import RequestsToolSpec
10
  from llama_index.tools.wikipedia import WikipediaToolSpec
11
  from workflows import Workflow, step, Context
12
  from workflows.events import StartEvent, Event, StopEvent
13
 
14
+ from gaia_solving_agent import NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
15
  from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
16
  from gaia_solving_agent.tools import (
17
  tavily_search_web,
 
19
  simple_web_page_reader_toolspec,
20
  vllm_ask_image_tool,
21
  youtube_transcript_reader_toolspec,
22
+ text_content_analysis,
23
+ research_paper_reader_toolspec,
24
+ get_text_representation_of_additional_file,
25
  )
26
  from gaia_solving_agent.utils import extract_pattern
27
 
 
31
  balanced_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
32
  reasoning_model_name = "deepseek-ai/DeepSeek-R1-0528"
33
  vlm_model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" # For VLM needs
34
# Primary hosted LLM for the solving agent: OpenAI GPT-4.1, with a low
# temperature to keep answers near-deterministic and retries for transient
# API failures.
openai_llm = OpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-4.1",
    max_retries=5,
    temperature=0.1,
)

# Alternative hosted LLM: Mistral "small" tier, configured identically.
mistral_llm = MistralAI(
    api_key=MISTRAL_API_KEY,
    model="mistral-small-latest",
    max_retries=5,
    temperature=0.1,
    # is_function_calling_model=True,
)
47
 
48
  def get_llm(model_name=cheap_model_name):
49
  return NebiusLLM(
 
210
  )
211
 
212
 
 
 
 
 
 
 
 
 
 
 
 
213
  gaia_solving_agent = FunctionAgent(
214
  tools = [
215
+ get_text_representation_of_additional_file,
216
  vllm_ask_image_tool,
217
  tavily_search_web,
218
  *load_and_search_tools_from_toolspec(WikipediaToolSpec()),
219
  *simple_web_page_reader_toolspec.to_tool_list(),
220
  *youtube_transcript_reader_toolspec.to_tool_list(),
221
+ *research_paper_reader_toolspec.to_tool_list(),
222
+ text_content_analysis,
223
  ],
224
+ llm=openai_llm,
225
  system_prompt="""
226
  You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
227
 
 
231
  - Analyse or compute : specialized tools to provide a specific analysis or computation.
232
 
233
  Try to get resources before querying them.
234
+ If it is an additional file, you can access its content through the get_text_representation_of_additional_file tool.
235
+ If you need the original Document, you can use the llamaindex context with ctx.store.get("additional_file").
236
  If the analysis require a new external resource get it first.(e.g. a set of rules or a process)
237
 
 
 
238
  You will be provided a question, some known facts summarizing the user provided context and some sub-tasks to complete.
239
  You should follow the order of the sub-tasks.
240
  """,
src/gaia_solving_agent/tools.py CHANGED
@@ -5,13 +5,14 @@ from llama_index.core.tools.tool_spec.base import BaseToolSpec
5
  from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
6
  from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
7
  from llama_index.multi_modal_llms.nebius import NebiusMultiModal
 
8
  from llama_index.readers.web import SimpleWebPageReader
9
  from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
10
  from llama_index.readers.papers import ArxivReader
11
  from tavily import AsyncTavilyClient
12
  from workflows import Context
13
 
14
- from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY
15
 
16
 
17
  def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
@@ -58,6 +59,33 @@ async def get_text_representation_of_additional_file(ctx: Context) -> str :
58
  return text_representation
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  async def vllm_ask_image_tool(ctx: Context, query: str) -> str:
62
  """
63
  Asynchronously processes a visual-linguistic query paired with image data
 
5
  from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
6
  from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
7
  from llama_index.multi_modal_llms.nebius import NebiusMultiModal
8
+ from llama_index.llms.openai import OpenAI
9
  from llama_index.readers.web import SimpleWebPageReader
10
  from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
11
  from llama_index.readers.papers import ArxivReader
12
  from tavily import AsyncTavilyClient
13
  from workflows import Context
14
 
15
+ from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
16
 
17
 
18
  def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
 
59
  return text_representation
60
 
61
 
62
async def text_content_analysis(text: str, query: str) -> str:
    """
    Analysis of the text provided as input.
    For example, extracting or filtering information from it.

    Parameters:
        text (str): The text to analyze.
        query (str): What you need to analyze in the text or extract from it.

    Returns:
        str
            The result of the analysis.
    """
    # Reasoning-oriented model; constructed per call so the tool stays stateless.
    reasoning_llm = OpenAI(
        model="o3-mini",
        api_key=OPENAI_API_KEY,
    )
    prompt = f"""
    You are a good at text analysis. You are being asked the following:
    { query }

    There is the text you must analyze :
    { text }
    """
    # Bug fix: this coroutine previously called the blocking synchronous
    # `complete()`, which stalls the event loop while waiting on the OpenAI
    # API. Use the async counterpart `acomplete()` and await it instead.
    response = await reasoning_llm.acomplete(prompt)
    return response.text
87
+
88
+
89
  async def vllm_ask_image_tool(ctx: Context, query: str) -> str:
90
  """
91
  Asynchronously processes a visual-linguistic query paired with image data