Spaces:
Runtime error
Runtime error
Add OpenAI integration and enhance toolset in `agent.py` and `tools.py`
Browse files
- src/gaia_solving_agent/agent.py +25 -15
- src/gaia_solving_agent/tools.py +29 -1
src/gaia_solving_agent/agent.py
CHANGED
|
@@ -4,12 +4,14 @@ from typing import Literal
|
|
| 4 |
from llama_index.core.agent.workflow import FunctionAgent
|
| 5 |
from llama_index.core.prompts import RichPromptTemplate
|
| 6 |
from llama_index.llms.nebius import NebiusLLM
|
|
|
|
|
|
|
| 7 |
from llama_index.tools.requests import RequestsToolSpec
|
| 8 |
from llama_index.tools.wikipedia import WikipediaToolSpec
|
| 9 |
from workflows import Workflow, step, Context
|
| 10 |
from workflows.events import StartEvent, Event, StopEvent
|
| 11 |
|
| 12 |
-
from gaia_solving_agent import NEBIUS_API_KEY
|
| 13 |
from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
|
| 14 |
from gaia_solving_agent.tools import (
|
| 15 |
tavily_search_web,
|
|
@@ -17,6 +19,9 @@ from gaia_solving_agent.tools import (
|
|
| 17 |
simple_web_page_reader_toolspec,
|
| 18 |
vllm_ask_image_tool,
|
| 19 |
youtube_transcript_reader_toolspec,
|
|
|
|
|
|
|
|
|
|
| 20 |
)
|
| 21 |
from gaia_solving_agent.utils import extract_pattern
|
| 22 |
|
|
@@ -26,6 +31,19 @@ light_model_name = "Qwen/Qwen2.5-32B-Instruct"
|
|
| 26 |
balanced_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
| 27 |
reasoning_model_name = "deepseek-ai/DeepSeek-R1-0528"
|
| 28 |
vlm_model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" # For VLM needs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def get_llm(model_name=cheap_model_name):
|
| 31 |
return NebiusLLM(
|
|
@@ -192,26 +210,18 @@ You are a helpful assistant that searches Wikipedia and visit Wikipedia pages.
|
|
| 192 |
)
|
| 193 |
|
| 194 |
|
| 195 |
-
# gaia_solving_agent = AgentWorkflow(
|
| 196 |
-
# agents = [tavily_search_engine, visit_web_page_agent, wikipedia_agent],
|
| 197 |
-
# initial_state = dict(),
|
| 198 |
-
# root_agent = tavily_search_engine.name,
|
| 199 |
-
# handoff_prompt = None,
|
| 200 |
-
# handoff_output_prompt = None,
|
| 201 |
-
# state_prompt = None,
|
| 202 |
-
# num_concurrent_runs=1,
|
| 203 |
-
# )
|
| 204 |
-
|
| 205 |
-
|
| 206 |
gaia_solving_agent = FunctionAgent(
|
| 207 |
tools = [
|
|
|
|
| 208 |
vllm_ask_image_tool,
|
| 209 |
tavily_search_web,
|
| 210 |
*load_and_search_tools_from_toolspec(WikipediaToolSpec()),
|
| 211 |
*simple_web_page_reader_toolspec.to_tool_list(),
|
| 212 |
*youtube_transcript_reader_toolspec.to_tool_list(),
|
|
|
|
|
|
|
| 213 |
],
|
| 214 |
-
llm=
|
| 215 |
system_prompt="""
|
| 216 |
You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
|
| 217 |
|
|
@@ -221,10 +231,10 @@ gaia_solving_agent = FunctionAgent(
|
|
| 221 |
- Analyse or compute : specialized tools to provide a specific analysis or computation.
|
| 222 |
|
| 223 |
Try to get resources before querying them.
|
|
|
|
|
|
|
| 224 |
If the analysis require a new external resource get it first.(e.g. a set of rules or a process)
|
| 225 |
|
| 226 |
-
When calling a tool, the inputs should be in a valid JSON format.
|
| 227 |
-
|
| 228 |
You will be provided a question, some known facts summarizing the user provided context and some sub-tasks to complete.
|
| 229 |
You should follow the order of the sub-tasks.
|
| 230 |
""",
|
|
|
|
| 4 |
from llama_index.core.agent.workflow import FunctionAgent
|
| 5 |
from llama_index.core.prompts import RichPromptTemplate
|
| 6 |
from llama_index.llms.nebius import NebiusLLM
|
| 7 |
+
from llama_index.llms.mistralai import MistralAI
|
| 8 |
+
from llama_index.llms.openai import OpenAI
|
| 9 |
from llama_index.tools.requests import RequestsToolSpec
|
| 10 |
from llama_index.tools.wikipedia import WikipediaToolSpec
|
| 11 |
from workflows import Workflow, step, Context
|
| 12 |
from workflows.events import StartEvent, Event, StopEvent
|
| 13 |
|
| 14 |
+
from gaia_solving_agent import NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
|
| 15 |
from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
|
| 16 |
from gaia_solving_agent.tools import (
|
| 17 |
tavily_search_web,
|
|
|
|
| 19 |
simple_web_page_reader_toolspec,
|
| 20 |
vllm_ask_image_tool,
|
| 21 |
youtube_transcript_reader_toolspec,
|
| 22 |
+
text_content_analysis,
|
| 23 |
+
research_paper_reader_toolspec,
|
| 24 |
+
get_text_representation_of_additional_file,
|
| 25 |
)
|
| 26 |
from gaia_solving_agent.utils import extract_pattern
|
| 27 |
|
|
|
|
| 31 |
balanced_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
| 32 |
reasoning_model_name = "deepseek-ai/DeepSeek-R1-0528"
|
| 33 |
vlm_model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" # For VLM needs
|
| 34 |
+
openai_llm = OpenAI(
|
| 35 |
+
model="gpt-4.1",
|
| 36 |
+
api_key=OPENAI_API_KEY,
|
| 37 |
+
temperature=.1,
|
| 38 |
+
max_retries=5,
|
| 39 |
+
)
|
| 40 |
+
mistral_llm = MistralAI(
|
| 41 |
+
model="mistral-small-latest",
|
| 42 |
+
api_key=MISTRAL_API_KEY,
|
| 43 |
+
temperature=.1,
|
| 44 |
+
max_retries=5,
|
| 45 |
+
# is_function_calling_model=True,
|
| 46 |
+
)
|
| 47 |
|
| 48 |
def get_llm(model_name=cheap_model_name):
|
| 49 |
return NebiusLLM(
|
|
|
|
| 210 |
)
|
| 211 |
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
gaia_solving_agent = FunctionAgent(
|
| 214 |
tools = [
|
| 215 |
+
get_text_representation_of_additional_file,
|
| 216 |
vllm_ask_image_tool,
|
| 217 |
tavily_search_web,
|
| 218 |
*load_and_search_tools_from_toolspec(WikipediaToolSpec()),
|
| 219 |
*simple_web_page_reader_toolspec.to_tool_list(),
|
| 220 |
*youtube_transcript_reader_toolspec.to_tool_list(),
|
| 221 |
+
*research_paper_reader_toolspec.to_tool_list(),
|
| 222 |
+
text_content_analysis,
|
| 223 |
],
|
| 224 |
+
llm=openai_llm,
|
| 225 |
system_prompt="""
|
| 226 |
You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
|
| 227 |
|
|
|
|
| 231 |
- Analyse or compute : specialized tools to provide a specific analysis or computation.
|
| 232 |
|
| 233 |
Try to get resources before querying them.
|
| 234 |
+
If it is an additional file, you can access its content through the get_text_representation_of_additional_file tool.
|
| 235 |
+
If you need the original Document, you can use the llamaindex context with ctx.store.get("additional_file").
|
| 236 |
If the analysis require a new external resource get it first.(e.g. a set of rules or a process)
|
| 237 |
|
|
|
|
|
|
|
| 238 |
You will be provided a question, some known facts summarizing the user provided context and some sub-tasks to complete.
|
| 239 |
You should follow the order of the sub-tasks.
|
| 240 |
""",
|
src/gaia_solving_agent/tools.py
CHANGED
|
@@ -5,13 +5,14 @@ from llama_index.core.tools.tool_spec.base import BaseToolSpec
|
|
| 5 |
from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
|
| 6 |
from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
|
| 7 |
from llama_index.multi_modal_llms.nebius import NebiusMultiModal
|
|
|
|
| 8 |
from llama_index.readers.web import SimpleWebPageReader
|
| 9 |
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
|
| 10 |
from llama_index.readers.papers import ArxivReader
|
| 11 |
from tavily import AsyncTavilyClient
|
| 12 |
from workflows import Context
|
| 13 |
|
| 14 |
-
from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY
|
| 15 |
|
| 16 |
|
| 17 |
def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
|
|
@@ -58,6 +59,33 @@ async def get_text_representation_of_additional_file(ctx: Context) -> str :
|
|
| 58 |
return text_representation
|
| 59 |
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
async def vllm_ask_image_tool(ctx: Context, query: str) -> str:
|
| 62 |
"""
|
| 63 |
Asynchronously processes a visual-linguistic query paired with image data
|
|
|
|
| 5 |
from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
|
| 6 |
from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
|
| 7 |
from llama_index.multi_modal_llms.nebius import NebiusMultiModal
|
| 8 |
+
from llama_index.llms.openai import OpenAI
|
| 9 |
from llama_index.readers.web import SimpleWebPageReader
|
| 10 |
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
|
| 11 |
from llama_index.readers.papers import ArxivReader
|
| 12 |
from tavily import AsyncTavilyClient
|
| 13 |
from workflows import Context
|
| 14 |
|
| 15 |
+
from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
|
| 16 |
|
| 17 |
|
| 18 |
def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
|
|
|
|
| 59 |
return text_representation
|
| 60 |
|
| 61 |
|
| 62 |
+
async def text_content_analysis(text: str, query: str) -> str:
    """
    Analysis of the text provided as input.
    For example, extracting or filtering information from it.

    Parameters:
        text (str): The text to analyze.
        query (str): What you need to analyze in the text or extract from it.

    Returns:
        str
            The result of the analysis.
    """
    # NOTE(review): the docstring above is presumably surfaced to the agent as
    # the tool description, so its wording is deliberately left unchanged.
    reasoning_llm = OpenAI(
        model="o3-mini",
        api_key=OPENAI_API_KEY,
    )
    prompt = f"""
    You are a good at text analysis. You are being asked the following:
    { query }

    There is the text you must analyze :
    { text }
    """
    # This coroutine runs on the agent's event loop: await the async
    # completion API instead of the blocking `complete()` call, which would
    # stall every other task for the duration of the HTTP round-trip.
    response = await reasoning_llm.acomplete(prompt)
    return response.text
|
| 87 |
+
|
| 88 |
+
|
| 89 |
async def vllm_ask_image_tool(ctx: Context, query: str) -> str:
|
| 90 |
"""
|
| 91 |
Asynchronously processes a visual-linguistic query paired with image data
|