File size: 7,925 Bytes
5e9156a
 
 
7e0251d
4f70efd
 
 
 
dc47641
 
449333b
4f70efd
84c7ca2
7e0251d
470fd47
fa2bac9
0f23fa9
fa2bac9
449333b
fa2bac9
5e9156a
470fd47
5e9156a
fa2bac9
470fd47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f70efd
 
470fd47
4f70efd
 
 
fa2bac9
 
 
 
 
 
8c16572
895a491
7e0251d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7fc271
7e0251d
 
 
 
 
449333b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f23fa9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c4e981
0f23fa9
 
dc47641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f70efd
 
 
 
84c7ca2
895a491
 
 
 
 
 
 
 
 
 
 
 
 
4f70efd
470fd47
 
 
84c7ca2
 
 
 
895a491
 
 
 
 
 
 
 
 
 
 
84c7ca2
470fd47
 
 
7e0251d
 
 
 
 
 
 
 
 
470fd47
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding

from llama_index.core.schema import ImageDocument, Document
from llama_index.core.tools import FunctionTool
from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
from llama_index.core.tools.tool_spec.base import BaseToolSpec
from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
from llama_index.multi_modal_llms.nebius import NebiusMultiModal
from llama_index.llms.openai import OpenAI
from llama_index.readers.web import SimpleWebPageReader
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
from llama_index.readers.papers import ArxivReader
from llama_index.tools.wikipedia import WikipediaToolSpec
from tavily import AsyncTavilyClient
from workflows import Context

from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY

# Change LlamaIndex's global default embedding model to an OpenAI one.
# NOTE: this assignment runs at import time, so merely importing this module
# mutates the process-wide `Settings` object.
embedding_model = "text-embedding-3-small"
Settings.embed_model = OpenAIEmbedding(model=embedding_model, api_key=OPENAI_API_KEY)


def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec, **kwargs) -> list[FunctionTool]:
    """
    Wrap every tool exposed by a tool spec into its load-and-search tools.

    Each tool returned by ``tool_spec.to_tool_list()`` is handed to
    ``LoadAndSearchToolSpec.from_defaults`` and the tools produced by every
    resulting spec are gathered, in order, into one flat list.

    Args:
        tool_spec (BaseToolSpec): The tool specification whose tools must be
            wrapped.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``LoadAndSearchToolSpec.from_defaults`` for every wrapped tool.

    Returns:
        list[FunctionTool]: The concatenation of the tool lists produced for
        each source tool (empty when the tool spec exposes no tool).
    """
    # Flatten "one LoadAndSearchToolSpec per source tool" into a single list.
    return [
        wrapped_tool
        for source_tool in tool_spec.to_tool_list()
        for wrapped_tool in LoadAndSearchToolSpec.from_defaults(source_tool, **kwargs).to_tool_list()
    ]


async def tavily_search_web(query: str) -> str:
    """Useful for using the web to answer questions.

    Args:
        query (str): The search query to submit to Tavily.

    Returns:
        str: The string form of the response returned by the Tavily client.

    Raises:
        ValueError: If the Tavily API key is unset or rejected by the
            placeholder check below.
    """
    # NOTE(review): any key containing the letter "x" is rejected, presumably
    # to catch "xxxx"-style placeholders -- confirm that real keys can never
    # contain an "x", otherwise valid keys are refused here.
    key_is_usable = TAVILY_API_KEY is not None and "x" not in TAVILY_API_KEY
    if not key_is_usable:
        raise ValueError("Tavily API key not set.")

    search_response = await AsyncTavilyClient(api_key=TAVILY_API_KEY).search(query)
    return str(search_response)


async def get_text_representation_of_additional_file(ctx: Context) -> str:
    """
    Return the text content of the additional file kept in the workflow context.

    The object stored under the ``"additional_file"`` key of ``ctx.store`` is
    read. When it is a LlamaIndex ``Document`` with non-empty content, that
    content is returned. In every other case a human-readable explanation is
    returned instead of raising, so the caller can pick another tool.

    Parameters
    ----------
    ctx : Context
        The llamaindex context object which contains the additional file.

    Returns
    -------
    str
        The document content, or a message explaining why no text
        representation is available (wrong object type or empty content).
    """
    stored_file = await ctx.store.get("additional_file")

    if isinstance(stored_file, Document):
        content = stored_file.get_content()
        if content:
            return content
        # A Document without any textual content.
        return f"The additional {stored_file.__class__.__name__} file does not have any text content. I cannot get text representation, you should try other tool."

    # Anything that is not a Document cannot be rendered as text here.
    return f"The additional file is not a LlamaIndex Document but a {type(stored_file)} object. I cannot get text representation, you should try other tool."


async def text_content_analysis(text: str, query: str) -> str:
    """
    Analysis of the text provided as input.
    For example, extracting or filtering information from it.

    Parameters:
        text (str): The text to analyze.
        query (str): What you need to analyze in the text or extract from it.

    Returns:
    str
        The result of the analysis.
    """
    reasoning_llm = OpenAI(
        model="o3-mini",
        api_key=OPENAI_API_KEY,
    )
    prompt = f"""
You are a good at text analysis. You are being asked the following:
{ query }

There is the text you must analyze :
{ text }    
"""
    # This function is a coroutine: use the async completion API so the event
    # loop is not blocked for the whole duration of the LLM request (the
    # synchronous `complete()` would stall every other running task).
    completion = await reasoning_llm.acomplete(prompt)
    return completion.text


async def vllm_ask_image_tool(ctx: Context, query: str) -> str:
    """
    Answer a question about the image(s) attached to the current workflow.

    The image data is read from the workflow context (the object stored under
    the ``"additional_file"`` key) and forwarded, together with the query, to
    ``vllm_ask_image``.

    Parameters:
    ctx: Context
        LlamaIndex Workflow Context holding the image(s) to look at.
    query: str
        The question or request related to the stored image(s).

    Returns:
    str
        The answer produced for the query from the image content.
    """
    stored_images = await ctx.store.get("additional_file")
    answer = await vllm_ask_image(query=query, images=stored_images)
    return answer


async def vllm_ask_image(query: str, images: ImageDocument | list[ImageDocument]) -> str:
    """
    Asynchronously processes a visual-linguistic query paired with image data
    and returns corresponding results. This function leverages visual
    understanding and language processing to answer the provided query based
    on the content of the given image(s).

    Parameters:
    query: str
        The question or request related to the provided image(s).
    images: ImageDocument | list[ImageDocument]
        Image data provided as a llamaindex ImageDocument or list of.
        A single document is wrapped into a one-element list.

    Returns:
    str
        The result or response to the provided query based on the processed
        image content.
    """
    # Local import keeps this block self-contained (asyncio is stdlib).
    import asyncio

    multimodal_llm = MistralAIMultiModal(
        model="mistral-small-2506",
        api_key=MISTRAL_API_KEY,
        temperature=0.1,
        max_retries=5,
    )

    image_documents = images if isinstance(images, list) else [images]

    # `complete()` is a blocking network call; calling it directly from this
    # coroutine would freeze the event loop until the model answers. Running
    # it in a worker thread keeps other tasks responsive while preserving the
    # exact same request.
    vllm_output = await asyncio.to_thread(
        multimodal_llm.complete,
        prompt=query,
        image_documents=image_documents,
    )
    return vllm_output.text


# Agent tool built from `SimpleWebPageReader` (HTML converted to text).
# The description is the text attached to the tool, so it is part of the
# runtime behaviour and must be edited with care.
simple_web_page_reader_tool = OnDemandLoaderTool.from_defaults(
    SimpleWebPageReader(html_to_text=True),
    name="simple_web_page_reader_tool",
    description="""
Tool for loading content from a web page and return it as text.
Only the text content of the page is returned.

You need to be provided with a URL.
DO NOT GUESS ANY URL !
If you don't have a URL in the user request, first use a browser tool to get one. 

Do not use this tool for:
- Web search
- Wikipedia pages
=> You have specialized tools for those needs.
""",
)
# Load-and-search tool spec derived from the web page reader tool above.
simple_web_page_reader_toolspec = LoadAndSearchToolSpec.from_defaults(
    simple_web_page_reader_tool,
)

# Agent tool built from `YoutubeTranscriptReader`.
# The description is what the agent reads to decide how to format the link,
# so the URL shapes listed here must be real, accepted YouTube URL forms:
# embed links use a path segment (`/embed/{video_id}`), and short links need
# the `/` between the host and the video id.
youtube_transcript_reader_tool = OnDemandLoaderTool.from_defaults(
    YoutubeTranscriptReader(),
    name="youtube_transcript_reader_tool",
    description=r"""
Tool for loading the audio transcript from a youtube video and return it as text.

You must provide a youtube link in one of the following format:
Supported formats include:
- https://www.youtube.com/watch?v={video_id} (with or without 'www.')
- https://www.youtube.com/embed/{video_id} (with or without 'www.')
- https://youtu.be/{video_id} (never includes www subdomain)

If you are provided with a youtube link in the wrong format, make it fit one the supported format.
""",
)
# Load-and-search tool spec derived from the transcript reader tool above.
youtube_transcript_reader_toolspec = LoadAndSearchToolSpec.from_defaults(
    youtube_transcript_reader_tool,
)


# Agent tool built from `ArxivReader`; the description below is the text
# attached to the tool, so it is part of the runtime behaviour.
research_paper_reader_tool = OnDemandLoaderTool.from_defaults(
    ArxivReader(),
    name="research_paper_reader_tool",
    description=r"""
Gets a search query, return a list of Documents of the top corresponding scientific papers on Arxiv.
""",
)
# Load-and-search tool spec derived from the Arxiv reader tool above.
research_paper_reader_toolspec = LoadAndSearchToolSpec.from_defaults(
    research_paper_reader_tool,
)

# Load-and-search tools for every tool exposed by `WikipediaToolSpec`.
# NOTE: despite its name, this is a `list[FunctionTool]` (the return value of
# `load_and_search_tools_from_toolspec`), not a tool spec object.
wikipedia_toolspec = load_and_search_tools_from_toolspec(
    WikipediaToolSpec(),
)