lwant committed on
Commit
0f23fa9
·
1 Parent(s): 3dbdc7b

Add `vllm_ask_image_tool`, a `Context`-aware wrapper around `vllm_ask_image`, to implement asynchronous image query handling in tools.

Browse files
Files changed (1) hide show
  1. src/gaia_solving_agent/tools.py +22 -0
src/gaia_solving_agent/tools.py CHANGED
@@ -8,6 +8,7 @@ from llama_index.multi_modal_llms.nebius import NebiusMultiModal
8
  from llama_index.readers.web import SimpleWebPageReader
9
  from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
10
  from tavily import AsyncTavilyClient
 
11
 
12
  from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY
13
 
@@ -26,6 +27,27 @@ async def tavily_search_web(query: str) -> str:
26
  client = AsyncTavilyClient(api_key=TAVILY_API_KEY)
27
  return str(await client.search(query))
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  async def vllm_ask_image(query: str, images: ImageDocument | list[ImageDocument]) -> str:
31
  """
 
8
  from llama_index.readers.web import SimpleWebPageReader
9
  from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
10
  from tavily import AsyncTavilyClient
11
+ from workflows import Context
12
 
13
  from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY
14
 
 
27
  client = AsyncTavilyClient(api_key=TAVILY_API_KEY)
28
  return str(await client.search(query))
29
 
30
+ async def vllm_ask_image_tool(ctx: Context, query: str) -> str:
31
+ """
32
+ Asynchronously processes a visual-linguistic query paired with image data
33
+ and returns corresponding results. This function leverages visual
34
+ understanding and language processing to answer the provided query based
35
+ on the content of the given image(s).
36
+
37
+ Parameters:
38
+ ctx: Context
39
+ LlamaIndex Workflow Context from which the image(s) are retrieved
40
+ query: str
41
+ The question or request related to the provided image(s).
42
+
43
+ Returns:
44
+ str
45
+ The result or response to the provided query based on the processed
46
+ image content.
47
+ """
48
+ images = await ctx.get("additional_file")
49
+ return await vllm_ask_image(query=query, images=images)
50
+
51
 
52
  async def vllm_ask_image(query: str, images: ImageDocument | list[ImageDocument]) -> str:
53
  """