# Spaces status: Runtime error
import asyncio

from llama_index.core.schema import ImageDocument
from llama_index.core.tools import FunctionTool
from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
from llama_index.core.tools.tool_spec.base import BaseToolSpec
from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
from llama_index.multi_modal_llms.nebius import NebiusMultiModal
from llama_index.readers.web import SimpleWebPageReader
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
from tavily import AsyncTavilyClient

from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY
def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
    """Wrap every tool of *tool_spec* in a ``LoadAndSearchToolSpec``.

    Each tool returned by ``tool_spec.to_tool_list()`` is passed to
    ``LoadAndSearchToolSpec.from_defaults``; the tools exposed by each
    resulting spec's own ``to_tool_list()`` are gathered, in order, into
    one flat list.

    Parameters:
        tool_spec: The tool spec whose tools should be wrapped.

    Returns:
        The flattened list of tools produced by all the wrapping specs.
    """
    return [
        wrapped_tool
        for base_tool in tool_spec.to_tool_list()
        for wrapped_tool in LoadAndSearchToolSpec.from_defaults(base_tool).to_tool_list()
    ]
async def tavily_search_web(query: str) -> str:
    """Useful for using the web to answer questions."""
    # The key is considered usable only when it is set and contains no "x".
    # NOTE(review): the "x" test presumably targets a placeholder value such
    # as "xxxx", but it also rejects any real key that happens to contain a
    # lowercase "x" -- confirm the intended placeholder format.
    key_is_usable = TAVILY_API_KEY is not None and "x" not in TAVILY_API_KEY
    if not key_is_usable:
        raise ValueError("Tavily API key not set.")

    # A fresh client is built for each call; the search response is handed
    # back in its string form.
    search_client = AsyncTavilyClient(api_key=TAVILY_API_KEY)
    search_response = await search_client.search(query)
    return str(search_response)
async def vllm_ask_image(query: str, images: ImageDocument | list[ImageDocument]) -> str:
    """
    Asynchronously processes a visual-linguistic query paired with image data
    and returns corresponding results. This function leverages visual
    understanding and language processing to answer the provided query based
    on the content of the given image(s).

    Parameters:
        query: str
            The question or request related to the provided image(s).
        images: ImageDocument | list[ImageDocument]
            Image data provided as a llamaindex ImageDocument or list of.

    Returns:
        str
            The result or response to the provided query based on the processed
            image content.
    """
    # A new multimodal client is configured for every call.
    multimodal_llm = MistralAIMultiModal(
        model="mistral-small-2506",
        api_key=MISTRAL_API_KEY,
        temperature=0.1,
        max_retries=5,
    )

    # Accept a single document as a convenience: the model call below is
    # always given a list.
    if not isinstance(images, list):
        images = [images]

    # `complete` is a synchronous call. Invoking it directly inside this
    # coroutine would stall the event loop for the whole request (retries
    # included), so it is run in a worker thread and awaited instead.
    vllm_output = await asyncio.to_thread(
        multimodal_llm.complete,
        prompt=query,
        image_documents=images,
    )
    return vllm_output.text
# On-demand loader tool built around a SimpleWebPageReader configured with
# html_to_text=True.
simple_web_page_reader_tool = OnDemandLoaderTool.from_defaults(
    SimpleWebPageReader(html_to_text=True),
    description="Tool for loading content from a web page and return it as text",
    name="simple_web_page_reader_tool",
)

# Load-and-search tool spec derived from the loader tool above.
simple_web_page_reader_toolspec = LoadAndSearchToolSpec.from_defaults(
    simple_web_page_reader_tool
)
# On-demand loader tool built around a YoutubeTranscriptReader.
youtube_transcript_reader_tool = OnDemandLoaderTool.from_defaults(
    YoutubeTranscriptReader(),
    description="Tool for loading the audio transcript from a youtube video and return it as text",
    name="youtube_transcript_reader_tool",
)

# Load-and-search tool spec derived from the loader tool above.
youtube_transcript_reader_toolspec = LoadAndSearchToolSpec.from_defaults(
    youtube_transcript_reader_tool
)