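"""Async tool functions for the GAIA solving agent: Tavily web search and
Mistral multimodal image question answering."""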
from llama_index.core.schema import ImageDocument
from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
from tavily import AsyncTavilyClient

from gaia_solving_agent import TAVILY_API_KEY, MISTRAL_API_KEY


async def tavily_search_web(query: str) -> str:
    """Useful for searching the web to answer questions."""
    # Treat an unset key, or a placeholder value containing "x" (as shipped
    # in `.example.env`), as missing.
    if TAVILY_API_KEY is None or "x" in TAVILY_API_KEY:
        raise ValueError("Tavily API key not set.")
    client = AsyncTavilyClient(api_key=TAVILY_API_KEY)
    return str(await client.search(query))


async def vllm_ask_image(query: str, images: ImageDocument | list[ImageDocument]) -> str:
    """
    Asynchronously answers a visual-linguistic query about the given
    image(s), combining visual understanding and language processing to
    answer the query from the image content.

    Parameters:
        query: str
            The question or request related to the provided image(s).
        images: ImageDocument | list[ImageDocument]
            Image data as a LlamaIndex ImageDocument or a list of them.

    Returns:
        str
            The answer to the query, based on the processed image content.
    """
    multimodal_llm = MistralAIMultiModal(
        model="mistral-small-2506",
        api_key=MISTRAL_API_KEY,
        temperature=0.1,
        max_retries=5,
    )
    # Normalize a single ImageDocument to a list.
    if not isinstance(images, list):
        images = [images]
    # Await the async completion so the event loop is not blocked.
    vllm_output = await multimodal_llm.acomplete(
        prompt=query,
        image_documents=images,
    )
    return vllm_output.text
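

# --- Usage sketch (not from the original file) ---
# A minimal, hypothetical example of how these coroutines could be exposed
# as LlamaIndex agent tools and smoke-tested; the FunctionTool wiring and
# the sample query below are assumptions, not code taken from this repo.
if __name__ == "__main__":
    import asyncio

    from llama_index.core.tools import FunctionTool

    # Wrap the async functions as tools; their docstrings become the tool
    # descriptions the agent sees.
    search_tool = FunctionTool.from_defaults(async_fn=tavily_search_web)
    image_tool = FunctionTool.from_defaults(async_fn=vllm_ask_image)

    # Quick smoke test of the web-search tool on its own (requires a valid
    # TAVILY_API_KEY).
    print(asyncio.run(tavily_search_web("What is the capital of France?")))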