Agent_Course_Final_Assignment

Runtime error

App Files Files Community

Agent_Course_Final_Assignment / src /gaia_solving_agent /tools.py

lwant

Add `YoutubeTranscriptReader` tool integration, update dependencies, and enhance agent functionality.

84c7ca2 10 months ago

raw

history blame

3.03 kB

	from llama_index.core.schema import ImageDocument
	from llama_index.core.tools import FunctionTool
	from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
	from llama_index.core.tools.tool_spec.base import BaseToolSpec
	from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec
	from llama_index.multi_modal_llms.mistralai import MistralAIMultiModal
	from llama_index.multi_modal_llms.nebius import NebiusMultiModal
	from llama_index.readers.web import SimpleWebPageReader
	from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
	from tavily import AsyncTavilyClient

	from gaia_solving_agent import TAVILY_API_KEY, NEBIUS_API_KEY, MISTRAL_API_KEY


	def load_and_search_tools_from_toolspec(tool_spec: BaseToolSpec) -> list[FunctionTool]:
	    """Return the load-and-search tools derived from every tool in ``tool_spec``.

	    Each tool produced by ``tool_spec.to_tool_list()`` is passed to
	    ``LoadAndSearchToolSpec.from_defaults`` and the tools of the resulting
	    spec are collected, in order, into a single flat list.
	    """
	    return [
	        wrapped_tool
	        for base_tool in tool_spec.to_tool_list()
	        for wrapped_tool in LoadAndSearchToolSpec.from_defaults(base_tool).to_tool_list()
	    ]


	async def tavily_search_web(query: str) -> str:
	    """Useful for using the web to answer questions."""
	    # The docstring above is kept verbatim on purpose: it may be surfaced as the
	    # tool description when this function is wrapped as an agent tool.
	    #
	    # Returns the Tavily search response converted with ``str()``.
	    # Raises ValueError when the API key is missing, blank, or a placeholder.
	    #
	    # The previous check (`"x" in TAVILY_API_KEY`) rejected every real key that
	    # merely contained the letter "x". Only treat the key as a placeholder when,
	    # apart from an optional "tvly-" prefix, it consists solely of "x" characters.
	    # NOTE(review): the exact placeholder format is defined outside this file --
	    # confirm it is an all-"x" string.
	    key_body = (TAVILY_API_KEY or "").strip().lower().removeprefix("tvly-")
	    if not key_body or set(key_body) == {"x"}:
	        raise ValueError("Tavily API key not set.")

	    client = AsyncTavilyClient(api_key=TAVILY_API_KEY)
	    return str(await client.search(query))


	async def vllm_ask_image(query: str, images: ImageDocument | list[ImageDocument]) -> str:
	    """
	    Asynchronously processes a visual-linguistic query paired with image data
	    and returns corresponding results. This function leverages visual
	    understanding and language processing to answer the provided query based
	    on the content of the given image(s).

	    Parameters:
	        query: str
	            The question or request related to the provided image(s).
	        images: ImageDocument | list[ImageDocument]
	            Image data provided as a llamaindex ImageDocument or list of.

	    Returns:
	        str
	            The result or response to the provided query based on the processed
	            image content.
	    """
	    # Function-scope import so this block does not depend on a file-level change.
	    import asyncio

	    multimodal_llm = MistralAIMultiModal(
	        model="mistral-small-2506",
	        api_key=MISTRAL_API_KEY,
	        temperature=0.1,
	        max_retries=5,
	    )

	    # Normalise a single document to the list form passed as `image_documents`.
	    if not isinstance(images, list):
	        images = [images]

	    # `complete` is a synchronous call; invoking it directly inside this
	    # coroutine would stall the event loop for the whole request. Run it in a
	    # worker thread and await the result instead.
	    vllm_output = await asyncio.to_thread(
	        multimodal_llm.complete,
	        prompt=query,
	        image_documents=images,
	    )
	    return vllm_output.text


	# On-demand loader tool built around SimpleWebPageReader; the reader is
	# configured with html_to_text=True, matching the "return it as text"
	# description given to the tool.
	simple_web_page_reader_tool = OnDemandLoaderTool.from_defaults(
	SimpleWebPageReader(html_to_text=True),
	name="simple_web_page_reader_tool",
	description="Tool for loading content from a web page and return it as text",
	)
	# Tool spec created from the loader tool above with LoadAndSearchToolSpec defaults.
	simple_web_page_reader_toolspec = LoadAndSearchToolSpec.from_defaults(simple_web_page_reader_tool)

	# On-demand loader tool built around YoutubeTranscriptReader (constructed with
	# no arguments); the description advertises it as returning a video's
	# transcript as text.
	youtube_transcript_reader_tool = OnDemandLoaderTool.from_defaults(
	YoutubeTranscriptReader(),
	name="youtube_transcript_reader_tool",
	description="Tool for loading the audio transcript from a youtube video and return it as text",
	)
	# Tool spec created from the loader tool above with LoadAndSearchToolSpec defaults.
	youtube_transcript_reader_toolspec = LoadAndSearchToolSpec.from_defaults(youtube_transcript_reader_tool)