Spaces:
Runtime error
Runtime error
| import os | |
| import re | |
| import requests | |
| import openai | |
| from typing import List | |
| from dotenv import load_dotenv | |
| from langchain_core.tools import tool | |
| from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ImageCaptionLoader, ArxivLoader | |
| from langchain_community.tools import DuckDuckGoSearchResults | |
| from langchain_text_splitters import CharacterTextSplitter | |
| load_dotenv() | |
def multiply(a: int, b: int) -> int:
    """
    Return the product of two integers.

    Args:
        a: The first factor
        b: The second factor

    Returns:
        int: The product of ``a`` and ``b``
    """
    product = a * b
    return product
def add(a: int, b: int) -> int:
    """
    Return the sum of two integers.

    Args:
        a: The first addend
        b: The second addend

    Returns:
        int: The sum of ``a`` and ``b``
    """
    total = a + b
    return total
def subtract(a: int, b: int) -> int:
    """
    Return the difference of two integers.

    Args:
        a: The minuend
        b: The subtrahend

    Returns:
        int: The result of ``a - b``
    """
    difference = a - b
    return difference
def divide(a: int, b: int) -> float:
    """
    Divide the first integer by the second integer and return the result.

    Note: Python's ``/`` is true division, so the result is a ``float``
    (the original annotation claimed ``int``, which was incorrect).

    Args:
        a: The dividend
        b: The divisor

    Returns:
        float: The quotient ``a / b``

    Raises:
        ZeroDivisionError: If ``b`` is zero.
    """
    return a / b
# Base URL of the scoring server's file endpoint; a task's attachment is
# downloadable at FILE_URL + task_id.
FILE_URL = "https://agents-course-unit4-scoring.hf.space/files/"
def read_file(task_id: str) -> str:
    """
    Download the file attached to a task and return its text content.

    Args:
        task_id: The id of the task whose attached file should be downloaded

    Returns:
        str: The content of the file, decoded as text

    Raises:
        requests.HTTPError: If the download fails (non-2xx status).
    """
    file_url = f"{FILE_URL}{task_id}"
    response = requests.get(file_url, timeout=10, allow_redirects=True)
    # Fail loudly on 4xx/5xx instead of silently returning an error page body.
    response.raise_for_status()
    # Decode in memory. The original wrote a 'temp' file to disk, re-read it
    # with the platform-default encoding, and never deleted it.
    return response.text
def analyze_image(task_id: str) -> str:
    """
    Generate a caption describing the image attached to a task.

    Args:
        task_id: The id of the task whose image should be analyzed

    Returns:
        str: A textual description of the image content
    """
    loader = ImageCaptionLoader(images=[f"{FILE_URL}{task_id}"])
    docs = loader.load()
    return docs[0].page_content
def analyze_audio(task_id: str) -> str:
    """
    Transcribe the mp3 file attached to a task using OpenAI Whisper.

    Args:
        task_id: The id of the task whose audio file should be transcribed

    Returns:
        str: The transcript of the audio file

    Raises:
        requests.HTTPError: If the download fails (non-2xx status).
    """
    file_url = f"{FILE_URL}{task_id}"
    response = requests.get(file_url, timeout=10, allow_redirects=True)
    # Fail loudly on 4xx/5xx instead of transcribing an HTML error page.
    response.raise_for_status()
    temp_file = 'temp.mp3'
    with open(temp_file, 'wb') as fp:
        fp.write(response.content)
    try:
        with open(temp_file, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                file=audio_file,
                model="whisper-1"
            )
        return transcript.text
    finally:
        # Remove the scratch file; the original left it behind on disk.
        os.remove(temp_file)
def analyze_youtube_video(youtube_url: str, question: str) -> str:
    """
    Analyze a youtube video based on the youtube_url and answer the question.

    NOTE(review): video analysis is not implemented. The original stub had
    only a docstring and therefore silently returned ``None``, violating the
    declared ``str`` return type. It now returns an explicit message so a
    calling agent can recover gracefully instead of crashing on ``None``.

    Args:
        youtube_url: The url of the youtube video to analyze
        question: The question to answer based on the youtube video

    Returns:
        str: An explanatory message stating that analysis is unavailable
    """
    return (
        f"Video analysis is not implemented: unable to answer "
        f"'{question}' for {youtube_url}."
    )
def web_search(query: str) -> str:
    """
    Search the web with DuckDuckGo and return the combined page text.

    Fetches the top 3 result pages, concatenates up to 15000 characters of
    each, and normalizes whitespace before returning.

    Args:
        query: The query to search the web for

    Returns:
        str: The cleaned text content of the web search results
    """
    engine = DuckDuckGoSearchResults(output_type="list", num_results=3)
    hits = engine.invoke({"query": query})
    urls = [hit["link"] for hit in hits]
    pages = WebBaseLoader(web_paths=urls).load()
    combined = "\n\n".join(page.page_content[:15000] for page in pages)
    # Collapse runs of 3+ newlines, then runs of 6+ spaces/tabs.
    cleaned = re.sub(r'\n{3,}', '\n\n', combined).strip()
    cleaned = re.sub(r'[ \t]{6,}', ' ', cleaned)
    # Strip any whitespace exposed at the edges by the substitutions.
    return cleaned.strip()
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia articles with the given query and return the pages.

    Loads up to 3 matching articles and wraps each in a <Document> tag
    carrying its source URL and page metadata.

    Args:
        query: The query to search Wikipedia for

    Returns:
        str: The text content of the matching Wikipedia articles
    """
    print("Searching Wikipedia for the query: ", query)
    search_docs = WikipediaLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            # Fixed: the opening tag previously ended with "/>", self-closing
            # it even though content and a </Document> close tag followed.
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ])
    return formatted_search_docs
def arxiv_search(query: str) -> str:
    """
    Search arxiv for the given query and return the results.

    Loads up to 3 matching papers and wraps the first 1000 characters of
    each in a <Document> tag carrying its source and page metadata.

    Args:
        query: The query to search arxiv for

    Returns:
        str: The text content of the arxiv search results
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            # Fixed: the opening tag previously ended with "/>", self-closing
            # it even though content and a </Document> close tag followed.
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        ])
    return formatted_search_docs
def text_splitter(text: str) -> List[str]:
    """
    Break a large text into smaller chunks.

    Uses Langchain's CharacterTextSplitter with a chunk size of 300
    characters and an overlap of 10.

    Args:
        text: The large text to split into smaller chunks

    Returns:
        List[str]: A list containing the smaller chunks of the text
    """
    chunker = CharacterTextSplitter(chunk_size=300, chunk_overlap=10)
    return chunker.split_text(text)