# giulia-fontanella: add presentation notebook (commit a741f9e, unverified)
import base64
import ffmpeg
import pandas as pd
import whisper
import yt_dlp
from langchain.tools import tool
from langchain.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
from langchain_core.messages import HumanMessage
from typing import List
from functools import reduce
import operator
import contextlib
import os
@tool
def read_excel(file_path: str) -> str:
    """Extract readable text from an Excel file (.xlsx or .xls).

    Args:
        file_path: Path to the Excel file.

    Returns:
        A string with each sheet's name followed by its rows serialized as
        JSON records, or an error message if the file cannot be read.
    """
    try:
        # sheet_name=None loads every sheet into a {name: DataFrame} dict.
        sheets = pd.read_excel(file_path, sheet_name=None)
        # Single pass: serialize each sheet and join, instead of building an
        # intermediate list of one-entry dicts and re-iterating it.
        return "\n\n".join(
            f"Sheet: {name}\n{df.to_json(orient='records', lines=False)}"
            for name, df in sheets.items()
        )
    except Exception as e:
        return f"Error reading Excel file: {str(e)}"
@tool
def read_python(file_path: str) -> str:
    """Extract source code from a Python (.py) file.

    Args:
        file_path: Path to the Python file.

    Returns:
        A string containing the full source code of the file, or an error
        message if the file cannot be read.
    """
    try:
        with open(file_path, mode="r", encoding="utf-8") as source_file:
            contents = source_file.read()
        return contents
    except Exception as e:
        return f"Error reading Python file: {str(e)}"
class ExtractTextFromImage:
    """Tool that extracts (OCRs) text from an image via a vision-capable model."""

    def __init__(self, multimodal_model):
        """Store the vision-capable (multimodal) chat model.

        Args:
            multimodal_model: A chat model exposing ``invoke`` that accepts
                multimodal (text + image) message content.
        """
        self.multimodal_model = multimodal_model

    def __call_extract_text_from_image__(self, img_path: str) -> str:
        """Extract text from an image file.

        Args:
            img_path: A string representing the path to an image (e.g., PNG, JPEG).

        Returns:
            The extracted text, stripped of surrounding whitespace, or ""
            if extraction fails.
        """
        try:
            # Read image and encode as base64 so it can be inlined in the prompt.
            with open(img_path, "rb") as image_file:
                image_base64 = base64.b64encode(image_file.read()).decode("utf-8")
            # One multimodal message: OCR instruction + inline image data.
            # NOTE(review): the MIME type is hard-coded to image/png even for
            # JPEG inputs — most vision models tolerate this, but confirm.
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Extract all the text from this image. "
                                "Return only the extracted text, no explanations."
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{image_base64}"
                            },
                        },
                    ]
                )
            ]
            # Call the vision-capable model and return its text content.
            response = self.multimodal_model.invoke(message)
            return response.content.strip()
        except Exception as e:
            # Best-effort tool: report the failure and return an empty string.
            print(f"Error extracting text: {str(e)}")
            return ""

    # Fix: the method name above is not a dunder Python recognizes, so
    # instances were not callable. Alias it to __call__ (backward-compatible;
    # the original name still works).
    __call__ = __call_extract_text_from_image__
class DescribeImage:
    """Tool that produces a rich natural-language description of an image."""

    def __init__(self, multimodal_model):
        """Store the vision-capable (multimodal) chat model.

        Args:
            multimodal_model: A chat model exposing ``invoke`` that accepts
                multimodal (text + image) message content.
        """
        self.multimodal_model = multimodal_model

    def __call_describe_image__(self, img_path: str, query: str) -> str:
        """Generate a detailed description of an image.

        Reads an image from a local path, base64-encodes it, and sends it to a
        vision-capable language model to obtain a comprehensive description of
        the image's content, focusing on the information named in *query*.

        Args:
            img_path: A string representing the path to an image (e.g., PNG, JPEG).
            query: Information to extract from the image.

        Returns:
            A detailed description of the image, or "" if description fails.
        """
        try:
            # Read image and encode as base64 so it can be inlined in the prompt.
            with open(img_path, "rb") as image_file:
                image_base64 = base64.b64encode(image_file.read()).decode("utf-8")
            # One multimodal message: description instruction + inline image.
            # NOTE(review): the MIME type is hard-coded to image/png even for
            # JPEG inputs — most vision models tolerate this, but confirm.
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                f"Describe this image in rich detail. Include objects, people, setting, background elements, and any inferred actions or context. Avoid technical jargon. In particular, extract the following information: {query}"
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{image_base64}"
                            },
                        },
                    ]
                )
            ]
            response = self.multimodal_model.invoke(message)
            return response.content.strip()
        except Exception as e:
            # Best-effort tool: report the failure and return an empty string.
            print(f"Error describing image: {str(e)}")
            return ""

    # Fix: the method name above is not a dunder Python recognizes, so
    # instances were not callable. Alias it to __call__ (backward-compatible;
    # the original name still works).
    __call__ = __call_describe_image__
@tool
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an MP3 file.

    Args:
        audio_path: Path to the MP3 audio file.

    Returns:
        Transcribed text as a string, or "" on failure.
    """
    try:
        # Model sizes trade speed for accuracy: "tiny", "small", "medium", "large".
        model = whisper.load_model("small")
        result = model.transcribe(audio_path)
        # Fix: transcribe() returns a dict; the transcription itself lives
        # under the "text" key. Returning the raw dict broke the documented
        # str contract.
        return result["text"]
    except Exception as e:
        error_msg = f"Error transcribing audio: {str(e)}"
        print(error_msg)
        return ""
@tool
def download_youtube_video(youtube_url: str, output_path: str) -> str:
    """Download a YouTube video as an MP4 file.

    Args:
        youtube_url: The YouTube video URL.
        output_path: Desired output path for the downloaded MP4 file.

    Returns:
        Path to the saved video file.
    """
    ydl_opts = {
        "format": "bestvideo+bestaudio/best",
        "outtmpl": output_path,
        "merge_output_format": "mp4",
        "quiet": True,
    }
    # Fix: the devnull handle was opened inline and never closed (file-handle
    # leak). The with-statement guarantees it is released; stderr is silenced
    # only for the duration of the download.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(devnull):
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])
    return output_path
@tool
def extract_audio_from_video(video_path: str, audio_output: str) -> str:
    """Extracts audio from an MP4 video file and saves it as MP3.

    Args:
        video_path: Path to the input MP4 video file.
        audio_output: Path for the output MP3 file.

    Returns:
        Path to the audio file, or "" on failure.
    """
    try:
        (
            ffmpeg.input(video_path)
            # t=60 limits the extracted audio to the first 60 seconds.
            .output(audio_output, format="mp3", acodec="libmp3lame", t=60)
            .overwrite_output()
            .run(quiet=True)
        )
        return audio_output
    except Exception as e:
        # Fix: the message previously said "transcribing audio" — copy-pasted
        # from transcribe_audio; this function extracts audio.
        error_msg = f"Error extracting audio: {str(e)}"
        print(error_msg)
        return ""
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Fix: the opening tag was self-closing ("/>") while the content was also
    # terminated with </Document>, producing malformed markup. Emit a
    # well-formed open/close pair.
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ]
    )
    # NOTE(review): the str annotation does not match this dict return; kept
    # as a dict for compatibility with downstream state merging.
    return {"wiki_results": formatted_search_docs}
@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.
    """
    hits = TavilySearchResults(max_results=3).invoke(query)
    rendered = []
    for doc in hits:
        rendered.append(
            f'<Document source="{doc["url"]}" title="{doc["title"]}" score="{doc.get("score", "")}">\n{doc["content"]}\n</Document>'
        )
    return {"web_results": "\n\n---\n\n".join(rendered)}
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for a paper.

    Args:
        query: The search query to retrieve a specific paper, consisting
            of title and/or authors name and/or year of publication.
    """
    docs = ArxivLoader(query=query, load_max_docs=2).load()
    rendered = []
    for doc in docs:
        meta = doc.metadata
        rendered.append(
            f'<Document title="{meta.get("Title", "")}" '
            f'published="{meta.get("Published", "")}" '
            f'authors="{meta.get("Authors", "")}">\n'
            f'Summary: {meta.get("Summary", "")}\n\n'
            f"{doc.page_content}\n"
            f"</Document>"
        )
    # NOTE(review): "arvix" is a typo, kept byte-for-byte since downstream
    # consumers may key on it.
    return {"arvix_results": "\n\n---\n\n".join(rendered)}
@tool
def add(numbers: List[float]) -> float:
    """Calculates the sum of a list of numbers.

    Args:
        numbers: A list of numeric values to be summed.

    Returns:
        The sum of all numbers in the list (0 for an empty list).
    """
    # Left fold from 0, mirroring the product tool below.
    return reduce(operator.add, numbers, 0)
@tool
def multiply(numbers: List[float]) -> float:
    """Calculates the product of a list of numbers.

    Args:
        numbers: A list of numeric values to be multiplied.

    Returns:
        The product of all numbers in the list (1.0 for an empty list).
    """
    # Explicit left fold starting from 1.0 (so the result is always a float).
    product = 1.0
    for factor in numbers:
        product = product * factor
    return product
@tool
def divide(a: int, b: int) -> float:
    """Divide a and b.

    Args:
        a: first number
        b: second number

    Returns:
        The true-division quotient a / b.
    """
    # Raises ZeroDivisionError when b == 0.
    quotient = a / b
    return quotient