Agent_Course_Final_Assignment

Sleeping

Agent_Course_Final_Assignment / tools.py

CUMANI Paolo

[CHG] Working agent implementation

17e605d 5 months ago

5.88 kB

	import base64
	import io
	import pandas as pd
	from youtube_transcript_api import YouTubeTranscriptApi
	from langchain_core.tools import tool
	from langchain_experimental.utilities import PythonREPL
	from langchain_community.document_loaders import WebBaseLoader
	from langchain_community.utilities import WikipediaAPIWrapper
	from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun, ArxivQueryRun
	from langchain_tavily.tavily_search import TavilySearch

	@tool
	def python_repl_tool(command: str) -> str:
	    """A tool to execute Python commands. If you want to see the output of a value, you should print it out with `print(...)`.

	    Args:
	        command (str): A valid Python command to execute.

	    Returns:
	        str: The output of the command.
	    """
	    print('Python shell tool called')
	    # NOTE(security): this runs arbitrary code supplied by the model; no
	    # sandboxing is visible in this file, so only expose it in a trusted or
	    # isolated environment.
	    # `run` is an instance method: calling it on the class would bind
	    # `command` to `self` and fail with a TypeError, so instantiate first.
	    result = PythonREPL().run(command)
	    return str(result)

	@tool
	def read_excel_csv(input_str: str, file_type: str = 'csv') -> str:
	    """
	    Summarizes a csv or excel file given as a base64-encoded string or as a path.

	    Args:
	        input_str (str): String containing a base64-encoded file or its path.
	        file_type (str): Type of the file ('csv', 'excel' or 'xlsx').

	    Returns:
	        str: Row and column counts, column names and summary statistics of the file.

	    Raises:
	        ValueError: If file_type is not one of the supported values.
	    """
	    import os

	    print(f'Read excel/csv tool called {file_type} ({input_str[:20]})')

	    # Pick the reader up front so an unsupported type fails before any decoding.
	    if file_type == 'csv':
	        reader = pd.read_csv
	    elif file_type in ('xlsx', 'excel'):
	        reader = pd.read_excel
	    else:
	        raise ValueError("Unsupported file_type. Use 'csv' or 'excel'.")

	    if os.path.exists(input_str):
	        # An existing path always wins: many path-like strings are also
	        # decodable as base64 and would otherwise be read as garbage bytes.
	        source = input_str
	    else:
	        try:
	            # Drop line wrapping, then decode strictly. The lenient default
	            # silently discards characters outside the base64 alphabet.
	            payload = ''.join(input_str.split())
	            source = io.BytesIO(base64.b64decode(payload, validate=True))
	        except ValueError:
	            # Not base64 (binascii.Error is a ValueError): hand the string to
	            # pandas as a path/URL and let it report a missing file itself.
	            source = input_str

	    df = reader(source)

	    # Column labels are not guaranteed to be strings (e.g. numeric headers).
	    column_names = ', '.join(map(str, df.columns))
	    return (
	        f"{file_type.upper()} file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
	        f"Columns: {column_names}\n\n"
	        "Summary statistics:\n"
	        f"{df.describe()}"
	    )

	@tool
	def wikipedia_query_tool(query: str) -> str:
	    """A tool to query Wikipedia. It returns a summary of the page, not the full content. To get the full content, you can use another tool.
	    Args:
	    query (str): A search query for Wikipedia.
	    Returns:
	    str: A summary of the related Wikipedia page."""
	    # The docstring above is the tool description handed to the model, so its
	    # wording is left untouched.
	    print('Wikipedia query tool called:', query)

	    # Limit the lookup to the two best-matching pages.
	    api_wrapper = WikipediaAPIWrapper(top_k_results=2)
	    summary = WikipediaQueryRun(api_wrapper=api_wrapper).run(query)

	    # Log only a short preview of the answer.
	    preview = summary[:10]
	    print(f"Wikipedia query {query} result (limited to 10 chars): {preview}")
	    return summary.strip()

	@tool
	def arxiv_query_tool(query: str) -> str:
	    """A tool to query arXiv.org
	    Useful for when you need to answer physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics
	    questions from scientific articles on arxiv.
	    Args:
	    query (str): A search query for ArXiv.
	    Returns:
	    str: The text content of the ArXiv page.
	    """
	    # The docstring above is the tool description handed to the model, so its
	    # wording is left untouched.
	    print('ArXiv query tool called', query)

	    # Run the search through a freshly built ArXiv query tool.
	    answer = ArxivQueryRun().run(query)

	    # Log only the first 50 characters of the answer.
	    preview = answer[:50]
	    print(f"ArXiv query {query} result (limited to 50 chars): {preview}")
	    return answer.strip()

	@tool
	def webpage_reader_tool(page_url: str) -> str:
	    """A tool to read the full content of a webpage.

	    Args:
	        page_url (str): A valid URL of the webpage to read.

	    Returns:
	        str: The text content of the webpage.

	    Raises:
	        ValueError: If the loader does not yield exactly one document for the URL.
	    """
	    print('Web page reader tool called', page_url)
	    loader = WebBaseLoader(web_paths=[page_url])
	    docs = list(loader.lazy_load())

	    # Explicit check instead of `assert`, which is stripped under `python -O`.
	    if len(docs) != 1:
	        raise ValueError(
	            f'Expected exactly one document for {page_url}, got {len(docs)}'
	        )
	    doc = docs[0]

	    # A Document is not a dict: the page title lives in its metadata mapping.
	    title = doc.metadata.get("title", "")
	    return f'<Document source="{page_url}" title="{title}"/>\n{doc.page_content.strip()}\n</Document>'

	@tool
	def web_search_tool(query: str) -> str:
	    """Search internet for a query and return maximum 3 results.

	    Args:
	        query: The search query.

	    Returns:
	        str: The formatted search results.
	    """
	    print('Web search tool called', query)

	    try:
	        search_docs = TavilySearch(max_results=3).invoke(query)
	        return "\n\n---\n\n".join(
	            f'<Document source="{doc.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>'
	            for doc in search_docs['results']
	        )
	    except Exception as e:
	        # Deliberate best-effort boundary: any Tavily failure (missing key,
	        # network error, response without 'results') falls back to DuckDuckGo.
	        print(f'\tError {e}, passing to DuckDuckgo')

	    # DuckDuckGoSearchRun returns one plain-text string of snippets, not a
	    # dict with a 'results' list, so wrap it directly instead of indexing it.
	    fallback_text = DuckDuckGoSearchRun().invoke(query)
	    return f'<Document source="duckduckgo" title=""/>\n{fallback_text}\n</Document>'

	@tool
	def transcribe_youtube_video_tool(video_id: str) -> str:
	    """A tool to transcribe the audio of a YouTube video.

	    Args:
	        video_id (str): A valid YouTube video ID or URL.

	    Returns:
	        str: The transcribed text of the video, or an explanatory message
	            when transcripts are disabled for it.
	    """
	    from urllib.parse import parse_qs, urlparse

	    # The exception class is exported by the package; it is not reachable
	    # through a YouTubeTranscriptApi instance.
	    from youtube_transcript_api import TranscriptsDisabled

	    print(f"Transcribing YouTube video with ID: {video_id}")

	    video_id = video_id.strip()
	    # Bare IDs never contain '/' or '?', so either one means a URL was passed.
	    if '/' in video_id or '?' in video_id:
	        url = video_id if '://' in video_id else f'https://{video_id}'
	        parsed = urlparse(url)
	        ids_from_query = parse_qs(parsed.query).get('v')
	        path_segments = [segment for segment in parsed.path.split('/') if segment]
	        if ids_from_query:
	            # https://www.youtube.com/watch?v=<id>&t=...
	            video_id = ids_from_query[0]
	        elif path_segments:
	            # https://youtu.be/<id>, /embed/<id>, /shorts/<id>
	            video_id = path_segments[-1]

	    transcript_api = YouTubeTranscriptApi()
	    try:
	        transcript = transcript_api.fetch(video_id)
	    except TranscriptsDisabled as e:
	        return f"Transcription is disabled for this video: {e}"

	    transcript_text = ' '.join(entry.text for entry in transcript)
	    print(f"\t {transcript_text}")
	    return transcript_text.strip()