Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / tools.py

hanshan1988

changed tool to use youtube transcript api

a654024 3 months ago

raw

history blame contribute delete

5.01 kB

	import time
	import requests
	from bs4 import BeautifulSoup

	from langchain.tools import tool
	from langchain_community.utilities import WikipediaAPIWrapper
	from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
	from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
	from langchain_experimental.utilities import PythonREPL
	from youtube_transcript_api import YouTubeTranscriptApi

	# Shared PythonREPL instance. It is created once at import time and reused by
	# python_repl_tool for every call.
	python_repl = PythonREPL()

	# Shared YouTubeTranscriptApi client used by the youtube_transcript tool.
	# NOTE(review): despite the name, this is a transcript API client, not a
	# LangChain document loader.
	youtube_loader = YouTubeTranscriptApi()

	def _extract_video_id(url: str) -> str:
	    """Return the YouTube video ID contained in ``url``.

	    Handles ``watch?v=<id>`` links (with or without extra query parameters),
	    ``youtu.be/<id>`` short links and ``/shorts/``, ``/embed/``, ``/live/``
	    and ``/v/`` paths. Anything else (for example a bare video ID) falls
	    back to the text after ``watch?v=``, or the whole input if that marker
	    is absent.
	    """
	    # Local import keeps this block self-contained (no file-level import needed).
	    from urllib.parse import parse_qs, urlparse

	    cleaned = url.strip()
	    # urlparse only fills in the host when a scheme is present, so add one for
	    # inputs such as "youtu.be/<id>". A bare video ID contains no "/" and is
	    # left alone.
	    if "://" not in cleaned and "/" in cleaned:
	        parsed = urlparse("https://" + cleaned)
	    else:
	        parsed = urlparse(cleaned)

	    host = (parsed.hostname or "").lower()
	    segments = [part for part in parsed.path.split("/") if part]

	    # Short links: https://youtu.be/<id>?t=42
	    if (host == "youtu.be" or host.endswith(".youtu.be")) and segments:
	        return segments[0]

	    # Standard links: https://www.youtube.com/watch?v=<id>&t=42&list=...
	    ids = parse_qs(parsed.query).get("v")
	    if ids:
	        return ids[0]

	    # Path-based links: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
	    if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
	        return segments[1]

	    # Fallback keeps the previous behaviour for unrecognised input.
	    return cleaned.split("watch?v=")[-1]


	@tool
	def youtube_transcript(url: str) -> list[dict]:
	    """Retrieve transcript from Youtube based url.

	    Args:
	        url: input youtube url. Standard watch links, youtu.be short links and
	            shorts/embed/live links are accepted.
	    Returns:
	        A list of dictionaries containing the transcript of the youtube videos.
	        Each dictionary has 'text', 'start', and 'duration' keys.
	        If the transcript cannot be retrieved, a string starting with
	        "Error retrieving transcript:" is returned instead.
	    """
	    try:
	        video_id = _extract_video_id(url)
	        return youtube_loader.fetch(video_id).to_raw_data()
	    except Exception as e:
	        # Deliberate best-effort: the failure is reported back as text rather
	        # than raised.
	        return f"Error retrieving transcript: {str(e)}"

	@tool
	def duckduckgo_search_results(query: str) -> list[dict]:
	    """Run a DuckDuckGo search for the query and hand back the hits.

	    Args:
	        query: The search query string.
	    Returns:
	        A list of search results, where each result is a dictionary that
	        includes the snippet, title, and link. If the search fails, a string
	        starting with "Error performing search:" is returned instead.
	    """
	    try:
	        # A fresh search tool is built per call, configured for list output.
	        hits = DuckDuckGoSearchResults(output_format="list").invoke(query)
	    except Exception as err:
	        return f"Error performing search: {str(err)}"
	    else:
	        return hits

	@tool
	def fetch_website(url:str) -> str:
	    """Fetch the content of a website.

	    Args:
	        url: The URL of the website to fetch.
	    Returns:
	        The text content of the first document loaded from the URL. If the
	        page cannot be loaded or yields no content, a string starting with
	        "Error fetching website:" is returned instead.
	    """
	    try:
	        docs = WebBaseLoader(url).load()
	    except Exception as e:
	        # Report failures as text, matching the other tools in this module,
	        # instead of letting the exception escape to the caller.
	        return f"Error fetching website: {str(e)}"

	    # Guard against an empty result so docs[0] cannot raise IndexError.
	    if not docs:
	        return f"Error fetching website: no content returned for {url}"
	    return docs[0].page_content

	def get_wiki_title(query: str) -> str:
	    """Retrieve Wikipedia page title based on a user query.

	    Args:
	        query: A user query.
	    Returns:
	        A single string containing the retrieved article page title from
	        Wikipedia. Failures are reported as strings too: "Please provide a
	        valid query." for a blank query, or a message starting with
	        "Error retrieving information:" when the lookup fails or returns no
	        page.
	    """
	    if not query.strip():
	        return "Please provide a valid query."
	    try:
	        # Reduce length of retrieved content as we just need the title
	        wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
	        result = WikipediaQueryRun(api_wrapper=wrapper).run(query)

	        # A successful result is expected to look like
	        # "Page: <title>\nSummary: <summary>", so the title is on the first line.
	        first_line = result.split("\n", 1)[0]
	        marker = "Page: "
	        if not first_line.startswith(marker):
	            # No "Page: " header means no article was returned (for example a
	            # "no result found" message); surface it as an error rather than
	            # passing it off as a title.
	            return f"Error retrieving information: {first_line}"
	        # Strip the marker only as a prefix so a title that itself contains
	        # "Page: " is left intact.
	        return first_line[len(marker):]
	    except Exception as e:
	        return f"Error retrieving information: {str(e)}"

	@tool
	def get_wiki_full(query: str) -> str:
	    """Scrape the content of a Wikipedia page based on the user query.

	    Args:
	        query: The user query to search for on Wikipedia.
	    Returns:
	        A single string containing the content of the Wikipedia page,
	        truncated to 32,000 characters. If the page cannot be found or
	        fetched, an explanatory message is returned instead.
	    """
	    # Local import keeps this block self-contained (no file-level import needed).
	    from urllib.parse import quote

	    title = get_wiki_title(query)
	    # get_wiki_title reports failures as plain strings. Pass those straight
	    # back instead of building a URL out of an error message.
	    if title == "Please provide a valid query." or title.startswith(
	        ("Error retrieving information:", "No good Wikipedia Search Result")
	    ):
	        return title

	    # Percent-encode the title so characters such as "?" or "#" do not
	    # truncate or alter the request path.
	    url = f'https://en.wikipedia.org/wiki/{quote(title.replace(" ", "_"))}'
	    headers = {'User-Agent': 'Mozilla/5.0'}

	    try:
	        # The timeout stops a stalled connection from hanging the caller.
	        response = requests.get(url, headers=headers, timeout=30)
	        # Treat 4xx/5xx responses as failures instead of scraping an error page.
	        response.raise_for_status()
	    except requests.RequestException as e:
	        return f"Error retrieving information: {str(e)}"

	    soup = BeautifulSoup(response.content, 'html.parser')

	    # Get all content from main article
	    content = soup.find('div', {'id': 'mw-content-text'})
	    if content is None:
	        return f"Error retrieving information: no article content found at {url}"

	    # Cap at 32,000 characters, roughly 8k tokens at ~4 characters per token,
	    # to avoid excessive length.
	    return content.get_text()[:32_000]

	# @tool
	# def youtube_transcript(url: str) -> str:
	# """Retrieve transcript from Youtube based url.
	# Args:
	# url: input youtube url.
	# Returns:
	# A single string containing the transcript of the youtube videos.
	# """
	# max_attempts = 5 # Set a maximum number of attempts
	# attempts = 0
	# loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
	# while attempts < max_attempts:
	# try:
	# docs = loader.load()
	# return docs[0].page_content
	# except Exception as e:
	# attempts += 1
	# print(f"Attempt {attempts} failed: {e}")
	# # Optionally add a delay before retrying
	# time.sleep(1) # Import the time module
	# return "Failed to retrieve transcript after multiple attempts."

	@tool
	def python_repl_tool(code: str) -> str:
	    """
	    Run Python code and return whatever it outputs.

	    Use this tool for calculations, data analysis, or any other
	    computational task. The code runs in a persistent Python environment,
	    so variables and imports are preserved between calls.

	    Args:
	        code: Python code to execute

	    Returns:
	        The output of the code execution (stdout) or error message
	    """
	    # NOTE(review): this hands caller-supplied code to the shared REPL for
	    # execution. Confirm the deployment sandboxes it appropriately.
	    try:
	        output = python_repl.run(code)
	    except Exception as err:
	        return f"Error: {str(err)}"
	    # An empty result is replaced with an explicit confirmation message.
	    return output or "Code executed successfully (no output)"