import time
from typing import Union
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup
from langchain.tools import tool
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
from langchain_experimental.utilities import PythonREPL
from youtube_transcript_api import YouTubeTranscriptApi

# Initialize Python REPL
python_repl = PythonREPL()

# Initialise the YouTube transcript client
youtube_loader = YouTubeTranscriptApi()

# Path prefixes under which YouTube puts the video id in the next segment.
_YOUTUBE_PATH_PREFIXES = frozenset({"embed", "shorts", "live", "v"})


def _extract_video_id(url: str) -> str:
    """Return the video id contained in a YouTube URL (or a bare id).

    Handles ``watch?v=<id>`` links with extra query parameters, ``youtu.be``
    short links, and ``/embed/``, ``/shorts/``, ``/live/``, ``/v/`` paths.
    Anything unrecognised falls back to taking the text after ``watch?v=``,
    so a bare video id is returned unchanged.
    """
    candidate = url.strip()
    # Without a scheme urlparse would treat the host as part of the path;
    # a leading "//" makes it parse the host as the network location.
    normalized = candidate if "://" in candidate else "//" + candidate.lstrip("/")
    parsed = urlparse(normalized)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    if (host == "youtu.be" or host.endswith(".youtu.be")) and segments:
        return segments[0]

    video_ids = parse_qs(parsed.query).get("v")
    if video_ids:
        return video_ids[0]

    if len(segments) >= 2 and segments[0] in _YOUTUBE_PATH_PREFIXES:
        return segments[1]

    return candidate.split("watch?v=")[-1]


@tool
def youtube_transcript(url: str) -> Union[list[dict], str]:
    """Retrieve transcript from Youtube based url.

    Args:
        url: input youtube url.

    Returns:
        A list of dictionaries containing the transcript of the youtube videos.
        Each dictionary has 'text', 'start', and 'duration' keys.
        If the transcript cannot be retrieved, a string starting with
        "Error retrieving transcript:" is returned instead.
    """
    try:
        video_id = _extract_video_id(url)
        return youtube_loader.fetch(video_id).to_raw_data()
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error retrieving transcript: {e}"


@tool
def duckduckgo_search_results(query: str) -> Union[list[dict], str]:
    """Perform a DuckDuckGo search for the given query and return the results.

    Args:
        query: The search query string.

    Returns:
        A list of search results, where each result is a dictionary that
        includes the snippet, title, and link.
        If the search fails, a string starting with
        "Error performing search:" is returned instead.
    """
    try:
        search = DuckDuckGoSearchResults(output_format="list")
        return search.invoke(query)
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error performing search: {e}"


@tool
def fetch_website(url: str) -> str:
    """Fetch the content of a website.

    Args:
        url: The URL of the website to fetch.

    Returns:
        The text content of the first document loaded from the URL.
        If the page cannot be loaded, a string starting with
        "Error fetching website:" is returned instead.
    """
    try:
        docs = WebBaseLoader(url).load()
    except Exception as e:
        # Same error-reporting convention as the other tools in this module.
        return f"Error fetching website: {e}"
    if not docs:
        return f"Error fetching website: no content could be loaded from {url}"
    return docs[0].page_content


def get_wiki_title(query: str) -> str:
    """Retrieve Wikipedia page title based on a user query.

    Args:
        query: A user query.

    Returns:
        A single string containing the retrieved article page title from Wikipedia.
    """
    if not query.strip():
        return "Please provide a valid query."
try: # Reduce length of retrieved content as we just need the title wiki_toolapi_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000) wiki_tool = WikipediaQueryRun(api_wrapper=wiki_toolapi_wrapper) result = wiki_tool.run(query) # Extract the title from the result (assuming it's in the format "Page: