Final_Assignment_Template

Sleeping

File size: 5,011 Bytes

import time
import requests
from bs4 import BeautifulSoup

from langchain.tools import tool
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
from langchain_experimental.utilities import PythonREPL
from youtube_transcript_api import YouTubeTranscriptApi

# Single PythonREPL instance created at import time; python_repl_tool routes
# every call through this same object.
python_repl = PythonREPL()

# Single YouTubeTranscriptApi client created at import time; youtube_transcript
# calls its fetch() method. NOTE: despite the name, this is the transcript API
# client, not the langchain YoutubeLoader imported above.
youtube_loader = YouTubeTranscriptApi()

def _extract_video_id(url: str) -> str:
    """Return the YouTube video id contained in ``url``.

    Handles ``watch?v=<id>`` URLs (with or without extra query parameters),
    ``youtu.be/<id>`` short links and ``/embed/``, ``/shorts/``, ``/live/`` and
    ``/v/`` paths. Anything else falls back to the text after ``watch?v=``
    (or the whole input), so a bare video id is returned unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    # Without a scheme urlparse treats the host as part of the path; a leading
    # "//" makes "youtu.be/<id>" and "youtube.com/watch?v=<id>" parse properly.
    parsed = urlparse(candidate if "://" in candidate else "//" + candidate)

    # Standard watch URLs carry the id in the "v" query parameter; parsing the
    # query drops trailing parameters such as "&t=10s" or "&list=...".
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = (parsed.hostname or "").lower()
    if host.endswith("youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in {"embed", "shorts", "live", "v"}:
        return segments[1]

    # Unknown shape: keep the previous behaviour.
    return candidate.split("watch?v=")[-1]


@tool
def youtube_transcript(url: str) -> list[dict]:
    """Retrieve the transcript of a YouTube video from its URL.
    Args:
        url: YouTube video URL (watch, youtu.be, shorts or embed form) or a bare video id.
    Returns:
        A list of dictionaries containing the transcript of the youtube video.
        Each dictionary has 'text', 'start', and 'duration' keys.
        If the transcript cannot be retrieved, an error message string is
        returned instead of the list.
    """
    try:
        video_id = _extract_video_id(url)
        return youtube_loader.fetch(video_id).to_raw_data()
    except Exception as e:
        # Tool boundary: report the failure to the agent instead of raising.
        return f"Error retrieving transcript: {str(e)}"

@tool
def duckduckgo_search_results(query: str) -> list[dict]:
    """Run a DuckDuckGo web search for the query and return the hits.
    Args:
        query: The search query string.
    Returns:
        A list of search results; each result is a dictionary that includes the snippet, title, and link.
        If the search fails, an error message string is returned instead.
    """
    try:
        # A new search tool is built per call, configured to return a list.
        return DuckDuckGoSearchResults(output_format="list").invoke(query)
    except Exception as err:
        return f"Error performing search: {str(err)}"

@tool
def fetch_website(url:str) -> str:
    """Fetch the text content of a website.
    Args:
        url: The URL of the website to fetch.
    Returns:
        The page content of the first document loaded from the URL, or an
        error message string if the page could not be loaded.
    """
    try:
        docs = WebBaseLoader(url).load()
    except Exception as e:
        # Tool boundary: match the other tools and report failures as text
        # rather than letting the exception escape to the agent.
        return f"Error fetching website: {str(e)}"
    if not docs:
        # Guard the docs[0] access below against an empty load result.
        return f"Error fetching website: no content returned for {url}"
    return docs[0].page_content

def get_wiki_title(query: str) -> str:
    """Look up the title of the best-matching Wikipedia article for a query.
    Args:
        query: A user query.
    Returns:
        The article title taken from the first line of the Wikipedia tool
        output. A blank query yields "Please provide a valid query."; any
        exception yields a string starting with "Error retrieving information:".
    """
    if query.strip() == "":
        return "Please provide a valid query."
    try:
        # Only the title is needed, so ask for one result and a short summary.
        wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
        raw = WikipediaQueryRun(api_wrapper=wrapper).run(query)
        # The output is expected to look like "Page: <title>\nSummary: <summary>".
        first_line = raw.split("\n")[0]
        return first_line.replace("Page: ", "")
    except Exception as err:
        return f"Error retrieving information: {str(err)}"
    
@tool
def get_wiki_full(query: str) -> str:
    """Scrape the content of a Wikipedia page based on the user query.

    Args:
        query: The user query to search for on Wikipedia.
    Returns:
        A single string containing the content of the Wikipedia page
        (truncated to 32,000 characters), or an error message string if the
        title lookup or the page download fails.
    """
    from urllib.parse import quote

    title = get_wiki_title(query).strip()
    # get_wiki_title reports failures as plain strings; pass them through
    # instead of treating them as an article title.
    if title == "Please provide a valid query." or title.startswith(
        "Error retrieving information:"
    ):
        return title

    # Percent-encode the title so characters such as "?" or "#" stay part of
    # the path instead of starting a query string or fragment.
    url = f'https://en.wikipedia.org/wiki/{quote(title.replace(" ", "_"))}'
    headers = {'User-Agent': 'Mozilla/5.0'}

    try:
        # The timeout keeps a stalled connection from blocking the agent.
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error retrieving Wikipedia page: {str(e)}"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Get all content from main article
    content = soup.find('div', {'id': 'mw-content-text'})
    if content is None:
        return f"Error retrieving Wikipedia page: no article content found at {url}"

    # Cap at 32,000 characters (roughly 8k tokens) to avoid excessive length.
    return content.get_text()[:32_000]

# @tool
# def youtube_transcript(url: str) -> str:
#     """Retrieve transcript from Youtube based url.
#     Args:
#         url: input youtube url.
#     Returns:
#         A single string containing the transcript of the youtube videos.
#     """
#     max_attempts = 5  # Set a maximum number of attempts
#     attempts = 0
#     loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
#     while attempts < max_attempts:
#         try:
#             docs  = loader.load()
#             return docs[0].page_content
#         except Exception as e:
#             attempts += 1
#             print(f"Attempt {attempts} failed: {e}")
#             # Optionally add a delay before retrying
#             time.sleep(1) # Import the time module
#     return "Failed to retrieve transcript after multiple attempts."

@tool
def python_repl_tool(code: str) -> str:
    """
    Run Python code and return what it printed.

    Use this tool to run Python code for calculations, data analysis,
    or any computational tasks. All calls go through the same module-level
    REPL object, which is intended to keep variables and imports available
    between calls.

    Args:
        code: Python code to execute

    Returns:
        The output of the code execution (stdout) or error message
    """
    # NOTE(review): this executes arbitrary code supplied by the caller; only
    # expose the tool in an environment where that is acceptable.
    try:
        output = python_repl.run(code)
        if not output:
            return "Code executed successfully (no output)"
        return output
    except Exception as err:
        return f"Error: {str(err)}"