File size: 5,011 Bytes
d63c54e
ee950fa
 
 
d63c54e
 
963d8bd
d63c54e
 
a654024
ee950fa
d63c54e
 
 
a654024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
963d8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
d63c54e
 
 
 
 
 
 
 
 
 
 
 
ee950fa
 
d63c54e
 
 
ee950fa
d63c54e
 
 
 
ee950fa
 
d63c54e
 
ee950fa
 
 
d63c54e
 
ee950fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d63c54e
a654024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d63c54e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import time
import requests
from bs4 import BeautifulSoup

from langchain.tools import tool
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
from langchain_experimental.utilities import PythonREPL
from youtube_transcript_api import YouTubeTranscriptApi

# Shared Python REPL instance used by python_repl_tool; created once at import time.
python_repl = PythonREPL()

# Shared YouTubeTranscriptApi client used by youtube_transcript to fetch transcripts.
youtube_loader = YouTubeTranscriptApi()

def _extract_video_id(url: str) -> str:
    """Return the YouTube video id contained in *url*.

    Handles ``watch?v=<id>`` links (with or without extra query parameters),
    ``youtu.be/<id>`` short links and ``/embed/``, ``/shorts/``, ``/live/`` and
    ``/v/`` paths. Anything else falls back to the text after ``watch?v=``,
    which for a bare video id is the input itself.
    """
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    # urlparse only recognises the host when a scheme is present.
    with_scheme = candidate if "://" in candidate else f"https://{candidate}"
    parsed = urlparse(with_scheme)

    # Standard watch URLs: take only the "v" parameter so that trailing
    # parameters such as "&t=30s" or "&list=..." do not leak into the id.
    ids_in_query = parse_qs(parsed.query).get("v")
    if ids_in_query:
        return ids_in_query[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]
    if host.endswith("youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in {"embed", "shorts", "live", "v"}:
        return segments[1]

    # Unknown shape (e.g. a bare video id): keep the original behaviour.
    return candidate.split("watch?v=")[-1]


@tool
def youtube_transcript(url: str) -> list[dict]:
    """Retrieve transcript from Youtube based url.
    Args:
        url: input youtube url.
    Returns:
        A list of dictionaries containing the transcript of the youtube videos.
        Each dictionary has 'text', 'start', and 'duration' keys.
        If the transcript cannot be retrieved, an error message string is returned instead.
    """
    try:
        video_id = _extract_video_id(url)
        transcript = youtube_loader.fetch(video_id).to_raw_data()
        return transcript
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error retrieving transcript: {str(e)}"

@tool
def duckduckgo_search_results(query: str) -> list[dict]:
    """Run a DuckDuckGo web search and return the hits.
    Args:
        query: The search query string.
    Returns:
        A list of search results, one dictionary per hit (snippet, title, and link).
        If the search fails, an error message string is returned instead.
    """
    try:
        # A fresh search tool is built per call, configured to return a list.
        hits = DuckDuckGoSearchResults(output_format="list").invoke(query)
    except Exception as exc:
        return f"Error performing search: {exc!s}"
    return hits

@tool
def fetch_website(url: str) -> str:
    """Fetch the text content of a website.
    Args:
        url: The URL of the website to fetch.
    Returns:
        The page content of the first document loaded from the URL,
        or an error message string if the page could not be loaded.
    """
    try:
        docs = WebBaseLoader(url).load()
    except Exception as e:
        # Match the other tools in this module: report failures as text
        # rather than letting the exception escape the tool call.
        return f"Error fetching website: {str(e)}"

    # Guard against an empty result so docs[0] cannot raise IndexError.
    if not docs:
        return f"Error fetching website: no content loaded from {url}"
    return docs[0].page_content

def get_wiki_title(query: str) -> str:
    """Look up the title of the best-matching Wikipedia article for a query.
    Args:
        query: A user query.
    Returns:
        The first line of the Wikipedia tool output with "Page: " removed.
        A blank query yields a prompt message, and any exception yields an
        error message string instead of being raised.
    """
    if query.strip() == "":
        return "Please provide a valid query."

    try:
        # Only the title is needed, so keep the retrieved content short.
        api = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
        output = WikipediaQueryRun(api_wrapper=api).run(query)
        # Output is assumed to look like "Page: <title>\nSummary: <summary>".
        first_line, _, _ = output.partition("\n")
        return first_line.replace("Page: ", "")
    except Exception as exc:
        return f"Error retrieving information: {exc!s}"
    
@tool
def get_wiki_full(query: str) -> str:
    """Scrape the content of a Wikipedia page based on the user query.

    Args:
        query: The user query to search for on Wikipedia.
    Returns:
        A single string containing the text of the Wikipedia page, truncated to
        32,000 characters, or an error message string if the lookup fails.
    """
    from urllib.parse import quote

    if not query.strip():
        return "Please provide a valid query."

    title = get_wiki_title(query)
    # get_wiki_title reports failures as a message string instead of raising;
    # pass that message on rather than fetching it as if it were a page title.
    if title.startswith("Error retrieving information:"):
        return title

    # Percent-encode the title so characters such as "?", "#" or "%" cannot
    # change the meaning of the URL.
    url = f'https://en.wikipedia.org/wiki/{quote(title.replace(" ", "_"))}'
    headers = {'User-Agent': 'Mozilla/5.0'}

    try:
        # The timeout keeps an unresponsive server from blocking the caller.
        response = requests.get(url, headers=headers, timeout=15)
        # A title with no matching article produces an HTTP error here.
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error retrieving information: {str(e)}"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Get all content from main article
    content = soup.find('div', {'id': 'mw-content-text'})
    if content is None:
        return f"Error retrieving information: no article content found at {url}"

    # Cap at 32,000 characters (the original comment estimates this as ~8k tokens).
    return content.get_text()[:32_000]

# @tool
# def youtube_transcript(url: str) -> str:
#     """Retrieve transcript from Youtube based url.
#     Args:
#         url: input youtube url.
#     Returns:
#         A single string containing the transcript of the youtube videos.
#     """
#     max_attempts = 5  # Set a maximum number of attempts
#     attempts = 0
#     loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
#     while attempts < max_attempts:
#         try:
#             docs  = loader.load()
#             return docs[0].page_content
#         except Exception as e:
#             attempts += 1
#             print(f"Attempt {attempts} failed: {e}")
#             # Optionally add a delay before retrying
#             time.sleep(1) # Import the time module
#     return "Failed to retrieve transcript after multiple attempts."

@tool
def python_repl_tool(code: str) -> str:
    """
    Run a snippet of Python code and return what it printed.

    Use this tool for calculations, data analysis, or any other
    computational task. The code runs in a persistent Python environment,
    so variables and imports are preserved between calls.

    Args:
        code: Python code to execute

    Returns:
        The output of the code execution (stdout) or error message
    """
    # NOTE(review): this executes arbitrary code supplied by the caller;
    # confirm it only runs in a sandboxed / trusted environment.
    try:
        output = python_repl.run(code)
    except Exception as exc:
        return f"Error: {exc!s}"

    if not output:
        return "Code executed successfully (no output)"
    return output