File size: 5,011 Bytes
d63c54e ee950fa d63c54e 963d8bd d63c54e a654024 ee950fa d63c54e a654024 963d8bd d63c54e ee950fa d63c54e ee950fa d63c54e ee950fa d63c54e ee950fa d63c54e ee950fa d63c54e a654024 d63c54e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | import time
import requests
from bs4 import BeautifulSoup
from langchain.tools import tool
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
from langchain_experimental.utilities import PythonREPL
from youtube_transcript_api import YouTubeTranscriptApi
# Shared Python REPL instance, created once at import time and used by python_repl_tool.
python_repl = PythonREPL()
# Shared YouTubeTranscriptApi client, created once at import time and used by youtube_transcript.
youtube_loader = YouTubeTranscriptApi()
def _extract_video_id(url: str) -> str:
    """Return the YouTube video ID contained in *url*.

    Handles watch URLs with extra query parameters, youtu.be short links and
    /embed/, /shorts/, /live/ and /v/ paths. Anything else (for example a
    bare video ID) falls back to splitting on "watch?v=".
    """
    # Imported locally so this block does not depend on the file's import header.
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    # urlparse only recognises the host when a scheme or a leading "//" is present.
    parsed = urlparse(cleaned if "://" in cleaned else f"//{cleaned}")
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    # Short links carry the ID as the first path segment.
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]

    # Standard watch URLs: take only the "v" parameter and ignore "&t=", "&list=", ...
    ids_in_query = parse_qs(parsed.query).get("v")
    if ids_in_query:
        return ids_in_query[0]

    # Path-based forms such as /embed/<id> or /shorts/<id>.
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Unknown shape (e.g. a bare video ID): keep the previous splitting behaviour.
    return cleaned.split("watch?v=")[-1]


@tool
def youtube_transcript(url: str) -> list[dict]:
    """Retrieve the transcript of a YouTube video from its URL.

    Args:
        url: YouTube video URL (watch, youtu.be, embed, shorts or live form)
            or a bare video ID.

    Returns:
        A list of dictionaries containing the transcript of the video.
        Each dictionary has 'text', 'start', and 'duration' keys.
        If the transcript cannot be retrieved, a string starting with
        "Error retrieving transcript:" is returned instead.
    """
    try:
        video_id = _extract_video_id(url)
        return youtube_loader.fetch(video_id).to_raw_data()
    except Exception as e:
        # Report failures as text so the calling agent can react to them.
        return f"Error retrieving transcript: {str(e)}"
@tool
def duckduckgo_search_results(query: str) -> list[dict]:
    """Search DuckDuckGo for the given query and return the hits.

    Args:
        query: The search query string.

    Returns:
        A list of search results, where each result is a dictionary that
        includes the snippet, title, and link. If the search fails, a string
        starting with "Error performing search:" is returned instead.
    """
    try:
        # A fresh search tool is built per call, configured to return a list.
        ddg = DuckDuckGoSearchResults(output_format="list")
        hits = ddg.invoke(query)
    except Exception as exc:
        return f"Error performing search: {exc!s}"
    return hits
@tool
def fetch_website(url: str) -> str:
    """Fetch the text content of a website.

    Args:
        url: The URL of the website to fetch.

    Returns:
        The page content of the first document loaded from the URL. If loading
        fails or yields no documents, a string starting with
        "Error fetching website:" is returned instead.
    """
    try:
        docs = WebBaseLoader(url).load()
    except Exception as e:
        # Match the other tools in this file: report failures as text
        # instead of letting the exception escape the tool call.
        return f"Error fetching website: {str(e)}"

    # Guard against an empty result so docs[0] cannot raise IndexError.
    if not docs:
        return f"Error fetching website: no content loaded from {url}"
    return docs[0].page_content
def get_wiki_title(query: str) -> str:
    """Look up the title of the Wikipedia article matching a user query.

    Args:
        query: A user query.

    Returns:
        The first line of the Wikipedia tool output with "Page: " removed,
        which is expected to be the article title. A blank query yields
        "Please provide a valid query."; any exception yields a string
        starting with "Error retrieving information:".
    """
    if query.strip() == "":
        return "Please provide a valid query."

    try:
        # Only the title is needed, so request one result and cap its content length.
        lookup = WikipediaQueryRun(
            api_wrapper=WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=1000)
        )
        # The output is assumed to look like "Page: <title>\nSummary: <summary>".
        first_line, _, _ = lookup.run(query).partition("\n")
        return first_line.replace("Page: ", "")
    except Exception as err:
        return f"Error retrieving information: {err!s}"
@tool
def get_wiki_full(query: str) -> str:
    """Scrape the content of a Wikipedia page based on the user query.

    Args:
        query: The user query to search for on Wikipedia.

    Returns:
        A single string containing the text of the Wikipedia page, truncated
        to 32,000 characters. If the title lookup or the page download fails,
        a string describing the error is returned instead.
    """
    # Imported locally so this block does not depend on the file's import header.
    from urllib.parse import quote

    title = get_wiki_title(query)
    # get_wiki_title reports failures as plain strings; pass them through
    # instead of requesting a bogus article URL built from an error message.
    if title == "Please provide a valid query." or title.startswith(
        "Error retrieving information:"
    ):
        return title

    # Percent-encode the title so characters such as "?" or "#" stay in the path.
    slug = quote(title.replace(" ", "_"))
    url = f"https://en.wikipedia.org/wiki/{slug}"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # A timeout keeps the tool from hanging; raise_for_status surfaces 404s etc.
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error retrieving Wikipedia page: {str(e)}"

    soup = BeautifulSoup(response.content, "html.parser")
    # Get all content from the main article container.
    content = soup.find("div", {"id": "mw-content-text"})
    if content is None:
        return f"Error retrieving Wikipedia page: no article content found at {url}"
    # Cap at 32,000 characters (roughly 8k tokens) to avoid excessive length.
    return content.get_text()[:32_000]
# @tool
# def youtube_transcript(url: str) -> str:
# """Retrieve transcript from Youtube based url.
# Args:
# url: input youtube url.
# Returns:
# A single string containing the transcript of the youtube videos.
# """
# max_attempts = 5 # Set a maximum number of attempts
# attempts = 0
# loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
# while attempts < max_attempts:
# try:
# docs = loader.load()
# return docs[0].page_content
# except Exception as e:
# attempts += 1
# print(f"Attempt {attempts} failed: {e}")
# # Optionally add a delay before retrying
# time.sleep(1) # Import the time module
# return "Failed to retrieve transcript after multiple attempts."
@tool
def python_repl_tool(code: str) -> str:
    """
    Run Python code and return what it printed.

    Use this tool to run Python code for calculations, data analysis,
    or any computational tasks. The code runs in a persistent Python
    environment, so variables and imports are preserved between calls.

    Args:
        code: Python code to execute

    Returns:
        The output of the code execution (stdout) or error message
    """
    # NOTE(review): `code` is handed to the shared REPL as-is; confirm that
    # callers are trusted or that this process runs in a sandbox.
    try:
        # Fall back to a fixed message when the run produced no output.
        return python_repl.run(code) or "Code executed successfully (no output)"
    except Exception as exc:
        return f"Error: {exc!s}"