import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from langchain.tools import tool

# Upper bound (seconds) for any single HTTP request. Without a timeout,
# requests waits forever on a stalled server and the calling agent hangs.
_REQUEST_TIMEOUT = 10

# Browser-like User-Agent sent with the search request.
_SEARCH_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Directory (relative to the current working directory) where images found
# while scraping a page are stored.
_IMAGE_FOLDER = "downloaded_images"


def _div_text(parent, css_class):
    """Return the text of the first ``div`` with *css_class* under *parent*, or None."""
    node = parent.find('div', attrs={'class': css_class})
    return node.get_text() if node is not None else None


def _save_page_images(soup, page_url, folder):
    """Best-effort download of every ``<img src=...>`` in *soup* into *folder*."""
    os.makedirs(folder, exist_ok=True)

    sources = [img['src'] for img in soup.find_all('img') if 'src' in img.attrs]
    for index, src in enumerate(sources, start=1):
        if not src:
            # An empty src would resolve to the page itself, not an image.
            continue

        # Pages commonly use relative or protocol-relative image paths;
        # resolve them against the page URL so they can be fetched.
        absolute_url = urljoin(page_url, src)
        try:
            response = requests.get(absolute_url, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException:
            # One broken image (bad URL, data: URI, 404, timeout, ...) must
            # not discard the text that was already extracted from the page.
            continue

        # NOTE: every file is named *.jpg regardless of the real image type.
        image_path = os.path.join(folder, f'image_{index}.jpg')
        with open(image_path, 'wb') as image_file:
            image_file.write(response.content)


class WebSearchTools:
    """Collection of LangChain tools for searching and scraping the web.

    The functions are wrapped by ``@tool`` and take no ``self``; they are
    meant to be accessed as class attributes
    (e.g. ``WebSearchTools.search_internet``).
    """

    @tool("Search the internet")
    def search_internet(query: str) -> list:
        """
        Search the internet for the given query and return a list of search
        results with title, link, and snippet.

        The request goes to Google search with ``tbm=nws`` and the result is
        parsed from the returned HTML.

        Args:
            query (str): The search query.

        Returns:
            list: A list of dictionaries containing the title, link, and
            snippet of each search result. Entries without a title or a link
            are skipped; a missing snippet is returned as an empty string.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        # Pass the query via ``params`` so requests URL-encodes it; raw
        # interpolation breaks on characters such as '&', '#' or '+'.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": query, "tbm": "nws"},
            headers=_SEARCH_HEADERS,
            timeout=_REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, "html.parser")

        # NOTE(review): these class names come from Google's generated markup
        # and are presumably brittle - confirm they still match live pages.
        results = []
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            title = _div_text(item, 'BNeawe vvjwJb AP7Wnd')
            anchor = item.find('a')
            link = anchor.get('href') if anchor is not None else None
            if title is None or link is None:
                # Skip a malformed entry instead of failing the whole search.
                continue

            snippet = _div_text(item, 'BNeawe s3v9rd AP7Wnd')
            results.append({
                'title': title,
                'link': link,
                'snippet': snippet if snippet is not None else '',
            })

        return results

    @tool("Scrape website content")
    def scrape_and_summarize_website(url: str) -> str:
        """
        Scrape the given website URL and return the extracted text content.
        Download images and save them locally.

        The text is the content of all ``<p>`` elements joined with single
        spaces. Images are written to the ``downloaded_images`` folder as
        ``image_<n>.jpg``; images that cannot be downloaded are skipped.

        Args:
            url (str): The website URL to scrape.

        Returns:
            str: The extracted text content from the website.

        Raises:
            requests.RequestException: If the page itself cannot be fetched.
        """
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract the paragraph text.
        text = ' '.join(para.get_text() for para in soup.find_all('p'))

        # Download images (best effort; failures do not affect the text).
        _save_page_images(soup, url, _IMAGE_FOLDER)

        return text

    @tool("Download image")
    def download_image(image_url: str, save_path: str) -> None:
        """
        Download an image from the given URL and save it to the specified path.

        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The path where the image will be saved.

        Returns:
            None

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        response = requests.get(image_url, timeout=_REQUEST_TIMEOUT)
        # Fail before touching the file so an HTML error page is never
        # written to disk in place of the image.
        response.raise_for_status()

        with open(save_path, 'wb') as img_file:
            img_file.write(response.content)