import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import json
import os
from urllib.parse import urljoin

from langchain.tools import tool

# Default network timeout (seconds) so a stalled server cannot hang a tool forever.
_REQUEST_TIMEOUT = 15

# Desktop browser User-Agent: Google/Bing serve markedly different (or blocked)
# markup to the default python-requests UA.
_BROWSER_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)


class WebSearchTools:
    """LangChain tools for web search, page scraping, and image retrieval.

    NOTE(review): these tools scrape Google/Bing HTML directly; the CSS class
    names and result-tile attributes they rely on are unofficial and change
    without notice — expect empty results when the markup shifts.
    """

    @tool("Search the internet")
    def search_internet(query):
        """
        Search Google News for the given query.

        Args:
            query (str): The search query (encoded automatically).

        Returns:
            list: A list of dicts with 'title', 'link', and 'snippet' keys.
                  Entries missing any of the three fields are skipped.
        """
        headers = {"User-Agent": _BROWSER_UA}
        # Let requests URL-encode the query; raw f-string interpolation breaks
        # on spaces, '&', '#', and non-ASCII characters.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": query, "tbm": "nws"},
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, "html.parser")

        results = []
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            title_tag = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'})
            link_tag = item.find('a')
            snippet_tag = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'})
            # Guard against partial tiles: calling .get_text()/['href'] on a
            # None find() result raised AttributeError in the original.
            if title_tag is None or link_tag is None or snippet_tag is None:
                continue
            results.append({
                'title': title_tag.get_text(),
                'link': link_tag.get('href', ''),
                'snippet': snippet_tag.get_text(),
            })
        return results

    @tool("Scrape website content")
    def scrape_and_summarize_website(url):
        """
        Scrape the given URL, returning its paragraph text; also download the
        page's images into a local "downloaded_images" folder as a side effect.

        Args:
            url (str): The website URL to scrape.

        Returns:
            str: The concatenated <p> text content from the page.
        """
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract paragraph text.
        text = ' '.join(para.get_text() for para in soup.find_all('p'))

        # Download images. Resolve each src against the page URL: relative
        # paths like "/img/x.png" are common and failed in the original.
        image_urls = [
            urljoin(url, img['src'])
            for img in soup.find_all('img')
            if 'src' in img.attrs
        ]
        image_folder = "downloaded_images"
        os.makedirs(image_folder, exist_ok=True)
        for i, img_url in enumerate(image_urls):
            try:
                img_data = requests.get(img_url, timeout=_REQUEST_TIMEOUT).content
            except requests.RequestException as e:
                # Best-effort: one broken image must not abort the scrape.
                print(f"Failed to download {img_url}: {e}")
                continue
            img_name = os.path.join(image_folder, f'image_{i+1}.jpg')
            with open(img_name, 'wb') as img_file:
                img_file.write(img_data)

        return text

    @tool("Download image")
    def download_image(image_url, save_path):
        """
        Download an image from the given URL and save it to the specified path.

        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The path where the image will be saved.

        Returns:
            None

        Raises:
            requests.HTTPError: If the server returns an error status
                (previously an HTML error page was silently written to disk).
        """
        response = requests.get(image_url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
        with open(save_path, 'wb') as img_file:
            img_file.write(response.content)

    @tool("Search images")
    def bing_image_search(query, num_images=5):
        """
        Search Bing Images for the given query.

        Args:
            query (str): The search query.
            num_images (int): The maximum number of images to retrieve.

        Returns:
            list: A list of dicts with 'title' and 'url' keys for each image.
                  Returns [] on a non-200 response.
        """
        headers = {'User-Agent': _BROWSER_UA}
        params = {'q': query, 'count': num_images}
        response = requests.get(
            'https://www.bing.com/images/search',
            headers=headers,
            params=params,
            timeout=_REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Failed to retrieve results: {response.status_code}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for img_tag in soup.find_all('a', class_='iusc')[:num_images]:
            m = img_tag.get('m')
            if not m:
                continue
            try:
                # The 'm' attribute holds a JSON blob describing the result tile.
                m_json = json.loads(m)
            except (json.JSONDecodeError, TypeError):
                # One malformed tile must not kill the whole result list.
                continue
            results.append({
                # BUG FIX: 'turl' is the thumbnail URL; the title lives under
                # 't'. The original returned a URL in the 'title' field.
                'title': m_json.get('t'),
                'url': m_json.get('murl'),  # full-resolution image URL
            })
        return results

    @tool("Download images")
    def download_images(results, folder='images'):
        """
        Download images from the given list of results into the folder.

        Args:
            results (list): Dicts with 'title' and 'url' keys, as produced by
                bing_image_search.
            folder (str): The destination folder (created if missing).

        Returns:
            list: The paths of successfully saved images.
        """
        os.makedirs(folder, exist_ok=True)

        image_paths = []
        for i, result in enumerate(results, 1):
            try:
                img_response = requests.get(result['url'], timeout=_REQUEST_TIMEOUT)
                img = Image.open(BytesIO(img_response.content))
                # JPEG cannot encode RGBA/P modes (common for PNG sources);
                # the original raised "cannot write mode RGBA as JPEG" here.
                if img.mode not in ('RGB', 'L'):
                    img = img.convert('RGB')
                img_path = os.path.join(folder, f"image_{i}.jpg")
                img.save(img_path)
                image_paths.append(img_path)
                print(f"Downloaded {result['title']} to {img_path}")
            except Exception as e:
                # Best-effort batch: report and continue past any bad entry.
                print(f"Failed to download {result['title']}: {e}")
        return image_paths