# Web search, website scraping, and image download tools.
import json
import os
from io import BytesIO
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from langchain.tools import tool
from PIL import Image
class WebSearchTools:
    """Stateless helpers for web search, page scraping, and image retrieval.

    All methods are static: they take plain arguments, perform network I/O
    via ``requests``, and return plain Python data. The Google/Bing scrapers
    depend on the sites' current HTML/CSS structure (class names such as
    ``BVG0Nb`` or ``iusc``) and will silently return fewer results if those
    markers change.
    """

    # Shared browser-like User-Agent so the search engines serve full HTML
    # instead of a bot-detection page.
    _USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )

    # Applied to every HTTP request so a stalled server cannot hang the caller.
    _TIMEOUT = 10

    @staticmethod
    def search_internet(query):
        """
        Search Google News for the given query.

        Args:
            query (str): The search query.

        Returns:
            list: A list of dictionaries containing the title, link, and
                snippet of each search result. Malformed result cards
                (missing title/link/snippet markup) are skipped rather
                than raising.
        """
        search_url = f"https://www.google.com/search?q={query}&tbm=nws"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(
            search_url, headers=headers, timeout=WebSearchTools._TIMEOUT
        )
        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            title_tag = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'})
            link_tag = item.find('a')
            snippet_tag = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'})
            # Guard against partial result cards: the original code raised
            # AttributeError when any of these lookups returned None.
            if title_tag is None or link_tag is None or snippet_tag is None:
                continue
            results.append({
                'title': title_tag.get_text(),
                'link': link_tag['href'],
                'snippet': snippet_tag.get_text()
            })
        return results

    @staticmethod
    def scrape_and_summarize_website(url):
        """
        Scrape the given website URL and return the extracted text content.
        Download images found on the page and save them locally under
        ``downloaded_images/``.

        Args:
            url (str): The website URL to scrape.

        Returns:
            str: The concatenated text of all ``<p>`` elements on the page.
        """
        response = requests.get(url, timeout=WebSearchTools._TIMEOUT)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Extract and join paragraph text.
        paragraphs = soup.find_all('p')
        text = ' '.join(para.get_text() for para in paragraphs)
        # Collect image URLs, resolving relative src values (e.g. "/img/a.png")
        # against the page URL — the original passed them to requests.get
        # unresolved, which raises MissingSchema and aborts the scrape.
        img_tags = soup.find_all('img')
        image_urls = [
            urljoin(url, img['src']) for img in img_tags if 'src' in img.attrs
        ]
        image_folder = "downloaded_images"
        os.makedirs(image_folder, exist_ok=True)
        for i, img_url in enumerate(image_urls):
            # Best-effort: one broken image must not abort the whole scrape.
            try:
                img_data = requests.get(
                    img_url, timeout=WebSearchTools._TIMEOUT
                ).content
            except requests.RequestException as e:
                print(f"Failed to download {img_url}: {e}")
                continue
            img_name = os.path.join(image_folder, f'image_{i+1}.jpg')
            with open(img_name, 'wb') as img_file:
                img_file.write(img_data)
        return text

    @staticmethod
    def download_image(image_url, save_path):
        """
        Download an image from the given URL and save it to the specified path.

        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The path where the image will be saved.

        Returns:
            None
        """
        img_data = requests.get(image_url, timeout=WebSearchTools._TIMEOUT).content
        with open(save_path, 'wb') as img_file:
            img_file.write(img_data)

    @staticmethod
    def bing_image_search(query, num_images=5):
        """
        Search Bing Images for the given query.

        Args:
            query (str): The search query.
            num_images (int): The maximum number of images to retrieve.

        Returns:
            list: A list of dictionaries with the ``title`` and ``url``
                (full-size image URL) of each result. Returns an empty
                list on a non-200 HTTP response.
        """
        url = 'https://www.bing.com/images/search'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        params = {
            'q': query,
            'count': num_images
        }
        response = requests.get(
            url, headers=headers, params=params, timeout=WebSearchTools._TIMEOUT
        )
        if response.status_code != 200:
            print(f"Failed to retrieve results: {response.status_code}")
            return []
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for img_tag in soup.find_all('a', class_='iusc')[:num_images]:
            # Bing embeds per-result metadata as a JSON string in the 'm'
            # attribute; skip anchors without a parseable payload instead of
            # crashing on json.loads(None).
            m = img_tag.get('m')
            if not m:
                continue
            try:
                m_json = json.loads(m)
            except json.JSONDecodeError:
                continue
            img_url = m_json.get('murl')
            # BUG FIX: 'turl' is the *thumbnail URL*, not the title; the
            # image title lives under the 't' key.
            img_title = m_json.get('t')
            results.append({'title': img_title, 'url': img_url})
        return results

    @staticmethod
    def download_images(results, folder='images'):
        """
        Download images from the given list of results and save them to the
        specified folder.

        Args:
            results (list): Dictionaries containing the ``title`` and ``url``
                of each image (as returned by :meth:`bing_image_search`).
            folder (str): The folder where the images will be saved.

        Returns:
            list: Paths of the images that were successfully saved; failures
                are logged and skipped.
        """
        os.makedirs(folder, exist_ok=True)
        image_paths = []
        for i, result in enumerate(results, 1):
            try:
                img_response = requests.get(
                    result['url'], timeout=WebSearchTools._TIMEOUT
                )
                # Re-encoding through PIL validates the payload is a real
                # image and normalizes it to JPEG.
                img = Image.open(BytesIO(img_response.content))
                img_path = os.path.join(folder, f"image_{i}.jpg")
                img.save(img_path)
                image_paths.append(img_path)
                print(f"Downloaded {result['title']} to {img_path}")
            except Exception as e:
                # Deliberate best-effort loop: report and continue.
                print(f"Failed to download {result['title']}: {e}")
        return image_paths