Spaces:
Build error
Build error
import logging
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from langchain.tools import tool
class WebSearchTools:
    """Helpers for searching the web, scraping pages and downloading images.

    Every helper is a static method, so it can be called either on the class
    (``WebSearchTools.search_internet("...")``) or on an instance.
    """

    # Browser-like User-Agent sent with the search request.
    _USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
    # Seconds to wait on a server; requests never times out unless told to.
    _REQUEST_TIMEOUT = 10
    # Directory (relative to the working directory) that receives page images.
    _IMAGE_FOLDER = "downloaded_images"

    @staticmethod
    def search_internet(query: str) -> list:
        """Search Google News for ``query`` and return the parsed results.

        Args:
            query (str): The search query. It is URL-encoded before sending.

        Returns:
            list: A list of dictionaries with the keys ``'title'``,
            ``'link'`` and ``'snippet'``, one per result block that contained
            all three pieces. Incomplete result blocks are skipped.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an HTTP error status.
        """
        # Passing the query via ``params`` lets requests percent-encode it;
        # interpolating it into the URL breaks on '&', '#', '+', etc.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": query, "tbm": "nws"},
            headers={"User-Agent": WebSearchTools._USER_AGENT},
            timeout=WebSearchTools._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        results = []
        # NOTE(review): the CSS class names below presumably mirror Google's
        # result markup at the time of writing -- confirm they still match.
        for item in soup.find_all("div", attrs={"class": "BVG0Nb"}):
            title_tag = item.find("div", attrs={"class": "BNeawe vvjwJb AP7Wnd"})
            anchor_tag = item.find("a")
            snippet_tag = item.find("div", attrs={"class": "BNeawe s3v9rd AP7Wnd"})

            # ``find`` returns None when an element is missing; skip such
            # blocks instead of failing the whole search.
            if title_tag is None or snippet_tag is None or anchor_tag is None:
                continue
            link = anchor_tag.get("href")
            if not link:
                continue

            results.append({
                'title': title_tag.get_text(),
                'link': link,
                'snippet': snippet_tag.get_text(),
            })
        return results

    @staticmethod
    def scrape_and_summarize_website(url: str) -> str:
        """Scrape ``url``, save its images locally and return its paragraph text.

        The text of every ``<p>`` element is joined with single spaces; no
        summarisation is performed here. Each ``<img>`` with a non-empty
        ``src`` is downloaded to ``downloaded_images/image_<n>.jpg`` (``n``
        counts from 1; the ``.jpg`` suffix is used regardless of the real
        image format). Image downloads are best-effort: a failed download is
        logged and skipped so the extracted text is still returned.

        Args:
            url (str): The website URL to scrape.

        Returns:
            str: The extracted text content from the website.

        Raises:
            requests.RequestException: If fetching the page itself fails,
                times out, or returns an HTTP error status.
        """
        response = requests.get(url, timeout=WebSearchTools._REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract the visible paragraph text.
        text = ' '.join(para.get_text() for para in soup.find_all('p'))

        # Download images.
        image_sources = [img['src'] for img in soup.find_all('img') if img.get('src')]
        os.makedirs(WebSearchTools._IMAGE_FOLDER, exist_ok=True)
        for index, src in enumerate(image_sources, start=1):
            # ``src`` is often relative ("/static/logo.png"); resolve it
            # against the page URL so requests receives an absolute URL.
            image_url = urljoin(url, src)
            save_path = os.path.join(WebSearchTools._IMAGE_FOLDER, f'image_{index}.jpg')
            try:
                WebSearchTools.download_image(image_url, save_path)
            except requests.RequestException as exc:
                # One broken image must not discard the text already extracted.
                logging.getLogger(__name__).warning(
                    "Skipping image %s: %s", image_url, exc
                )
        return text

    @staticmethod
    def download_image(image_url: str, save_path: str) -> None:
        """Download an image from ``image_url`` and write it to ``save_path``.

        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The path where the image will be saved.

        Returns:
            None

        Raises:
            requests.RequestException: If the download fails, times out, or
                returns an HTTP error status (nothing is written in that case).
            OSError: If ``save_path`` cannot be opened for writing.
        """
        response = requests.get(image_url, timeout=WebSearchTools._REQUEST_TIMEOUT)
        # Check the status before opening the file so an error page is never
        # saved to disk as if it were image data.
        response.raise_for_status()
        with open(save_path, 'wb') as img_file:
            img_file.write(response.content)