import json
import os
from io import BytesIO
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup
from PIL import Image

from langchain.tools import tool
class WebSearchTools:
    """LangChain tools for web search, page scraping, and image download.

    All HTTP requests carry an explicit timeout so a stalled server cannot
    hang the agent indefinitely.  Scraping relies on Google/Bing internal
    CSS class names, which are brittle; selectors are guarded so a markup
    change degrades to an empty result instead of raising.
    """

    # Shared request timeout (seconds) for every outbound HTTP call.
    REQUEST_TIMEOUT = 10

    # Browser-like UA string; without it Google/Bing serve a degraded page.
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )

    @tool("Search the internet")
    def search_internet(query):
        """
        Search Google News for the given query and return the results.

        Args:
            query (str): The search query.

        Returns:
            list: A list of dicts with 'title', 'link', and 'snippet' keys,
                one per search result.  Empty on markup changes or no hits.
        """
        # quote_plus the query so spaces and special characters survive
        # URL interpolation (the raw f-string form produced broken URLs).
        search_url = (
            f"https://www.google.com/search?q={quote_plus(query)}&tbm=nws"
        )
        headers = {"User-Agent": WebSearchTools.USER_AGENT}
        response = requests.get(
            search_url, headers=headers, timeout=WebSearchTools.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            # These class names are Google-internal and change without
            # notice; guard each lookup so a missing node is skipped
            # instead of raising AttributeError on None.
            title_node = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'})
            snippet_node = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'})
            link_node = item.find('a')
            if not (title_node and snippet_node and link_node):
                continue
            results.append({
                'title': title_node.get_text(),
                'link': link_node['href'],
                'snippet': snippet_node.get_text(),
            })
        return results

    @tool("Scrape website content")
    def scrape_and_summarize_website(url):
        """
        Scrape the given URL, return its paragraph text, and save its
        images into the local ``downloaded_images`` folder.

        Args:
            url (str): The website URL to scrape.

        Returns:
            str: The concatenated text of all <p> elements on the page.

        Raises:
            requests.HTTPError: If the page itself cannot be fetched.
        """
        response = requests.get(url, timeout=WebSearchTools.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Collect the page's visible paragraph text.
        text = ' '.join(para.get_text() for para in soup.find_all('p'))

        # Download every <img> that declares a src attribute.
        image_folder = "downloaded_images"
        os.makedirs(image_folder, exist_ok=True)
        image_urls = [
            img['src'] for img in soup.find_all('img') if 'src' in img.attrs
        ]
        for i, img_url in enumerate(image_urls, start=1):
            # Resolve relative src values (e.g. "/static/x.png") against
            # the page URL; the original code passed them to requests
            # verbatim, which fails for anything but absolute URLs.
            absolute_url = urljoin(url, img_url)
            try:
                img_data = requests.get(
                    absolute_url, timeout=WebSearchTools.REQUEST_TIMEOUT
                ).content
            except requests.RequestException:
                # Best-effort: one unreachable image should not abort the
                # scrape of the whole page.
                continue
            img_name = os.path.join(image_folder, f'image_{i}.jpg')
            with open(img_name, 'wb') as img_file:
                img_file.write(img_data)
        return text

    @tool("Download image")
    def download_image(image_url, save_path):
        """
        Download an image from the given URL and save it to disk.

        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The filesystem path to write the image to.

        Returns:
            None
        """
        img_data = requests.get(
            image_url, timeout=WebSearchTools.REQUEST_TIMEOUT
        ).content
        with open(save_path, 'wb') as img_file:
            img_file.write(img_data)

    @tool("Search images")
    def bing_image_search(query, num_images=5):
        """
        Search Bing Images for the given query.

        Args:
            query (str): The search query.
            num_images (int): Maximum number of images to return.

        Returns:
            list: Dicts with 'title' and 'url' keys for each image; empty
                list if the request fails or the markup has changed.
        """
        url = 'https://www.bing.com/images/search'
        headers = {'User-Agent': WebSearchTools.USER_AGENT}
        params = {'q': query, 'count': num_images}
        response = requests.get(
            url,
            headers=headers,
            params=params,
            timeout=WebSearchTools.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Failed to retrieve results: {response.status_code}")
            return []
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for img_tag in soup.find_all('a', class_='iusc')[:num_images]:
            m = img_tag.get('m')
            if not m:
                # Anchor without the metadata attribute — skip rather
                # than crash json.loads(None).
                continue
            try:
                m_json = json.loads(m)
            except json.JSONDecodeError:
                continue
            # Fix: the image title lives under 't' in Bing's metadata
            # blob; 'turl' (used before) is the thumbnail URL, so every
            # result's 'title' was a URL instead of a title.
            results.append({
                'title': m_json.get('t'),
                'url': m_json.get('murl'),
            })
        return results

    @tool("Download images")
    def download_images(results, folder='images'):
        """
        Download each image in *results* and save it into *folder*.

        Args:
            results (list): Dicts with 'title' and 'url' keys, as returned
                by :meth:`bing_image_search`.
            folder (str): Destination folder; created if missing.

        Returns:
            list: Filesystem paths of the successfully saved images.
        """
        os.makedirs(folder, exist_ok=True)
        image_paths = []
        for i, result in enumerate(results, 1):
            try:
                img_response = requests.get(
                    result['url'], timeout=WebSearchTools.REQUEST_TIMEOUT
                )
                img_response.raise_for_status()
                # Round-trip through PIL to validate the payload really is
                # an image before committing it to disk.
                img = Image.open(BytesIO(img_response.content))
                img_path = os.path.join(folder, f"image_{i}.jpg")
                img.save(img_path)
                image_paths.append(img_path)
                print(f"Downloaded {result['title']} to {img_path}")
            except Exception as e:
                # Deliberate best-effort loop: log and continue so one bad
                # URL or corrupt payload doesn't stop the batch.
                print(f"Failed to download {result['title']}: {e}")
        return image_paths
|