# Content_Creation/tools/web_search_tools.py
# (Hugging Face listing residue converted to comments: uploaded by SwatGarg,
#  commit c8c5091 "Create web_search_tools.py" — the bare lines were invalid Python.)
import json
import os
from io import BytesIO
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup
from PIL import Image
from langchain.tools import tool
class WebSearchTools:
    """LangChain tools for web search, page scraping, and image retrieval.

    Note: methods are declared without ``self`` on purpose — the ``@tool``
    decorator converts each function into a standalone LangChain ``Tool``
    object at class-body evaluation time, so they are never called as bound
    instance methods.
    """

    # Shared browser-like User-Agent; Google/Bing return different (often
    # script-unfriendly) markup to clients without one.
    _USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )

    @tool("Search the internet")
    def search_internet(query):
        """
        Search Google News for the given query.

        Args:
            query (str): The search query (raw text; it is URL-encoded here).

        Returns:
            list: Dicts with 'title', 'link', and 'snippet' for each parsed
                result. Empty list if the request fails or nothing parses —
                Google's class names (BVG0Nb, BNeawe ...) are obfuscated and
                change periodically, so zero results is an expected outcome.
        """
        # quote_plus: the original interpolated the raw query, so spaces and
        # '&'/'#' characters corrupted the URL.
        search_url = f"https://www.google.com/search?q={quote_plus(query)}&tbm=nws"
        headers = {"User-Agent": WebSearchTools._USER_AGENT}
        # timeout so a stalled connection cannot hang the tool forever.
        response = requests.get(search_url, headers=headers, timeout=10)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            # Guard every lookup: find() returns None when the markup shifts,
            # and the original crashed with AttributeError on .get_text().
            title_tag = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'})
            link_tag = item.find('a')
            snippet_tag = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'})
            if title_tag is None or link_tag is None:
                continue
            results.append({
                'title': title_tag.get_text(),
                'link': link_tag.get('href', ''),
                'snippet': snippet_tag.get_text() if snippet_tag is not None else '',
            })
        return results

    @tool("Scrape website content")
    def scrape_and_summarize_website(url):
        """
        Scrape the given URL: return its paragraph text and, as a side effect,
        download its images into the local 'downloaded_images' folder.

        Args:
            url (str): The website URL to scrape.

        Returns:
            str: The concatenated text of all <p> elements on the page.
        """
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Extract paragraph text.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        # urljoin resolves relative src values against the page URL; the
        # original passed them to requests verbatim, which fails for any
        # non-absolute path (the common case).
        image_urls = [
            urljoin(url, img['src'])
            for img in soup.find_all('img')
            if 'src' in img.attrs
        ]
        image_folder = "downloaded_images"
        os.makedirs(image_folder, exist_ok=True)
        for i, img_url in enumerate(image_urls):
            # Best-effort: one broken image must not abort the whole scrape.
            try:
                img_data = requests.get(img_url, timeout=10).content
            except requests.RequestException:
                continue
            img_name = os.path.join(image_folder, f'image_{i+1}.jpg')
            with open(img_name, 'wb') as img_file:
                img_file.write(img_data)
        return text

    @tool("Download image")
    def download_image(image_url, save_path):
        """
        Download an image from the given URL and save it to the specified path.

        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The path where the image bytes will be written.

        Returns:
            None
        """
        img_data = requests.get(image_url, timeout=10).content
        with open(save_path, 'wb') as img_file:
            img_file.write(img_data)

    @tool("Search images")
    def bing_image_search(query, num_images=5):
        """
        Search Bing Images for the given query.

        Args:
            query (str): The search query.
            num_images (int): Maximum number of images to return.

        Returns:
            list: Dicts with 'title' and 'url' (full-size image URL) per hit.
                Empty list on a non-200 response.
        """
        url = 'https://www.bing.com/images/search'
        headers = {'User-Agent': WebSearchTools._USER_AGENT}
        params = {'q': query, 'count': num_images}
        response = requests.get(url, headers=headers, params=params, timeout=10)
        if response.status_code != 200:
            print(f"Failed to retrieve results: {response.status_code}")
            return []
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for img_tag in soup.find_all('a', class_='iusc')[:num_images]:
            m = img_tag.get('m')
            if not m:
                continue  # anchor without metadata — nothing to parse
            try:
                m_json = json.loads(m)
            except json.JSONDecodeError:
                continue  # malformed metadata must not kill the whole search
            # In Bing's metadata JSON, 'murl' is the full-size image URL and
            # 't' is the title; the original read 'turl', which is the
            # *thumbnail URL* key, so 'title' held a URL instead of text.
            results.append({
                'title': m_json.get('t'),
                'url': m_json.get('murl'),
            })
        return results

    @tool("Download images")
    def download_images(results, folder='images'):
        """
        Download each image in `results` and save it to `folder` as JPEG.

        Args:
            results (list): Dicts with 'title' and 'url' keys, as produced by
                bing_image_search.
            folder (str): Destination folder (created if missing).

        Returns:
            list: Paths of the images that were saved successfully.
        """
        os.makedirs(folder, exist_ok=True)
        image_paths = []
        for i, result in enumerate(results, 1):
            try:
                img_response = requests.get(result['url'], timeout=10)
                img = Image.open(BytesIO(img_response.content))
                # JPEG cannot store an alpha channel: saving an RGBA/P-mode
                # source (e.g. PNG) as .jpg raises OSError, which the original
                # silently reported as a download failure. Convert first.
                img = img.convert("RGB")
                img_path = os.path.join(folder, f"image_{i}.jpg")
                img.save(img_path)
                image_paths.append(img_path)
                print(f"Downloaded {result['title']} to {img_path}")
            except Exception as e:
                # Best-effort batch download: log and continue with the rest.
                print(f"Failed to download {result['title']}: {e}")
        return image_paths