import json
import os
from io import BytesIO
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup
from PIL import Image

from langchain.tools import tool
class WebSearchTools:
    """LangChain tools for web search, page scraping, and image download.

    All HTTP requests carry an explicit timeout so a stalled server cannot
    hang the agent indefinitely.  Scraping relies on Google/Bing internal
    CSS class names, which are brittle; selectors are guarded so a markup
    change degrades to an empty result instead of raising.
    """

    # Shared request timeout (seconds) for every outbound HTTP call.
    REQUEST_TIMEOUT = 10

    # Browser-like UA string; without it Google/Bing serve a degraded page.
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )

    @tool("Search the internet")
    def search_internet(query):
        """
        Search Google News for the given query and return the results.

        Args:
            query (str): The search query.

        Returns:
            list: A list of dicts with 'title', 'link', and 'snippet' keys,
                one per search result.  Empty on markup changes or no hits.
        """
        # quote_plus the query so spaces and special characters survive
        # URL interpolation (the raw f-string form produced broken URLs).
        search_url = (
            f"https://www.google.com/search?q={quote_plus(query)}&tbm=nws"
        )
        headers = {"User-Agent": WebSearchTools.USER_AGENT}
        response = requests.get(
            search_url, headers=headers, timeout=WebSearchTools.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            # These class names are Google-internal and change without
            # notice; guard each lookup so a missing node is skipped
            # instead of raising AttributeError on None.
            title_node = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'})
            snippet_node = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'})
            link_node = item.find('a')
            if not (title_node and snippet_node and link_node):
                continue
            results.append({
                'title': title_node.get_text(),
                'link': link_node['href'],
                'snippet': snippet_node.get_text(),
            })
        return results

    @tool("Scrape website content")
    def scrape_and_summarize_website(url):
        """
        Scrape the given URL, return its paragraph text, and save its
        images into the local ``downloaded_images`` folder.

        Args:
            url (str): The website URL to scrape.

        Returns:
            str: The concatenated text of all <p> elements on the page.

        Raises:
            requests.HTTPError: If the page itself cannot be fetched.
        """
        response = requests.get(url, timeout=WebSearchTools.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Collect the page's visible paragraph text.
        text = ' '.join(para.get_text() for para in soup.find_all('p'))

        # Download every <img> that declares a src attribute.
        image_folder = "downloaded_images"
        os.makedirs(image_folder, exist_ok=True)
        image_urls = [
            img['src'] for img in soup.find_all('img') if 'src' in img.attrs
        ]
        for i, img_url in enumerate(image_urls, start=1):
            # Resolve relative src values (e.g. "/static/x.png") against
            # the page URL; the original code passed them to requests
            # verbatim, which fails for anything but absolute URLs.
            absolute_url = urljoin(url, img_url)
            try:
                img_data = requests.get(
                    absolute_url, timeout=WebSearchTools.REQUEST_TIMEOUT
                ).content
            except requests.RequestException:
                # Best-effort: one unreachable image should not abort the
                # scrape of the whole page.
                continue
            img_name = os.path.join(image_folder, f'image_{i}.jpg')
            with open(img_name, 'wb') as img_file:
                img_file.write(img_data)
        return text

    @tool("Download image")
    def download_image(image_url, save_path):
        """
        Download an image from the given URL and save it to disk.

        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The filesystem path to write the image to.

        Returns:
            None
        """
        img_data = requests.get(
            image_url, timeout=WebSearchTools.REQUEST_TIMEOUT
        ).content
        with open(save_path, 'wb') as img_file:
            img_file.write(img_data)

    @tool("Search images")
    def bing_image_search(query, num_images=5):
        """
        Search Bing Images for the given query.

        Args:
            query (str): The search query.
            num_images (int): Maximum number of images to return.

        Returns:
            list: Dicts with 'title' and 'url' keys for each image; empty
                list if the request fails or the markup has changed.
        """
        url = 'https://www.bing.com/images/search'
        headers = {'User-Agent': WebSearchTools.USER_AGENT}
        params = {'q': query, 'count': num_images}
        response = requests.get(
            url,
            headers=headers,
            params=params,
            timeout=WebSearchTools.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Failed to retrieve results: {response.status_code}")
            return []
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for img_tag in soup.find_all('a', class_='iusc')[:num_images]:
            m = img_tag.get('m')
            if not m:
                # Anchor without the metadata attribute — skip rather
                # than crash json.loads(None).
                continue
            try:
                m_json = json.loads(m)
            except json.JSONDecodeError:
                continue
            # Fix: the image title lives under 't' in Bing's metadata
            # blob; 'turl' (used before) is the thumbnail URL, so every
            # result's 'title' was a URL instead of a title.
            results.append({
                'title': m_json.get('t'),
                'url': m_json.get('murl'),
            })
        return results

    @tool("Download images")
    def download_images(results, folder='images'):
        """
        Download each image in *results* and save it into *folder*.

        Args:
            results (list): Dicts with 'title' and 'url' keys, as returned
                by :meth:`bing_image_search`.
            folder (str): Destination folder; created if missing.

        Returns:
            list: Filesystem paths of the successfully saved images.
        """
        os.makedirs(folder, exist_ok=True)
        image_paths = []
        for i, result in enumerate(results, 1):
            try:
                img_response = requests.get(
                    result['url'], timeout=WebSearchTools.REQUEST_TIMEOUT
                )
                img_response.raise_for_status()
                # Round-trip through PIL to validate the payload really is
                # an image before committing it to disk.
                img = Image.open(BytesIO(img_response.content))
                img_path = os.path.join(folder, f"image_{i}.jpg")
                img.save(img_path)
                image_paths.append(img_path)
                print(f"Downloaded {result['title']} to {img_path}")
            except Exception as e:
                # Deliberate best-effort loop: log and continue so one bad
                # URL or corrupt payload doesn't stop the batch.
                print(f"Failed to download {result['title']}: {e}")
        return image_paths
|