Spaces:

SwatGarg
/

Content_Creation

Build error

File size: 3,003 Bytes

d43bcfa
 
 
039a7c8
d43bcfa
 
039a7c8
d43bcfa
c1bc865
 
 
 
 
 
 
 
 
d43bcfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf334a4
039a7c8
d43bcfa
c1bc865
 
 
 
 
 
 
 
 
d43bcfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
039a7c8
bf334a4
c1bc865
 
 
 
 
 
 
 
 
 
bf334a4
 
c1bc865

import requests
from bs4 import BeautifulSoup
import os
from langchain.tools import tool

class WebSearchTools:
    """Namespace of agent tools for news search, page scraping and image download.

    Each function is declared without ``self`` and wrapped by the ``tool``
    decorator, so it is used as a stand-alone tool rather than as a bound
    method.

    NOTE(review): LangChain's ``tool`` decorator is understood to derive the
    tool's argument schema from the function signature and its description
    from the docstring, so signatures and docstring wording are deliberately
    left unchanged here -- confirm against the installed LangChain version.
    """

    # Seconds to wait for any single HTTP request. ``requests`` has no
    # default timeout, so without this a stalled server blocks the tool forever.
    _TIMEOUT_SECONDS = 10

    @tool("Search the internet")
    def search_internet(query):
        """
        Search the internet for the given query and return a list of search results with title, link, and snippet.
        
        Args:
            query (str): The search query.
        
        Returns:
            list: A list of dictionaries containing the title, link, and snippet of each search result.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # Pass the query through ``params`` so requests URL-encodes it; pasting
        # it into the URL breaks on characters such as '&', '#' and '+'.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": query, "tbm": "nws"},
            headers=headers,
            timeout=WebSearchTools._TIMEOUT_SECONDS,
        )
        soup = BeautifulSoup(response.text, "html.parser")

        results = []
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            title_tag = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'})
            link_tag = item.find('a', href=True)
            snippet_tag = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'})
            # ``find`` returns None when the markup differs from what is
            # expected; skip incomplete cards instead of crashing the search.
            if title_tag is None or link_tag is None:
                continue
            results.append({
                'title': title_tag.get_text(),
                'link': link_tag['href'],
                # A result without a snippet is still useful to the caller.
                'snippet': snippet_tag.get_text() if snippet_tag is not None else '',
            })
        return results

    @tool("Scrape website content")
    def scrape_and_summarize_website(url):
        """
        Scrape the given website URL and return the extracted text content. Download images and save them locally.
        
        Args:
            url (str): The website URL to scrape.
        
        Returns:
            str: The extracted text content from the website.
        """
        # Local import keeps this block self-contained (no change to the
        # file-level import list is required).
        from urllib.parse import urljoin

        response = requests.get(url, timeout=WebSearchTools._TIMEOUT_SECONDS)
        soup = BeautifulSoup(response.text, 'html.parser')

        # The returned text is the content of every <p> element, space-joined.
        text = ' '.join(para.get_text() for para in soup.find_all('p'))

        # Image download is best-effort: a bad image must not discard the text.
        image_urls = [img['src'] for img in soup.find_all('img') if 'src' in img.attrs]
        image_folder = "downloaded_images"
        os.makedirs(image_folder, exist_ok=True)
        for i, img_src in enumerate(image_urls):
            # Resolve relative and protocol-relative ``src`` values against the
            # final page URL (after redirects).
            img_url = urljoin(response.url, img_src)
            # Skip anything that is not fetchable over HTTP(S), e.g. inline
            # ``data:`` URIs or an empty ``src``.
            if not img_url.startswith(('http://', 'https://')):
                continue
            try:
                img_response = requests.get(
                    img_url, timeout=WebSearchTools._TIMEOUT_SECONDS
                )
                # Do not save an HTML error page under an image file name.
                img_response.raise_for_status()
            except requests.RequestException:
                continue
            # File names keep the image's position on the page (1-based), so
            # they match what a fully successful run would produce.
            img_name = os.path.join(image_folder, f'image_{i+1}.jpg')
            with open(img_name, 'wb') as img_file:
                img_file.write(img_response.content)
        return text

    @tool("Download image")
    def download_image(image_url, save_path):
        """
        Download an image from the given URL and save it to the specified path.
        
        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The path where the image will be saved.
        
        Returns:
            None
        """
        response = requests.get(image_url, timeout=WebSearchTools._TIMEOUT_SECONDS)
        # Fail loudly on 4xx/5xx (raises requests.HTTPError) rather than
        # writing the error page to disk as if it were the image.
        response.raise_for_status()
        # Create the destination directory so a fresh path does not fail.
        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(save_path, 'wb') as img_file:
            img_file.write(response.content)