File size: 5,520 Bytes
c8c5091
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import json
import os
from io import BytesIO
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup
from PIL import Image

from langchain.tools import tool

class WebSearchTools:
    """Agent tools for web search, page scraping, and image download.

    Every method is wrapped with langchain's ``@tool`` decorator, which
    registers it as a standalone tool callable (hence no ``self`` parameter).
    All HTTP calls carry an explicit timeout so a stalled server cannot
    hang the agent indefinitely.
    """

    @tool("Search the internet")
    def search_internet(query):
        """
        Search Google News for the given query.

        Args:
            query (str): The search query.

        Returns:
            list: Dicts with 'title', 'link', and 'snippet' keys, one per
                result. Results missing the expected markup are skipped.
        """
        # URL-encode the query so spaces/special characters don't break the URL.
        search_url = f"https://www.google.com/search?q={quote_plus(query)}&tbm=nws"
        # Browser-like User-Agent so Google serves the regular HTML results
        # page rather than a bot-detection page.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(search_url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        # NOTE(review): these obfuscated class names are Google-internal and
        # change without notice; an official search API would be more robust.
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            title_tag = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'})
            link_tag = item.find('a')
            snippet_tag = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'})
            # Skip partially-parsed results instead of raising AttributeError
            # when find() returns None.
            if not (title_tag and link_tag and snippet_tag):
                continue
            results.append({
                'title': title_tag.get_text(),
                'link': link_tag.get('href', ''),
                'snippet': snippet_tag.get_text(),
            })
        return results

    @tool("Scrape website content")
    def scrape_and_summarize_website(url):
        """
        Scrape the given website URL and return its paragraph text; also
        download the page's images into a local ``downloaded_images`` folder.

        Args:
            url (str): The website URL to scrape.

        Returns:
            str: The concatenated text of all <p> elements on the page.
        """
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Extract all paragraph text.
        paragraphs = soup.find_all('p')
        text = ' '.join(para.get_text() for para in paragraphs)
        # Download images referenced by the page. Resolve relative src values
        # against the page URL so the requests don't fail on paths like
        # "/static/logo.png".
        img_tags = soup.find_all('img')
        image_urls = [urljoin(url, img['src']) for img in img_tags if 'src' in img.attrs]
        image_folder = "downloaded_images"
        os.makedirs(image_folder, exist_ok=True)
        for i, img_url in enumerate(image_urls):
            try:
                img_data = requests.get(img_url, timeout=10).content
            except requests.RequestException:
                # Best-effort: one broken image shouldn't abort the scrape.
                continue
            img_name = os.path.join(image_folder, f'image_{i+1}.jpg')
            with open(img_name, 'wb') as img_file:
                img_file.write(img_data)
        return text

    @tool("Download image")
    def download_image(image_url, save_path):
        """
        Download an image from the given URL and save it to the specified path.

        Args:
            image_url (str): The URL of the image to download.
            save_path (str): The path where the image will be saved.

        Returns:
            None
        """
        img_data = requests.get(image_url, timeout=10).content
        with open(save_path, 'wb') as img_file:
            img_file.write(img_data)

    @tool("Search images")
    def bing_image_search(query, num_images=5):
        """
        Search Bing Images for the given query.

        Args:
            query (str): The search query.
            num_images (int): The number of images to retrieve.

        Returns:
            list: Dicts with 'title' and 'url' (full-size image URL) keys.
                Returns an empty list when the request fails.
        """
        url = 'https://www.bing.com/images/search'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        params = {
            'q': query,
            'count': num_images
        }
        response = requests.get(url, headers=headers, params=params, timeout=10)

        if response.status_code != 200:
            print(f"Failed to retrieve results: {response.status_code}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        results = []
        for img_tag in soup.find_all('a', class_='iusc')[:num_images]:
            m = img_tag.get('m')
            # The 'm' attribute may be absent or malformed; skip rather than crash.
            if not m:
                continue
            try:
                m_json = json.loads(m)
            except json.JSONDecodeError:
                continue
            img_url = m_json.get('murl')  # 'murl' = full-size media URL
            # Bug fix: 't' carries the image title in Bing's metadata JSON;
            # 'turl' is the thumbnail URL and was previously reported as the
            # title. (Key names are Bing-internal — confirm if markup changes.)
            img_title = m_json.get('t')
            results.append({'title': img_title, 'url': img_url})

        return results

    @tool("Download images")
    def download_images(results, folder='images'):
        """
        Download images from the given list of results into ``folder``.

        Args:
            results (list): Dicts with 'title' and 'url' keys, as produced
                by ``bing_image_search``.
            folder (str): The folder where the images will be saved.

        Returns:
            list: Paths of the images that were saved successfully.
        """
        os.makedirs(folder, exist_ok=True)

        image_paths = []
        for i, result in enumerate(results, 1):
            try:
                img_response = requests.get(result['url'], timeout=10)
                img = Image.open(BytesIO(img_response.content))
                # JPEG cannot store an alpha channel/palette; convert modes
                # like RGBA or P so save() doesn't raise OSError.
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                img_path = os.path.join(folder, f"image_{i}.jpg")
                img.save(img_path)
                image_paths.append(img_path)
                print(f"Downloaded {result['title']} to {img_path}")
            except Exception as e:
                # Best-effort: report and continue with the remaining images.
                print(f"Failed to download {result['title']}: {e}")

        return image_paths