SwatGarg committed on
Commit
d43bcfa
·
verified ·
1 Parent(s): 05de743

Create web_search_tools.py

Browse files
Files changed (1) hide show
  1. tools/web_search_tools.py +50 -0
tools/web_search_tools.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup
5
class WebSearchTools:
    """Static helpers for searching Google News and scraping page text/images.

    NOTE(review): both methods scrape live HTML from Google / arbitrary
    sites. The Google CSS class names below ('BVG0Nb', 'BNeawe ...') are
    unstable and change without notice; a proper search API would be more
    robust. Kept as-is to preserve the existing interface.
    """

    @staticmethod
    def search_internet(query):
        """Search Google News for *query*.

        Args:
            query: Free-text search string (URL-encoded internally).

        Returns:
            A list of dicts with 'title', 'link' and 'snippet' keys.
            Result items missing an expected sub-element are skipped
            rather than raising.

        Raises:
            requests.HTTPError: if Google responds with an error status.
        """
        # quote_plus URL-encodes the query so spaces and '&' don't
        # corrupt the request URL (the original interpolated it raw).
        search_url = f"https://www.google.com/search?q={quote_plus(query)}&tbm=nws"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # timeout prevents an indefinite hang; raise_for_status surfaces
        # HTTP errors instead of silently parsing an error page.
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        results = []
        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
            title_el = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'})
            link_el = item.find('a')
            snippet_el = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'})
            # Original code crashed with AttributeError/TypeError when any
            # of these pieces was missing; skip incomplete items instead.
            if title_el is None or link_el is None or not link_el.has_attr('href'):
                continue
            results.append({
                'title': title_el.get_text(),
                'link': link_el['href'],
                'snippet': snippet_el.get_text() if snippet_el is not None else '',
            })
        return results

    @staticmethod
    def scrape_and_summarize_website(url):
        """Fetch *url*, download its images, and return its paragraph text.

        Args:
            url: Absolute URL of the page to scrape.

        Returns:
            All <p> text on the page joined with single spaces.

        Side effects:
            Saves every downloadable <img> to ./downloaded_images/ as
            image_1.jpg, image_2.jpg, ... (best-effort; failures skipped).

        Raises:
            requests.HTTPError: if the page itself returns an error status.
        """
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Extract and join all paragraph text.
        paragraphs = soup.find_all('p')
        text = ' '.join(para.get_text() for para in paragraphs)
        # Resolve relative src values against the page URL; the original
        # passed them to requests.get unchanged, which fails for paths
        # like "/static/logo.png".
        img_tags = soup.find_all('img')
        image_urls = [urljoin(url, img['src']) for img in img_tags if 'src' in img.attrs]
        image_folder = "downloaded_images"
        os.makedirs(image_folder, exist_ok=True)
        for i, img_url in enumerate(image_urls):
            try:
                img_data = requests.get(img_url, timeout=10).content
            except requests.RequestException:
                # Best-effort download: one broken image should not abort
                # the whole scrape.
                continue
            img_name = os.path.join(image_folder, f'image_{i+1}.jpg')
            with open(img_name, 'wb') as img_file:
                img_file.write(img_data)
        return text
43
# Example usage — guarded so importing this module does not trigger
# network requests and prints as an import-time side effect.
if __name__ == "__main__":
    query = "latest news"
    results = WebSearchTools.search_internet(query)
    print(results)
    url = "https://example.com"  # Replace with an actual URL
    summary = WebSearchTools.scrape_and_summarize_website(url)
    print(summary)