Spaces:

SwatGarg
/

Content_Creation

Build error

App Files Files Community

SwatGarg committed on Aug 2, 2024

Commit

d43bcfa

verified ·

1 Parent(s): 05de743

Create web_search_tools.py

Browse files

Files changed (1) hide show

tools/web_search_tools.py +50 -0

tools/web_search_tools.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import requests
+from bs4 import BeautifulSoup
+import os
+class WebSearchTools:
+    @staticmethod
+    def search_internet(query):
+        search_url = f"https://www.google.com/search?q={query}&tbm=nws"
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+        response = requests.get(search_url, headers=headers)
+        soup = BeautifulSoup(response.text, "html.parser")
+        results = []
+        for item in soup.find_all('div', attrs={'class': 'BVG0Nb'}):
+            title = item.find('div', attrs={'class': 'BNeawe vvjwJb AP7Wnd'}).get_text()
+            link = item.find('a')['href']
+            snippet = item.find('div', attrs={'class': 'BNeawe s3v9rd AP7Wnd'}).get_text()
+            results.append({
+                'title': title,
+                'link': link,
+                'snippet': snippet
+            })
+        return results
+    @staticmethod
+    def scrape_and_summarize_website(url):
+        response = requests.get(url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        # Extract and summarize text
+        paragraphs = soup.find_all('p')
+        text = ' '.join([para.get_text() for para in paragraphs])
+        # Download images
+        img_tags = soup.find_all('img')
+        image_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
+        image_folder = "downloaded_images"
+        os.makedirs(image_folder, exist_ok=True)
+        for i, img_url in enumerate(image_urls):
+            img_data = requests.get(img_url).content
+            img_name = os.path.join(image_folder, f'image_{i+1}.jpg')
+            with open(img_name, 'wb') as img_file:
+                img_file.write(img_data)
+        return text
+# Example usage
+query = "latest news"
+results = WebSearchTools.search_internet(query)
+print(results)
+url = "https://example.com"  # Replace with an actual URL
+summary = WebSearchTools.scrape_and_summarize_website(url)
+print(summary)