Spaces:
Running
Running
| # Import necessary libraries | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
class WebScraper:
    """Download pages and collect the text of their ``<p>`` elements.

    Results accumulate in ``self.data``, a DataFrame with one row per
    scraped URL and the columns ``url`` and ``content``.
    """

    def __init__(self, urls, *, timeout=10.0):
        """Store the URLs to scrape and start with an empty result table.

        Args:
            urls: Iterable of URL strings. A single string is accepted and
                treated as a one-element list rather than being iterated
                character by character.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``
                argument, so a stalled server cannot block forever.
        """
        self.urls = [urls] if isinstance(urls, str) else urls
        self.timeout = timeout
        self.data = pd.DataFrame()

    def scrape(self):
        """Fetch every URL and add one ``url``/``content`` row per page.

        ``content`` is the text of all ``<p>`` tags joined by single spaces.
        Rows are added to whatever ``self.data`` already holds, so calling
        this method repeatedly keeps accumulating results.

        Raises:
            Whatever ``requests.get`` raises (connection errors, timeouts).
            Rows scraped before the failure are still stored.
        """
        rows = []
        try:
            for url in self.urls:
                response = requests.get(url, timeout=self.timeout)
                soup = BeautifulSoup(response.text, 'html.parser')
                text = ' '.join(p.text for p in soup.find_all('p'))
                rows.append({'url': url, 'content': text})
        finally:
            # Commit what was collected even if a later URL failed, matching
            # the incremental behaviour of the original per-row append.
            self._store(rows)

    def _store(self, rows):
        """Add the collected row dicts to ``self.data`` in one operation."""
        if not rows:
            return
        # DataFrame.append was removed in pandas 2.0; building the frame once
        # also avoids copying the whole table for every scraped page.
        scraped = pd.DataFrame(rows, columns=['url', 'content'])
        if self.data.empty:
            self.data = scraped
        else:
            self.data = pd.concat([self.data, scraped], ignore_index=True)

    def get_data(self):
        """Return the DataFrame of results scraped so far."""
        return self.data