Spaces:
Runtime error
Runtime error
Commit ·
4c8c7a5
1
Parent(s): a6b5498
Update scraper/utils/HebScraper.py
Browse files
- scraper/utils/HebScraper.py +14 -2
scraper/utils/HebScraper.py
CHANGED
|
@@ -207,7 +207,13 @@ class HebScraper:
|
|
| 207 |
|
| 208 |
def search_category_pages(self, category):
|
| 209 |
url = f"https://www.heb.com:443/search/?q={category}"
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 212 |
total_pages = soup.find_all('a', {'data-qe-id': 'paginationListNum'})[-1].text
|
| 213 |
print(total_pages)
|
|
@@ -229,7 +235,13 @@ class HebScraper:
|
|
| 229 |
url = f"{self.base_url}{product['product']['productPageURL']}"
|
| 230 |
urls.append(url)
|
| 231 |
print(urls)
|
| 232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
def get_all_products_from_category_page(self, urls):
|
| 235 |
for url in urls:
|
|
|
|
| 207 |
|
| 208 |
def search_category_pages(self, category):
|
| 209 |
url = f"https://www.heb.com:443/search/?q={category}"
|
| 210 |
+
try:
|
| 211 |
+
response = self.session.get(url)
|
| 212 |
+
except:
|
| 213 |
+
try:
|
| 214 |
+
response = self.session.get(url)
|
| 215 |
+
except:
|
| 216 |
+
return 0
|
| 217 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 218 |
total_pages = soup.find_all('a', {'data-qe-id': 'paginationListNum'})[-1].text
|
| 219 |
print(total_pages)
|
|
|
|
| 235 |
url = f"{self.base_url}{product['product']['productPageURL']}"
|
| 236 |
urls.append(url)
|
| 237 |
print(urls)
|
| 238 |
+
try:
|
| 239 |
+
self.get_all_products_from_category_page(urls)
|
| 240 |
+
except:
|
| 241 |
+
try:
|
| 242 |
+
self.get_all_products_from_category_page(urls)
|
| 243 |
+
except:
|
| 244 |
+
print('Error')
|
| 245 |
|
| 246 |
def get_all_products_from_category_page(self, urls):
|
| 247 |
for url in urls:
|