Spaces: Runtime error
Commit · 1dd34a7
1 Parent(s): f834bba
Update scraper/utils/HebScraper.py
Browse files
scraper/utils/HebScraper.py
CHANGED
|
@@ -226,10 +226,14 @@ class HebScraper:
|
|
| 226 |
self.query['variables']['params']['query'] = category
|
| 227 |
# burp0_url = f"https://www.heb.com:443/search/?q={category}&pageNumber={page}"
|
| 228 |
url = "https://www.heb.com:443/graphql"
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
urls = []
|
| 234 |
for product in products:
|
| 235 |
url = f"{self.base_url}{product['product']['productPageURL']}"
|
|
@@ -275,6 +279,7 @@ class HebScraper:
|
|
| 275 |
for category in self.categories:
|
| 276 |
pages = self.search_category_pages(category)
|
| 277 |
self.get_urls_of_category_from_page(category, pages)
|
|
|
|
| 278 |
print(f'Finished {category}')
|
| 279 |
return True
|
| 280 |
if __name__ == "__main__":
|
|
|
|
| 226 |
self.query['variables']['params']['query'] = category
|
| 227 |
# burp0_url = f"https://www.heb.com:443/search/?q={category}&pageNumber={page}"
|
| 228 |
url = "https://www.heb.com:443/graphql"
|
| 229 |
+
try:
|
| 230 |
+
response = self.session.post(url, json=self.query)
|
| 231 |
+
products = response.json()['data']['productSearchV2']['records']
|
| 232 |
+
except:
|
| 233 |
+
self.generate_session()
|
| 234 |
+
response = self.session.post(url, json=self.query)
|
| 235 |
+
products = response.json()['data']['productSearchV2']['records']
|
| 236 |
+
|
| 237 |
urls = []
|
| 238 |
for product in products:
|
| 239 |
url = f"{self.base_url}{product['product']['productPageURL']}"
|
|
|
|
| 279 |
for category in self.categories:
|
| 280 |
pages = self.search_category_pages(category)
|
| 281 |
self.get_urls_of_category_from_page(category, pages)
|
| 282 |
+
self.generate_session()
|
| 283 |
print(f'Finished {category}')
|
| 284 |
return True
|
| 285 |
if __name__ == "__main__":
|