mumer119131 committed on
Commit
1dd34a7
·
1 Parent(s): f834bba

Update scraper/utils/HebScraper.py

Browse files
Files changed (1) hide show
  1. scraper/utils/HebScraper.py +9 -4
scraper/utils/HebScraper.py CHANGED
@@ -226,10 +226,14 @@ class HebScraper:
226
  self.query['variables']['params']['query'] = category
227
  # burp0_url = f"https://www.heb.com:443/search/?q={category}&pageNumber={page}"
228
  url = "https://www.heb.com:443/graphql"
229
- response = self.session.post(url, json=self.query)
230
- with open('heb.json', 'w+', encoding='utf-8') as file:
231
- file.write(response.text)
232
- products = response.json()['data']['productSearchV2']['records']
 
 
 
 
233
  urls = []
234
  for product in products:
235
  url = f"{self.base_url}{product['product']['productPageURL']}"
@@ -275,6 +279,7 @@ class HebScraper:
275
  for category in self.categories:
276
  pages = self.search_category_pages(category)
277
  self.get_urls_of_category_from_page(category, pages)
 
278
  print(f'Finished {category}')
279
  return True
280
  if __name__ == "__main__":
 
226
  self.query['variables']['params']['query'] = category
227
  # burp0_url = f"https://www.heb.com:443/search/?q={category}&pageNumber={page}"
228
  url = "https://www.heb.com:443/graphql"
229
+ try:
230
+ response = self.session.post(url, json=self.query)
231
+ products = response.json()['data']['productSearchV2']['records']
232
+ except:
233
+ self.generate_session()
234
+ response = self.session.post(url, json=self.query)
235
+ products = response.json()['data']['productSearchV2']['records']
236
+
237
  urls = []
238
  for product in products:
239
  url = f"{self.base_url}{product['product']['productPageURL']}"
 
279
  for category in self.categories:
280
  pages = self.search_category_pages(category)
281
  self.get_urls_of_category_from_page(category, pages)
282
+ self.generate_session()
283
  print(f'Finished {category}')
284
  return True
285
  if __name__ == "__main__":