mumer119131 committed on
Commit
b8f895a
·
1 Parent(s): 4c8c7a5

Update scraper/utils/WallmartScraper.py

Browse files
Files changed (1) hide show
  1. scraper/utils/WallmartScraper.py +15 -5
scraper/utils/WallmartScraper.py CHANGED
@@ -5,7 +5,7 @@ import csv
5
  import json
6
  import time
7
  from .DatabaseDataSaver import save_product
8
-
9
  class WallmartScraper:
10
  def __init__(self):
11
  self.ac = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
@@ -14,6 +14,20 @@ class WallmartScraper:
14
  self.categories = [
15
  'health', 'beauty', 'personal care'
16
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def get_product_detail(self, url):
18
  response = self.session.get(
19
  url, headers=self.headers)
@@ -124,7 +138,3 @@ class WallmartScraper:
124
  return True
125
 
126
 
127
- if __name__ == "__main__":
128
- print('Starting...')
129
- scraper = HebScraper()
130
- response = scraper.run()
 
5
  import json
6
  import time
7
  from .DatabaseDataSaver import save_product
8
+ from undetected_chromedriver import Chrome, ChromeOptions
9
  class WallmartScraper:
10
  def __init__(self):
11
  self.ac = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
 
14
  self.categories = [
15
  'health', 'beauty', 'personal care'
16
  ]
17
+ self.generate_session()
18
+
19
def generate_session(self):
    """Copy cookies from a headless Chrome visit to walmart.com into ``self.session``.

    Launches an undetected-chromedriver ``Chrome`` instance with the
    ``--headless``, ``--disable-gpu`` and ``--no-sandbox`` flags, loads the
    Walmart home page, and stores every cookie the browser received on
    ``self.session.cookies`` by name and value.

    NOTE(review): ``self.session`` must already exist when this runs; it is
    presumably a ``requests.Session`` created earlier in ``__init__`` --
    confirm against the full file.
    """
    options = ChromeOptions()
    for flag in ("--headless", "--disable-gpu", "--no-sandbox"):
        options.add_argument(flag)

    driver = Chrome(options=options)
    try:
        driver.get("https://www.walmart.com/")
        # Read the cookies once and reuse the list for both the debug
        # output and the session update.
        cookies = driver.get_cookies()
    finally:
        # Always shut the browser down, even if the page load fails;
        # otherwise each scraper instance leaves a Chrome process behind.
        driver.quit()

    # NOTE(review): this prints raw cookie values to stdout; kept to preserve
    # the existing output, but consider removing or logging at debug level.
    print(cookies)
    for cookie in cookies:
        self.session.cookies.set(cookie['name'], cookie['value'])
30
+
31
  def get_product_detail(self, url):
32
  response = self.session.get(
33
  url, headers=self.headers)
 
138
  return True
139
 
140