Spaces:
Runtime error
Runtime error
Commit ·
b8f895a
1
Parent(s): 4c8c7a5
Update scraper/utils/WallmartScraper.py
Browse files
scraper/utils/WallmartScraper.py
CHANGED
|
@@ -5,7 +5,7 @@ import csv
|
|
| 5 |
import json
|
| 6 |
import time
|
| 7 |
from .DatabaseDataSaver import save_product
|
| 8 |
-
|
| 9 |
class WallmartScraper:
|
| 10 |
def __init__(self):
|
| 11 |
self.ac = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
|
|
@@ -14,6 +14,20 @@ class WallmartScraper:
|
|
| 14 |
self.categories = [
|
| 15 |
'health', 'beauty', 'personal care'
|
| 16 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def get_product_detail(self, url):
|
| 18 |
response = self.session.get(
|
| 19 |
url, headers=self.headers)
|
|
@@ -124,7 +138,3 @@ class WallmartScraper:
|
|
| 124 |
return True
|
| 125 |
|
| 126 |
|
| 127 |
-
if __name__ == "__main__":
|
| 128 |
-
print('Starting...')
|
| 129 |
-
scraper = HebScraper()
|
| 130 |
-
response = scraper.run()
|
|
|
|
| 5 |
import json
|
| 6 |
import time
|
| 7 |
from .DatabaseDataSaver import save_product
|
| 8 |
+
from undetected_chromedriver import Chrome, ChromeOptions
|
| 9 |
class WallmartScraper:
|
| 10 |
def __init__(self):
|
| 11 |
self.ac = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
|
|
|
|
| 14 |
self.categories = [
|
| 15 |
'health', 'beauty', 'personal care'
|
| 16 |
]
|
| 17 |
+
self.generate_session()
|
| 18 |
+
|
| 19 |
+
def generate_session(self):
    """Seed ``self.session`` with cookies obtained from a real browser visit.

    Launches a headless Chrome instance through ``undetected_chromedriver``,
    loads the Walmart home page, and copies every cookie the browser
    received (name and value only) into ``self.session.cookies``.

    NOTE(review): ``self.session`` must already exist when this runs; it is
    presumably created earlier in ``__init__`` (not visible here) -- confirm.
    """
    options = ChromeOptions()
    for flag in ("--headless", "--disable-gpu", "--no-sandbox"):
        options.add_argument(flag)

    driver = Chrome(options=options)
    try:
        driver.get("https://www.walmart.com/")
        # Fetch once and reuse the result.
        cookies = driver.get_cookies()
    finally:
        # Always shut the browser down, even if the page load fails;
        # otherwise every scraper instance leaks a Chrome process.
        driver.quit()

    # Cookies are deliberately not printed: they are session credentials
    # and should not end up in stdout/logs.
    for cookie in cookies:
        self.session.cookies.set(cookie['name'], cookie['value'])
|
| 30 |
+
|
| 31 |
def get_product_detail(self, url):
|
| 32 |
response = self.session.get(
|
| 33 |
url, headers=self.headers)
|
|
|
|
| 138 |
return True
|
| 139 |
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|