attendantelectro committed on
Commit
882b44e
·
verified ·
1 Parent(s): 4ec624a

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +8 -10
scraper.py CHANGED
@@ -1,18 +1,16 @@
1
  from selenium import webdriver
2
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
3
  import zipfile
4
  import os
5
 
6
  def scrape_pages(base_url, start_page, end_page):
7
- options = webdriver.ChromeOptions()
8
- options.add_argument('--headless')
9
- options.add_argument('--no-sandbox')
10
- options.add_argument('--disable-dev-shm-usage')
11
-
12
- driver = webdriver.Remote(
13
- command_executor='http://localhost:4444/wd/hub',
14
- options=options
15
- )
16
 
17
  if not os.path.exists('pages'):
18
  os.makedirs('pages')
 
1
  from selenium import webdriver
2
+ from selenium.webdriver.chrome.options import Options
3
  import zipfile
4
  import os
5
 
6
  def scrape_pages(base_url, start_page, end_page):
7
+ chrome_options = Options()
8
+ chrome_options.add_argument("--headless")
9
+ chrome_options.add_argument("--no-sandbox")
10
+ chrome_options.add_argument("--disable-dev-shm-usage")
11
+
12
+ # استفاده مستقیم از ChromeDriver داخل کانتینر
13
+ driver = webdriver.Chrome(options=chrome_options)
 
 
14
 
15
  if not os.path.exists('pages'):
16
  os.makedirs('pages')