hatamo committed on
Commit
69a7c18
·
1 Parent(s): 83dd3e1

Updated selenium dependencies

Browse files
Dockerfile CHANGED
@@ -2,6 +2,12 @@ FROM python:3.10
2
 
3
  WORKDIR /app
4
 
 
 
 
 
 
 
5
  # Copy requirements first for better caching
6
  COPY requirements.txt .
7
 
 
2
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies for Chrome/Chromium
6
+ RUN apt-get update && apt-get install -y \
7
+ chromium-browser \
8
+ chromium-chromedriver \
9
+ && rm -rf /var/lib/apt/lists/*
10
+
11
  # Copy requirements first for better caching
12
  COPY requirements.txt .
13
 
code/web_scraper_allegro.py CHANGED
@@ -1,8 +1,11 @@
1
  # scrape_allegro_offer.py
2
  import undetected_chromedriver as uc
3
  from selenium.webdriver.common.by import By
 
 
4
  import time
5
  import requests
 
6
 
7
  def sanitize_folder_name(text): # helper function
8
  polish_chars = {
@@ -26,7 +29,24 @@ def scrape_allegro_offer(url: str):
26
  """Zwraca dane aukcji bez zapisywania na dysk"""
27
  options = uc.ChromeOptions()
28
  options.add_argument("--window-position=-3000,0")
29
- driver = uc.Chrome(use_subprocess=True, options=options)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  try:
32
  print(f"🔍 Allegro: {url}")
 
1
  # scrape_allegro_offer.py
2
  import undetected_chromedriver as uc
3
  from selenium.webdriver.common.by import By
4
+ from webdriver_manager.chrome import ChromeDriverManager
5
+ from selenium.webdriver.chrome.service import Service
6
  import time
7
  import requests
8
+ import os
9
 
10
  def sanitize_folder_name(text): # helper function
11
  polish_chars = {
 
29
  """Zwraca dane aukcji bez zapisywania na dysk"""
30
  options = uc.ChromeOptions()
31
  options.add_argument("--window-position=-3000,0")
32
+ options.add_argument("--headless")
33
+ options.add_argument("--no-sandbox")
34
+ options.add_argument("--disable-dev-shm-usage")
35
+
36
+ # Ustawienie binarki Chrome'a - najpierw szukaj systemowego chromium
37
+ if os.path.exists('/usr/bin/chromium-browser'):
38
+ options.binary_location = '/usr/bin/chromium-browser'
39
+
40
+ try:
41
+ driver = uc.Chrome(
42
+ service=Service(ChromeDriverManager().install()),
43
+ options=options,
44
+ use_subprocess=True
45
+ )
46
+ except Exception as e:
47
+ print(f"Error initializing Chrome with webdriver-manager: {e}")
48
+ # Fallback: próbuj bez service
49
+ driver = uc.Chrome(options=options, use_subprocess=True)
50
 
51
  try:
52
  print(f"🔍 Allegro: {url}")
code/web_scraper_ebay.py CHANGED
@@ -1,15 +1,35 @@
1
  # scrape_ebay_offer.py
2
  import undetected_chromedriver as uc
3
  from selenium.webdriver.common.by import By
 
 
4
  import time
5
  import requests
 
6
 
7
  def scrape_ebay_offer(url: str):
8
  """Zwraca dane aukcji bez zapisywania na dysk"""
9
  print(f"🔍 eBay: {url}")
10
  options = uc.ChromeOptions()
11
  options.add_argument("--window-position=-3000,0")
12
- driver = uc.Chrome(use_subprocess=True, options=options)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  try:
15
  driver.get(url)
 
1
  # scrape_ebay_offer.py
2
  import undetected_chromedriver as uc
3
  from selenium.webdriver.common.by import By
4
+ from webdriver_manager.chrome import ChromeDriverManager
5
+ from selenium.webdriver.chrome.service import Service
6
  import time
7
  import requests
8
+ import os
9
 
10
  def scrape_ebay_offer(url: str):
11
  """Zwraca dane aukcji bez zapisywania na dysk"""
12
  print(f"🔍 eBay: {url}")
13
  options = uc.ChromeOptions()
14
  options.add_argument("--window-position=-3000,0")
15
+ options.add_argument("--headless")
16
+ options.add_argument("--no-sandbox")
17
+ options.add_argument("--disable-dev-shm-usage")
18
+
19
+ # Ustawienie binarki Chrome'a - najpierw szukaj systemowego chromium
20
+ if os.path.exists('/usr/bin/chromium-browser'):
21
+ options.binary_location = '/usr/bin/chromium-browser'
22
+
23
+ try:
24
+ driver = uc.Chrome(
25
+ service=Service(ChromeDriverManager().install()),
26
+ options=options,
27
+ use_subprocess=True
28
+ )
29
+ except Exception as e:
30
+ print(f"Error initializing Chrome with webdriver-manager: {e}")
31
+ # Fallback: próbuj bez service
32
+ driver = uc.Chrome(options=options, use_subprocess=True)
33
 
34
  try:
35
  driver.get(url)
requirements.txt CHANGED
@@ -9,6 +9,8 @@ fastapi
9
  uvicorn
10
  python-multipart
11
  undetected_chromedriver
 
12
  bs4
13
  requests
14
- flask
 
 
9
  uvicorn
10
  python-multipart
11
  undetected_chromedriver
12
+ webdriver-manager
13
  bs4
14
  requests
15
+ flask
16
+ selenium>=4.0