File size: 1,825 Bytes
565a379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import logging

logger = logging.getLogger(__name__)

class SeleniumScraper:
    """
    Fallback scraper using Selenium for sites where Playwright fails.

    A Chrome driver is created lazily on the first call to ``scrape`` and is
    shut down again at the end of every ``scrape`` call, so no browser is
    kept open between scrapes.
    """

    def __init__(self, headless: bool = True):
        """
        Store configuration; no browser is started here.

        Args:
            headless: When true, Chrome is launched with ``--headless``.
        """
        self.headless = headless
        # Set by _setup_driver(); None whenever no driver is held.
        self.driver = None

    def _setup_driver(self):
        """
        Initialize Chrome Driver and store it on ``self.driver``.

        Raises:
            Exception: Any error raised while installing or starting the
                driver is logged and then re-raised unchanged.
        """
        try:
            options = Options()
            if self.headless:
                options.add_argument('--headless')
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-dev-shm-usage')

            # Auto-install driver
            service = Service(ChromeDriverManager().install())
            self.driver = webdriver.Chrome(service=service, options=options)
        except Exception as e:
            logger.error("Failed to setup Selenium driver: %s", e)
            # Bare raise re-raises the active exception with its traceback.
            raise

    def _quit_driver(self):
        """
        Shut down the current driver, if any, and always drop the reference.

        A failure inside ``quit()`` is logged instead of propagated so that
        cleanup can never replace the result (or error handling) of the
        scrape that triggered it.
        """
        # Clear the attribute first: even if quit() fails, the next scrape
        # must start a fresh driver rather than reuse a broken one.
        driver, self.driver = self.driver, None
        if not driver:
            return
        try:
            driver.quit()
        except Exception as e:
            logger.warning("Failed to quit Selenium driver: %s", e)

    def scrape(self, url: str) -> str:
        """
        Scrape a URL and return page source.

        Args:
            url: Address to load in the browser.

        Returns:
            The page source reported by the driver, or an empty string if
            loading the page or reading its source raised an exception.

        Raises:
            Exception: Propagated from ``_setup_driver`` when no driver is
                held yet and one cannot be created.
        """
        if not self.driver:
            self._setup_driver()

        try:
            logger.info("Selenium scraping: %s", url)
            self.driver.get(url)
            # Add explicit waits if needed
            return self.driver.page_source
        except Exception as e:
            logger.error("Selenium scrape failed: %s", e)
            return ""
        finally:
            # For simplistic usage, we might close after each scrape or keep open.
            # Here we close to save resources as it's a fallback.
            self._quit_driver()