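# Source path: Sentiment/services/_driver.py
# Repository-page header captured with the file (kept as comments so the
# module stays valid Python): author avatar "NzTama", commit message
# "Initial clean deploy: Sentiment Analysis", revision fa8ff66.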
"""
_driver.py – Shared Selenium Chrome driver factory.
All scrapers import _create_driver() from here so that Docker env-vars
(CHROME_BIN, CHROMEDRIVER_PATH) are respected in one place.
"""
from __future__ import annotations
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
def _create_driver(mobile: bool = False) -> webdriver.Chrome:
    """Return a headless Chrome/Chromium instance tuned for Docker.

    Picks up:
        CHROME_BIN        – path to chromium binary (default: /usr/bin/chromium)
        CHROMEDRIVER_PATH – path to chromedriver   (default: /usr/bin/chromedriver)

    A variable that is unset *or empty* falls back to its default.

    Args:
        mobile: When true, send an iPhone Safari User-Agent string; otherwise
            send a desktop Linux Chrome User-Agent string. Only the
            User-Agent differs; the window size is 1920x1080 in both cases.

    Returns:
        A started ``webdriver.Chrome``. The caller owns it and is responsible
        for calling ``quit()`` when done.

    Raises:
        Any exception raised while starting Chrome or while registering the
        fingerprint-hiding script is propagated unchanged. In the latter case
        the already-started browser is shut down before re-raising.
    """
    # `or` (rather than a .get() default) so that an empty value, e.g. from a
    # blank Docker/compose passthrough, does not become an empty path.
    chrome_bin = os.environ.get("CHROME_BIN") or "/usr/bin/chromium"
    driver_bin = os.environ.get("CHROMEDRIVER_PATH") or "/usr/bin/chromedriver"

    options = webdriver.ChromeOptions()
    options.binary_location = chrome_bin

    # ── Headless & sandbox flags ──────────────────────────────────────────────
    chrome_flags = (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--disable-software-rasterizer",
        "--disable-extensions",
        "--disable-infobars",
        "--disable-notifications",
        "--disable-popup-blocking",
        "--disable-blink-features=AutomationControlled",
        "--ignore-certificate-errors",
        "--window-size=1920,1080",
        "--remote-debugging-port=0",  # avoid port conflicts
    )
    for flag in chrome_flags:
        options.add_argument(flag)

    # ── User-Agent ────────────────────────────────────────────────────────────
    if mobile:
        options.add_argument(
            "--user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) "
            "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1"
        )
    else:
        options.add_argument(
            "--user-agent=Mozilla/5.0 (X11; Linux x86_64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
        )

    options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    options.add_experimental_option("useAutomationExtension", False)

    service = Service(executable_path=driver_bin)
    driver = webdriver.Chrome(service=service, options=options)

    # Hide webdriver fingerprint. The browser is already running at this
    # point, so if the CDP call fails we must shut it down ourselves: the
    # caller never receives the driver and could not quit() it.
    try:
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": "Object.defineProperty(navigator,'webdriver',{get:()=>undefined})"},
        )
    except Exception:
        driver.quit()
        raise
    return driver