""" Legacy scraper implementation for performance comparison """ import chromedriver_autoinstaller from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By from selenium.common.exceptions import StaleElementReferenceException import json import time chromedriver_autoinstaller.install() def create_legacy_options(): """Create legacy Chrome options""" options = Options() options.binary_location = "/usr/bin/chromium-browser" options.add_argument("--headless") options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") return options def safe_get_attribute(elements, attr): """Legacy safe attribute extraction""" values = [] for e in elements: try: value = e.get_attribute(attr) if value: values.append(value) except StaleElementReferenceException: continue # Skip stale elements return values def legacy_scraper(link: str): """Legacy scraper implementation - creates new driver each time""" options = create_legacy_options() driver = webdriver.Chrome(options=options) try: driver.get(link) time.sleep(2) # Fixed delay for dynamic content try: page_text = driver.find_element(By.TAG_NAME, "body").text except StaleElementReferenceException: page_text = "" scripts = driver.find_elements(By.TAG_NAME, "script") links = driver.find_elements(By.TAG_NAME, "link") script_sources = safe_get_attribute(scripts, "src") link_sources = safe_get_attribute(links, "href") return { "page_text": page_text, "script_sources": script_sources, "link_sources": link_sources, } finally: driver.quit()