import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup


class BrowserScraper:
    """Scrape basic page metadata by driving a headless Chrome via Selenium.

    The Chrome options are built once in ``__init__`` and reused for every
    call; a fresh browser session is started and torn down per fetch.
    """

    def __init__(self):
        """Prepare the Chrome options shared by all fetches.

        If the ``CHROME_BIN`` environment variable is set, its value is used
        as the path of the Chrome binary to launch.
        """
        self.options = Options()
        # NOTE(review): these flags are presumably here so Chrome can run in
        # containers / CI without a display -- confirm against deployment.
        self.options.add_argument("--headless")
        self.options.add_argument("--no-sandbox")
        self.options.add_argument("--disable-dev-shm-usage")

        chrome_bin = os.getenv("CHROME_BIN")
        if chrome_bin:
            self.options.binary_location = chrome_bin

    def fetch_page_metadata(self, url, *, wait_seconds=2, summary_length=500):
        """Open *url* in a real browser and return its title and a summary.

        Args:
            url: Address of the page to load.
            wait_seconds: Pause, in seconds, after navigation to give
                client-side scripts time to populate the page. A value of
                ``0`` or ``None`` skips the pause. Defaults to 2.
            summary_length: Maximum number of characters of the ``<body>``
                text to return as the summary. Defaults to 500.

        Returns:
            A dict with the keys ``"title"``, ``"summary"`` and ``"status"``.
            On success ``status`` is ``"success"``. If anything raises while
            starting the browser or reading the page, ``title`` is
            ``"Error"``, ``summary`` is the exception message and ``status``
            is ``"failed"``; the exception is not propagated.
        """
        driver = None
        try:
            print(f"🌍 Browser Navigating to: {url}")
            service = Service(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=service, options=self.options)
            driver.get(url)

            # Fixed pause so JavaScript-rendered content has a chance to load.
            if wait_seconds and wait_seconds > 0:
                time.sleep(wait_seconds)

            title = driver.title
            body_text = driver.find_element(By.TAG_NAME, "body").text
            return {
                "title": title,
                # Only the leading part of the page text is kept as summary.
                "summary": body_text[:summary_length],
                "status": "success",
            }
        except Exception as e:
            # Deliberate best-effort boundary: callers receive a "failed"
            # result dict instead of an exception.
            return {
                "title": "Error",
                "summary": str(e),
                "status": "failed",
            }
        finally:
            # Always shut the browser down, even on failure, so that no
            # Chrome / chromedriver process is left behind.
            if driver is not None:
                driver.quit()