|
|
import time
from contextlib import contextmanager
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
|
|
|
|
|
class FacebookScraper:
    """Collect ad text from the Facebook Ad Library with headless Chrome.

    A fresh Chrome session is started for every ``scrape_ads`` call and is
    always shut down afterwards, so instances hold no browser between calls.
    """

    # Selector used to locate ad containers on the results page.
    # NOTE(review): this looks like a generated Facebook class name and is
    # presumably unstable across site deployments -- confirm before relying on it.
    AD_SELECTOR = "div.x1yztbdb"

    def __init__(self):
        # Set only while a ``_get_driver`` block is active; ``None`` otherwise.
        self.driver = None

    def _setup_driver(self):
        """Create and return a headless Chrome WebDriver.

        The chromedriver binary is resolved through ``ChromeDriverManager``.
        """
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        service = Service(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=options)

    @contextmanager
    def _get_driver(self):
        """Yield a new WebDriver and guarantee it is quit on exit.

        The driver is created *before* entering the ``try`` so that a failed
        start-up never triggers ``quit()`` on a stale driver left over from an
        earlier call. ``self.driver`` is reset to ``None`` on exit so it never
        references a dead session.
        """
        driver = self._setup_driver()
        self.driver = driver
        try:
            yield driver
        finally:
            try:
                driver.quit()
            finally:
                # Clear the reference even if quit() itself raised.
                self.driver = None

    @staticmethod
    def _build_url(search_query):
        """Return the Ad Library search URL for *search_query*.

        The query is percent-encoded so that spaces and reserved characters
        such as ``&`` or ``#`` cannot break or truncate the query string.
        """
        encoded_query = quote_plus(str(search_query))
        return (
            "https://www.facebook.com/ads/library/"
            "?active_status=all&ad_type=all&country=ALL"
            f"&q={encoded_query}&search_type=keyword"
        )

    def scrape_ads(self, search_query, num_scrolls=3, *, scroll_pause=3):
        """Search the Ad Library and return the text of the ads found.

        Args:
            search_query: Keyword(s) to search for; URL-encoded automatically.
            num_scrolls: How many times to scroll to the bottom of the page
                before the ad elements are collected.
            scroll_pause: Seconds to sleep after each scroll.

        Returns:
            A list of non-empty text strings, one per matched element. An
            empty list is returned if a Selenium error occurs while scraping.

        Raises:
            Errors raised while *starting* the browser are not caught here and
            propagate to the caller.
        """
        with self._get_driver() as driver:
            try:
                driver.get(self._build_url(search_query))
                driver.implicitly_wait(5)

                for _ in range(num_scrolls):
                    driver.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);"
                    )
                    time.sleep(scroll_pause)

                elements = driver.find_elements(By.CSS_SELECTOR, self.AD_SELECTOR)
                # Read ``.text`` once per element: each access is a separate
                # WebDriver call.
                texts = (element.text for element in elements)
                return [text for text in texts if text]
            except (TimeoutException, WebDriverException) as e:
                print(f"Error during scraping: {e}")
                return []