Create services/scraper.py
Browse files- app/services/scraper.py +38 -0
app/services/scraper.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from selenium import webdriver
|
| 2 |
+
from selenium.webdriver.common.by import By
|
| 3 |
+
from selenium.webdriver.chrome.service import Service
|
| 4 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
class FacebookScraper:
    """Collect ad text from the Facebook Ad Library with headless Chrome.

    A Chrome WebDriver is started when the object is constructed. Call
    ``close()`` when finished, or use the instance as a context manager so
    the browser is shut down even if scraping raises::

        with FacebookScraper() as scraper:
            ads = scraper.scrape_ads("running shoes")
    """

    def __init__(self):
        self.driver = self._setup_driver()

    def __enter__(self):
        """Return the scraper itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser on leaving the ``with`` block; never suppress errors."""
        self.close()
        return False

    def _setup_driver(self):
        """Create a headless Chrome driver using a webdriver-manager binary."""
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        return webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options,
        )

    @staticmethod
    def _build_url(search_query):
        """Return the Ad Library keyword-search URL for *search_query*.

        The query is percent-encoded so that spaces and reserved characters
        such as ``&``, ``=`` and ``#`` stay inside the ``q`` parameter instead
        of being parsed as URL structure.
        """
        # Imported locally so this class does not depend on a new
        # module-level import being present in the file.
        from urllib.parse import quote

        encoded_query = quote(str(search_query), safe="")
        return (
            "https://www.facebook.com/ads/library/"
            "?active_status=all&ad_type=all&country=ALL"
            f"&q={encoded_query}&search_type=keyword"
        )

    def scrape_ads(self, search_query, num_scrolls=3):
        """Load the search results page, scroll it, and return ad texts.

        Args:
            search_query: Keyword(s) to search for; encoded before use.
            num_scrolls: Number of scroll-to-bottom passes. After each pass
                every currently matching element is read again, so the same
                text may appear more than once in the result.

        Returns:
            A list of the ``.text`` values of the matched elements, in the
            order they were read.
        """
        self.driver.get(self._build_url(search_query))
        time.sleep(5)  # Wait for page to load

        ads = []
        for _ in range(num_scrolls):
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);"
            )
            time.sleep(3)  # give newly requested content time to render

            # NOTE(review): "x1yztbdb" looks like a generated class name and
            # may change when the site is redeployed - confirm it still matches.
            ad_elements = self.driver.find_elements(
                By.CSS_SELECTOR, "div.x1yztbdb"
            )
            for ad in ad_elements:
                # Best-effort: a failure reading one element is reported and
                # skipped so the remaining elements are still collected.
                try:
                    ads.append(ad.text)
                except Exception as e:
                    print("Error extracting ad:", e)

        return ads

    def close(self):
        """Quit the browser and end the WebDriver session."""
        self.driver.quit()
|