rastof9 committed on
Commit
15da11b
·
verified ·
1 Parent(s): 07685f0

Create services/scraper.py

Browse files
Files changed (1) hide show
  1. app/services/scraper.py +38 -0
app/services/scraper.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from selenium import webdriver
2
+ from selenium.webdriver.common.by import By
3
+ from selenium.webdriver.chrome.service import Service
4
+ from webdriver_manager.chrome import ChromeDriverManager
5
+ import time
6
+
7
class FacebookScraper:
    """Collect the visible text of ads from the Facebook Ad Library.

    A headless Chrome session is started on construction and kept on
    ``self.driver`` until :meth:`close` is called.  The class can also be
    used as a context manager so the browser is shut down even when
    scraping raises::

        with FacebookScraper() as scraper:
            ads = scraper.scrape_ads("running shoes")
    """

    # Base address of the Ad Library search page; the query string is
    # appended by ``_build_search_url``.
    _AD_LIBRARY_URL = "https://www.facebook.com/ads/library/"

    # CSS selector used to locate ad containers.
    # NOTE(review): this looks like an auto-generated class name and may
    # change whenever the site is redeployed -- confirm it is still valid.
    _AD_SELECTOR = "div.x1yztbdb"

    def __init__(self):
        """Start the browser session used by all later scraping calls."""
        self.driver = self._setup_driver()

    def __enter__(self):
        """Return the scraper itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser on leaving the ``with`` block.

        Returns ``False`` so an exception raised inside the block is not
        suppressed.
        """
        self.close()
        return False

    def _setup_driver(self):
        """Create and return a headless Chrome WebDriver.

        The driver binary is resolved through ``ChromeDriverManager``.
        """
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        service = Service(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=options)

    @staticmethod
    def _build_search_url(search_query):
        """Return the Ad Library keyword-search URL for ``search_query``.

        The query is URL-encoded so that characters such as spaces, ``&``
        or ``#`` cannot truncate the search term or inject additional
        query parameters.
        """
        # Imported locally so this block does not depend on a change to the
        # module's top-level import list.
        from urllib.parse import quote_plus

        encoded_query = quote_plus(str(search_query))
        return (
            f"{FacebookScraper._AD_LIBRARY_URL}"
            "?active_status=all&ad_type=all&country=ALL"
            f"&q={encoded_query}&search_type=keyword"
        )

    def scrape_ads(self, search_query, num_scrolls=3, *,
                   page_load_wait=5, scroll_wait=3):
        """Search the Ad Library and return the text of every ad found.

        Args:
            search_query: Keyword(s) to search for; URL-encoded before use.
            num_scrolls: How many times to scroll to the bottom of the page
                so that further results are loaded.
            page_load_wait: Seconds to sleep after opening the page.
            scroll_wait: Seconds to sleep after each scroll.

        Returns:
            A list with the ``.text`` of each element matching the ad
            selector, in document order.  Elements whose text cannot be
            read are skipped after printing the error.
        """
        self.driver.get(self._build_search_url(search_query))
        time.sleep(page_load_wait)  # Wait for page to load

        for _ in range(num_scrolls):
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);"
            )
            time.sleep(scroll_wait)  # Give newly requested results time to render

        ads = []
        for ad in self.driver.find_elements(By.CSS_SELECTOR, self._AD_SELECTOR):
            try:
                ads.append(ad.text)
            except Exception as e:
                # Best effort: one unreadable element must not abort the
                # whole scrape, so report it and move on.
                print("Error extracting ad:", e)

        return ads

    def close(self):
        """Quit the browser and end the WebDriver session."""
        self.driver.quit()