| import os |
| import time |
| import json |
| import csv |
| from datetime import datetime |
| import undetected_chromedriver as uc |
| from selenium.webdriver.common.by import By |
| from selenium.webdriver.support.ui import WebDriverWait |
| from selenium.webdriver.support import expected_conditions as EC |
|
|
| |
# Facebook credentials are read from the environment so that no secret is
# committed with the script.  Export FB_USERNAME and FB_PASSWORD before
# running; when they are missing both values are empty strings and only the
# cookie-based login path can succeed.
# NOTE(review): the credentials that used to be hard-coded here should be
# treated as leaked and rotated.
FB_USERNAME = os.environ.get("FB_USERNAME", "")
FB_PASSWORD = os.environ.get("FB_PASSWORD", "")

# File used to persist the browser session cookies between runs.
COOKIES_FILE = "fb_cookies.json"
|
|
| |
# Groups to scrape.  Each entry is passed to open_group(), which accepts
# either a direct group URL (as here) or a group name to search for.
GROUP_INPUTS = [
    "https://web.facebook.com/groups/183039928416039?locale=id_ID",
    "https://web.facebook.com/groups/teraswarga?locale=id_ID",
    "https://web.facebook.com/groups/967901979894945?locale=id_ID",
]
|
|
| |
# Output file names share ONE timestamp.  Calling datetime.now() once per
# name could straddle a second boundary and give the CSV and JSON files of
# the same run different stamps.
_RUN_TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
OUTPUT_CSV = f"facebook_groups_{_RUN_TIMESTAMP}.csv"
OUTPUT_JSON = f"facebook_groups_{_RUN_TIMESTAMP}.json"
|
|
| |
# Browser session shared by every function in this script.  The Chrome
# instance is started as soon as this module is executed.
options = uc.ChromeOptions()
for _chrome_flag in (
    "--disable-notifications",
    "--disable-infobars",
    "--start-maximized",
):
    options.add_argument(_chrome_flag)

driver = uc.Chrome(options=options, use_subprocess=True)

# Explicit-wait helper with a 15 second timeout, used for all element waits.
wait = WebDriverWait(driver, 15)
|
|
|
|
| |
def save_cookies(driver, path):
    """Write the driver's current cookies to *path* as JSON.

    Args:
        driver: Object exposing ``get_cookies()``; its return value must be
            JSON-serialisable.
        path: Destination file; it is created or overwritten.
    """
    with open(path, "w") as handle:
        cookie_jar = driver.get_cookies()
        handle.write(json.dumps(cookie_jar))
|
|
|
|
def load_cookies(driver, path):
    """Read cookies from the JSON file at *path* and add each to *driver*.

    Args:
        driver: Object exposing ``add_cookie(cookie)``.
        path: JSON file holding a list of cookie dicts, as written by
            ``save_cookies``.
    """
    with open(path, "r") as handle:
        stored_cookies = json.loads(handle.read())

    for entry in stored_cookies:
        driver.add_cookie(entry)
|
|
def _search_bar_present():
    """Return True once the Facebook search box is present, False otherwise."""
    try:
        wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//input[@placeholder="Cari di Facebook"]')
            )
        )
    except Exception:
        return False
    return True


def _login_form_visible():
    """Return True if an e-mail field of a login form is currently displayed."""
    fields = driver.find_elements(
        By.XPATH,
        '//input[@id="email" or @name="email" or @data-testid="royal-email"]',
    )
    for field in fields:
        try:
            if field.is_displayed():
                return True
        except Exception:
            # Element vanished between lookup and check; try the next one.
            continue
    return False


def _submit_login_form():
    """Fill in and submit the login form, trying each known form layout.

    Raises:
        RuntimeError: If none of the layouts could be filled in and submitted.
    """
    # (e-mail locator, password locator, submit by clicking the login button?)
    layouts = (
        ((By.ID, "email"), (By.ID, "pass"), True),
        (
            (By.XPATH, '//input[@name="email" and @type="text"]'),
            (By.XPATH, '//input[@name="pass" and @type="password"]'),
            False,
        ),
        (
            (By.XPATH, '//input[@data-testid="royal-email"]'),
            (By.XPATH, '//input[@data-testid="royal-pass"]'),
            True,
        ),
    )

    last_error = None
    for email_locator, pass_locator, click_button in layouts:
        try:
            email_input = wait.until(EC.presence_of_element_located(email_locator))
            pass_input = driver.find_element(*pass_locator)
            email_input.clear()
            email_input.send_keys(FB_USERNAME)
            pass_input.clear()
            pass_input.send_keys(FB_PASSWORD)
            if click_button:
                driver.find_element(By.NAME, "login").click()
            else:
                pass_input.submit()
            return
        except Exception as e:
            last_error = e

    raise RuntimeError(
        f"❌ Tidak menemukan form login yang cocok: {last_error}"
    ) from last_error


def fb_login(force=False):
    """Log in to Facebook, preferring saved cookies over credentials.

    Args:
        force: When True, skip the cookie file and always log in with
            FB_USERNAME / FB_PASSWORD.

    Raises:
        RuntimeError: If the credentials are empty, no login form could be
            submitted, or the browser is still on a login URL afterwards.
        Exception: Whatever the final search-bar wait raises if the search
            bar never appears after a successful login.
    """
    driver.get("https://www.facebook.com/")
    time.sleep(3)

    if not force and os.path.exists(COOKIES_FILE):
        try:
            load_cookies(driver, COOKIES_FILE)
            driver.refresh()
            time.sleep(5)
            # The home page URL never contains "login", even when expired
            # cookies leave the login form on screen, so check the form too.
            if "login" not in driver.current_url and not _login_form_visible():
                print("✅ Login pakai cookies berhasil")
                if _search_bar_present():
                    print("🔍 Search bar tersedia, siap mencari grup")
                else:
                    print("⚠️ Search bar belum muncul, tetap lanjutkan")
                return
        except Exception as e:
            print("⚠️ Cookies gagal dipakai:", e)

    print("🔑 Login manual pakai username/password...")

    if not FB_USERNAME or not FB_PASSWORD:
        raise RuntimeError("❌ FB_USERNAME/FB_PASSWORD belum diisi")

    _submit_login_form()

    time.sleep(5)
    if "login" in driver.current_url:
        raise RuntimeError("❌ Login gagal! Cek username/password")

    save_cookies(driver, COOKIES_FILE)
    print("✅ Login sukses & cookies disimpan")

    if _search_bar_present():
        print("🔍 Search bar tersedia, siap mencari grup")
    else:
        print("⚠️ Search bar belum muncul, coba manual redirect ke beranda")
        driver.get("https://www.facebook.com/")
        # Deliberately unguarded: a second timeout means the session is not
        # usable and the caller should see the failure.
        wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//input[@placeholder="Cari di Facebook"]')
            )
        )
|
|
|
|
def _any_displayed(xpath):
    """Return True if any element matching *xpath* is currently displayed."""
    for element in driver.find_elements(By.XPATH, xpath):
        try:
            if element.is_displayed():
                return True
        except Exception:
            # Element went stale between lookup and check; try the next one.
            continue
    return False


def ensure_logged_in():
    """Re-login if the session appears to have been logged out.

    Three signals are checked: the current URL containing "login", the
    "See more on Facebook" popup, and a visible login e-mail field.  When a
    popup or modal triggered the re-login, the browser is sent back to the
    page it was on, because ``fb_login`` leaves it on the Facebook home page.

    Returns:
        True if a re-login was performed, False otherwise (including when
        the check itself failed; the failure is printed, not raised).
    """
    # (XPath that signals a login prompt, message printed when it is visible)
    login_prompts = (
        (
            '//div[contains(text(),"See more on Facebook")]',
            "⚠️ Popup login terdeteksi, login ulang...",
        ),
        (
            # Only the login form's own e-mail field counts; matching every
            # text input would also fire on unrelated inputs.  This is the
            # field name fb_login fills in.
            '//input[@name="email"]',
            "⚠️ Form login modal terdeteksi, login ulang...",
        ),
    )

    try:
        current_url = driver.current_url or ""
        if "login" in current_url:
            print("⚠️ Redirect ke halaman login, mencoba login ulang...")
            fb_login(force=True)
            return True

        for xpath, message in login_prompts:
            if _any_displayed(xpath):
                print(message)
                fb_login(force=True)
                # Return to the page the caller was working on.
                driver.get(current_url)
                time.sleep(3)
                return True
    except Exception as e:
        print("⚠️ Gagal cek login:", e)

    return False
|
|
| |
def _xpath_literal(text):
    """Return *text* as an XPath 1.0 string literal, whatever quotes it holds."""
    if '"' not in text:
        return f'"{text}"'
    if "'" not in text:
        return f"'{text}'"
    # XPath 1.0 has no escape character: glue the pieces with concat().
    pieces = (f'"{piece}"' for piece in text.split('"'))
    return "concat(" + ", '\"', ".join(pieces) + ")"


def open_group(group_input):
    """Open a Facebook group given either its URL or its name.

    Args:
        group_input: A direct group link (starting with http:// or
            https://) or a group name to look up through the search bar.

    Returns:
        The URL of the opened group, or None if the group was not found or
        the search failed.
    """
    group_input = group_input.strip()

    if group_input.startswith(("http://", "https://")):
        print(f"🔗 Buka langsung link grup: {group_input}")
        driver.get(group_input)
        time.sleep(5)

        ensure_logged_in()
        return group_input

    try:
        search_box = wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//input[@placeholder="Cari di Facebook"]')
            )
        )
        print(f"🔍 Mencari grup '{group_input}' via search...")
        search_box.clear()
        search_box.send_keys(group_input)
        search_box.submit()
        time.sleep(5)

        # Only links to a group whose visible text contains the name qualify.
        # The name is quoted safely so a '"' in it cannot break the query.
        candidates = driver.find_elements(
            By.XPATH,
            '//a[contains(@href, "/groups/") '
            f"and contains(., {_xpath_literal(group_input)})]",
        )
        link = None
        for candidate in candidates:
            href = candidate.get_attribute("href")
            if href:
                link = href
                break

        if not link:
            print(f"❌ Grup '{group_input}' tidak ditemukan via search")
            return None

        print(f"✅ Grup ditemukan: {link}")
        driver.get(link)
        time.sleep(5)
        ensure_logged_in()
        return link

    except Exception as e:
        print(f"⚠️ Search gagal untuk '{group_input}':", e)
        return None
|
|
def scroll_to_bottom(driver, max_scrolls=10, pause_time=2):
    """Scroll the page down until its height stops growing.

    Args:
        driver: Object exposing ``execute_script(script)``.
        max_scrolls: Upper bound on the number of scroll attempts.
        pause_time: Seconds to sleep after each scroll before re-measuring.
    """
    measure_height = "return document.body.scrollHeight"
    jump_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"

    previous_height = driver.execute_script(measure_height)
    for _attempt in range(max_scrolls):
        driver.execute_script(jump_to_bottom)
        time.sleep(pause_time)
        current_height = driver.execute_script(measure_height)
        if current_height == previous_height:
            # Nothing new was loaded: the bottom has been reached.
            return
        previous_height = current_height
|
|
| |
# Upper bound on "view more" clicks per post, so a button that never goes
# away cannot trap the scraper in an endless loop.
_MAX_EXPAND_CLICKS = 50

# Author lookups, tried in order until one yields non-empty text.
_AUTHOR_XPATHS = (
    ".//h2//span//span",
    ".//strong//span",
    ".//span[contains(@class,'x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1nxh6w3 x1sibtaa x1s688f xi81zsa')]",
)

_MORE_COMMENTS_XPATH = (
    ".//span[contains(text(),'Lihat komentar') or contains(text(),'View more comments')]"
)
_MORE_REPLIES_XPATH = (
    ".//span[contains(text(),'Lihat') and contains(text(),'balasan')]"
    " | .//span[contains(text(),'View') and contains(text(),'replies')]"
)


def _build_post_url(group_url, post_id):
    """Return the permalink of *post_id* inside the group at *group_url*."""
    # Drop the query string first, THEN the trailing slash; the reverse
    # order yields ".../groups/x//posts/..." for URLs ending in "/?...".
    base = group_url.split("?")[0].rstrip("/")
    return f"{base}/posts/{post_id}/"


def _find_permalink(post, group_url):
    """Return the permalink of a feed article, or None if it has none."""
    for marker in ("/posts/", "/permalink/"):
        anchors = post.find_elements(By.XPATH, f".//a[contains(@href,'{marker}')]")
        for anchor in anchors:
            href = anchor.get_attribute("href")
            if href:
                return href.split("?")[0]

    # Fall back to the post id embedded in the "data-ft" attribute, if any.
    raw = post.get_attribute("data-ft")
    if raw and "top_level_post_id" in raw:
        try:
            post_id = json.loads(raw).get("top_level_post_id")
        except (ValueError, AttributeError):
            post_id = None
        if post_id:
            return _build_post_url(group_url, post_id)
    return None


def _extract_author(context):
    """Return the author name found inside *context*, or "Unknown"."""
    for xpath in _AUTHOR_XPATHS:
        try:
            name = context.find_element(By.XPATH, xpath).text.strip()
        except Exception:
            continue
        if name:
            return name
    return "Unknown"


def _click_while_present(context, xpath):
    """Click the element matching *xpath* until it is gone or the cap is hit."""
    for _ in range(_MAX_EXPAND_CLICKS):
        try:
            button = context.find_element(By.XPATH, xpath)
            driver.execute_script("arguments[0].click();", button)
        except Exception:
            return
        time.sleep(2)


def _extract_caption(context):
    """Return the post text inside *context* (at most 2000 characters)."""
    try:
        blocks = context.find_elements(
            By.XPATH,
            ".//div[@data-ad-rendering-role='story_message']//div[@dir='auto']",
        )
        texts = (block.text.strip() for block in blocks)
        return "\n".join(text for text in texts if text)[:2000]
    except Exception:
        return ""


def _extract_comments(context):
    """Return the distinct comment texts inside *context*, in page order."""
    try:
        blocks = context.find_elements(
            By.XPATH, ".//div[@aria-label='Komentar']//div[@dir='auto']"
        )
        texts = (block.text.strip() for block in blocks)
        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(text for text in texts if text))
    except Exception:
        return []


def scrape_group(group_url, group_name, max_scrolls=3, max_posts=None):
    """Scrape posts (author, caption, comments) from one Facebook group.

    Works in two phases so that no feed element is used after the browser
    has navigated away from the feed:

    1. Scroll the group feed and collect the permalink of every article.
       Articles without a permalink are read in place from the feed.
    2. Visit each collected permalink, expand its comments and read it.

    Args:
        group_url: URL of the group feed.
        group_name: Label stored in every record's "group_name" field.
        max_scrolls: Maximum number of feed scroll rounds.
        max_posts: Maximum number of posts to return; None or 0 means no
            limit.

    Returns:
        A list of dicts with the keys "group_name", "group_url",
        "post_url", "author", "caption" and "comments".
    """
    print(f"📥 Scraping grup: {group_name} ({group_url})")
    driver.get(group_url)
    time.sleep(4)
    ensure_logged_in()

    posts = []
    permalinks = []  # visit queue, in discovery order
    seen_permalinks = set()
    seen_elements = set()  # feed elements already handled in earlier rounds

    def make_record(post_url, author, caption, comments):
        return {
            "group_name": group_name,
            "group_url": group_url,
            "post_url": post_url,
            "author": author,
            "caption": caption,
            "comments": comments,
        }

    def limit_reached():
        return bool(max_posts) and len(posts) + len(permalinks) >= max_posts

    # Phase 1: walk the feed without ever leaving it.
    last_height = driver.execute_script("return document.body.scrollHeight")
    for scroll_round in range(max_scrolls):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(4)
        ensure_logged_in()

        post_elements = driver.find_elements(By.XPATH, '//div[@role="article"]')
        print(f"🔎 Ditemukan {len(post_elements)} postingan pada scroll {scroll_round+1}")

        for idx, post in enumerate(post_elements):
            if limit_reached():
                break
            try:
                if post.id in seen_elements:
                    continue
                seen_elements.add(post.id)

                driver.execute_script("arguments[0].scrollIntoView(true);", post)
                time.sleep(1)

                permalink = _find_permalink(post, group_url)
                if permalink:
                    if permalink not in seen_permalinks:
                        seen_permalinks.add(permalink)
                        permalinks.append(permalink)
                    continue

                print("⚠️ Tidak ada permalink & tidak bisa generate. Tetap lanjut simpan data.")
                author = _extract_author(post)
                caption = _extract_caption(post)
                comments = _extract_comments(post)
                print(f"✅ Post captured: {author} | {caption[:60]}... | {len(comments)} komentar")
                posts.append(make_record(group_url, author, caption, comments))
            except Exception as e:
                print(f"⚠️ Error baca postingan {idx}: {e}")
                continue

        if limit_reached():
            break

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Phase 2: open each permalink and read the post from its own page.
    for permalink in permalinks:
        author, caption, comments = "Unknown", "", []
        try:
            driver.get(permalink)
            time.sleep(3)
            ensure_logged_in()
            post_context = driver.find_element(By.XPATH, "//div[@role='article']")

            author = _extract_author(post_context)
            _click_while_present(post_context, _MORE_COMMENTS_XPATH)
            _click_while_present(post_context, _MORE_REPLIES_XPATH)
            caption = _extract_caption(post_context)
            comments = _extract_comments(post_context)
        except Exception as e:
            # Keep the record with whatever was read so the URL is not lost.
            print(f"⚠️ Gagal buka permalink {permalink}: {e}")

        print(f"✅ Post captured: {author} | {caption[:60]}... | {len(comments)} komentar")
        posts.append(make_record(permalink, author, caption, comments))

    return posts
|
|
| |
# Script entry: log in, scrape every configured group, write the results to
# CSV and JSON, and always close the browser, even when a step fails.
all_data = []

try:
    fb_login()

    for g in GROUP_INPUTS:
        # One failing group must not discard the data of the others.
        try:
            group_url = open_group(g)
            if group_url:
                all_data.extend(scrape_group(group_url, g))
        except Exception as e:
            print(f"⚠️ Gagal scraping grup '{g}': {e}")

    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["group_name", "group_url", "post_url", "author", "caption", "comments"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        # A CSV cell holds one string: store the comment list as JSON rather
        # than as a Python list repr, so it can be parsed back reliably.
        writer.writerows(
            {**row, "comments": json.dumps(row["comments"], ensure_ascii=False)}
            for row in all_data
        )

    with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(all_data, f, ensure_ascii=False, indent=2)

    print(f"✅ Selesai. Data disimpan ke {OUTPUT_CSV} dan {OUTPUT_JSON}")
finally:
    # Closing the browser is best effort; report a failure but do not let
    # it mask an error raised above.
    try:
        driver.quit()
    except Exception as e:
        print(f"⚠️ Gagal menutup browser: {e}")
|
|