"""Scrape posts and comments from a list of Facebook groups with Selenium.

The script logs in (re-using saved cookies when possible), opens every group
listed in ``GROUP_INPUTS``, collects the posts found in the feed and writes
the result to a timestamped CSV file and JSON file.
"""
import csv
import json
import os
import time
from datetime import datetime

import undetected_chromedriver as uc
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# ========== CONFIGURATION ==========
# NOTE(review): real credentials are hard-coded in this file. They are kept
# as fallbacks so existing runs keep working, but they should be rotated and
# supplied through the FB_USERNAME / FB_PASSWORD environment variables.
FB_USERNAME = os.environ.get("FB_USERNAME", "fatihr252@gmail.com")
FB_PASSWORD = os.environ.get("FB_PASSWORD", "Bambank1")
COOKIES_FILE = "fb_cookies.json"

# Groups to scrape: either a full group URL or a group name to search for.
GROUP_INPUTS = [
    "https://web.facebook.com/groups/183039928416039?locale=id_ID",
    "https://web.facebook.com/groups/teraswarga?locale=id_ID",
    "https://web.facebook.com/groups/967901979894945?locale=id_ID",
]

# Output locations. The timestamp is taken once so both files always share
# the same suffix.
_RUN_STAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
OUTPUT_CSV = f"facebook_groups_{_RUN_STAMP}.csv"
OUTPUT_JSON = f"facebook_groups_{_RUN_STAMP}.json"

# Locators shared by several functions.
SEARCH_BOX_XPATH = '//input[@placeholder="Cari di Facebook"]'
ARTICLE_XPATH = '//div[@role="article"]'
MORE_COMMENTS_XPATH = (
    ".//span[contains(text(),'Lihat komentar') "
    "or contains(text(),'View more comments')]"
)
MORE_REPLIES_XPATH = (
    ".//span[contains(text(),'Lihat') and contains(text(),'balasan')] "
    "| .//span[contains(text(),'View') and contains(text(),'replies')]"
)
AUTHOR_XPATHS = (
    ".//h2//span//span",
    ".//strong//span",
    ".//span[contains(@class,'x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 "
    "x1nxh6w3 x1sibtaa x1s688f xi81zsa')]",
)

# Login form variants tried in order:
# (email locator, password locator, submit by clicking the "login" button?)
LOGIN_FORM_VARIANTS = (
    ((By.ID, "email"), (By.ID, "pass"), True),
    (
        (By.XPATH, '//input[@name="email" and @type="text"]'),
        (By.XPATH, '//input[@name="pass" and @type="password"]'),
        False,
    ),
    (
        (By.XPATH, '//input[@data-testid="royal-email"]'),
        (By.XPATH, '//input[@data-testid="royal-pass"]'),
        True,
    ),
)

# Upper bound on "show more" clicks per post, so a button that never
# disappears cannot hang the run.
MAX_EXPAND_CLICKS = 50

# ========== SELENIUM SETUP ==========
# The driver and wait objects are module globals because every function
# below reads them directly.
options = uc.ChromeOptions()
options.add_argument("--disable-notifications")
options.add_argument("--disable-infobars")
options.add_argument("--start-maximized")
driver = uc.Chrome(options=options, use_subprocess=True)
wait = WebDriverWait(driver, 15)


# ========== LOGIN ==========
def save_cookies(driver, path):
    """Write the cookies of *driver*'s current session to *path* as JSON."""
    with open(path, "w", encoding="utf-8") as file:
        json.dump(driver.get_cookies(), file)


def load_cookies(driver, path):
    """Read cookies from the JSON file *path* and add each one to *driver*."""
    with open(path, "r", encoding="utf-8") as file:
        cookies = json.load(file)
    for cookie in cookies:
        driver.add_cookie(cookie)


def _search_box_present():
    """Return True if the search box shows up within the wait timeout."""
    try:
        wait.until(EC.presence_of_element_located((By.XPATH, SEARCH_BOX_XPATH)))
    except WebDriverException:
        return False
    return True


def _submit_login_form():
    """Fill in and submit the first login form variant that can be located.

    Raises:
        RuntimeError: if none of the known form variants could be used.
    """
    last_error = None
    for email_locator, pass_locator, click_login in LOGIN_FORM_VARIANTS:
        try:
            email_input = wait.until(EC.presence_of_element_located(email_locator))
            pass_input = driver.find_element(*pass_locator)
            email_input.clear()
            email_input.send_keys(FB_USERNAME)
            pass_input.clear()
            pass_input.send_keys(FB_PASSWORD)
            if click_login:
                driver.find_element(By.NAME, "login").click()
            else:
                pass_input.submit()
            return
        except WebDriverException as e:
            last_error = e
    raise RuntimeError(
        f"❌ Tidak menemukan form login yang cocok: {last_error}"
    ) from last_error


def fb_login(force=False):
    """Log in to Facebook, preferring saved cookies over the login form.

    Args:
        force: when True, skip the cookie file and always log in with the
            username/password.

    Raises:
        RuntimeError: if no login form is found or the browser is still on a
            login URL after submitting the credentials.
    """
    driver.get("https://www.facebook.com/")
    time.sleep(3)

    if not force and os.path.exists(COOKIES_FILE):
        try:
            load_cookies(driver, COOKIES_FILE)
            driver.refresh()
            time.sleep(5)
            if "login" not in driver.current_url:
                print("✅ Login pakai cookies berhasil")
                # Wait for the search bar before returning; its absence is
                # only reported, not treated as a failure.
                if _search_box_present():
                    print("🔍 Search bar tersedia, siap mencari grup")
                else:
                    print("⚠️ Search bar belum muncul, tetap lanjutkan")
                return
        except Exception as e:
            # Unreadable or rejected cookies: fall back to the login form.
            print("⚠️ Cookies gagal dipakai:", e)

    print("🔑 Login manual pakai username/password...")
    _submit_login_form()
    time.sleep(5)

    if "login" in driver.current_url:
        raise RuntimeError("❌ Login gagal! Cek username/password")

    save_cookies(driver, COOKIES_FILE)
    print("✅ Login sukses & cookies disimpan")

    if _search_box_present():
        print("🔍 Search bar tersedia, siap mencari grup")
    else:
        print("⚠️ Search bar belum muncul, coba manual redirect ke beranda")
        driver.get("https://www.facebook.com/")
        wait.until(EC.presence_of_element_located((By.XPATH, SEARCH_BOX_XPATH)))


def _is_visible(xpath):
    """Return True if the first element matching *xpath* is displayed."""
    try:
        return driver.find_element(By.XPATH, xpath).is_displayed()
    except WebDriverException:
        return False


def _login_prompt_message():
    """Return the warning to print if the page asks for a login, else None."""
    current_url = driver.current_url
    if current_url and "login" in current_url:
        return "⚠️ Redirect ke halaman login, mencoba login ulang..."
    if _is_visible('//div[contains(text(),"See more on Facebook")]'):
        return "⚠️ Popup login terdeteksi, login ulang..."
    if _is_visible('//input[@type="email" or @type="text"]'):
        return "⚠️ Form login modal terdeteksi, login ulang..."
    return None


def ensure_logged_in():
    """Log in again if the current page shows a login page, popup or form.

    Returns:
        True if a re-login was performed. fb_login() navigates to the
        Facebook home page, so the caller has to reload whatever page it was
        working on. False if no login prompt was found or the check failed.
    """
    try:
        message = _login_prompt_message()
        if message is None:
            return False
        print(message)
        fb_login(force=True)
        return True
    except Exception as e:
        print("⚠️ Gagal cek login:", e)
        return False


# ========== SEARCH & OPEN GROUP ==========
def _xpath_literal(text):
    """Quote *text* as an XPath string literal, whatever quotes it contains."""
    if '"' not in text:
        return f'"{text}"'
    if "'" not in text:
        return f"'{text}'"
    # Both quote kinds present: XPath 1.0 has no escapes, so join the pieces
    # between double quotes with concat().
    pieces = ", '\"', ".join(f'"{piece}"' for piece in text.split('"'))
    return f"concat({pieces})"


def open_group(group_input):
    """Open a group given either its URL or its name.

    Args:
        group_input: a URL starting with "http", or a group name that is
            typed into the Facebook search box.

    Returns:
        The URL of the opened group, or None if the search found no link or
        failed.
    """
    # --- Case: the input is a direct link ---
    if group_input.startswith("http"):
        print(f"🔗 Buka langsung link grup: {group_input}")
        driver.get(group_input)
        time.sleep(5)
        ensure_logged_in()
        return group_input

    # --- Case: the input is a group name ---
    try:
        search_box = wait.until(
            EC.presence_of_element_located((By.XPATH, SEARCH_BOX_XPATH))
        )
        print(f"🔍 Mencari grup '{group_input}' via search...")
        search_box.clear()
        search_box.send_keys(group_input)
        search_box.submit()
        time.sleep(5)

        # First link whose text contains the group name.
        results = driver.find_elements(
            By.XPATH, f"//a[contains(text(),{_xpath_literal(group_input)})]"
        )
        link = results[0].get_attribute("href") if results else None
        if not link:
            print(f"❌ Grup '{group_input}' tidak ditemukan via search")
            return None

        print(f"✅ Grup ditemukan: {link}")
        driver.get(link)
        time.sleep(5)
        return link
    except WebDriverException as e:
        print(f"⚠️ Search gagal untuk '{group_input}':", e)
        return None


def scroll_to_bottom(driver, max_scrolls=10, pause_time=2):
    """Scroll to the page bottom until the height stops growing.

    Args:
        driver: the WebDriver to scroll.
        max_scrolls: maximum number of scroll steps.
        pause_time: seconds to wait after each step for content to load.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(max_scrolls):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause_time)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


# ========== SCRAPING GROUP POSTS ==========
def _find_permalink(post, group_url):
    """Return the permalink of the feed element *post*, or None.

    Looks for a /posts/ link, then a /permalink/ link, and finally tries to
    build one from a "top_level_post_id" found in the data-ft attribute.
    """
    for marker in ("/posts/", "/permalink/"):
        try:
            link_el = post.find_element(By.XPATH, f".//a[contains(@href,'{marker}')]")
            href = link_el.get_attribute("href")
        except WebDriverException:
            continue
        if href:
            return href.split("?")[0]

    try:
        raw = post.get_attribute("data-ft")
    except WebDriverException:
        return None
    if not raw or "top_level_post_id" not in raw:
        return None
    try:
        pid = json.loads(raw).get("top_level_post_id")
    except (ValueError, AttributeError):
        return None
    if not pid:
        return None
    # Drop the query string first so a trailing "/" before "?" is stripped too.
    return f"{group_url.split('?')[0].rstrip('/')}/posts/{pid}/"


def _click_until_gone(context, xpath, max_clicks=MAX_EXPAND_CLICKS):
    """Click the element matching *xpath* inside *context* until it is gone."""
    for _ in range(max_clicks):
        try:
            btn = context.find_element(By.XPATH, xpath)
            driver.execute_script("arguments[0].click();", btn)
        except WebDriverException:
            return
        time.sleep(2)


def _read_author(context):
    """Return the author text of the post, or "Unknown" if none is found."""
    for xpath in AUTHOR_XPATHS:
        try:
            return context.find_element(By.XPATH, xpath).text.strip()
        except WebDriverException:
            continue
    return "Unknown"


def _read_caption(context):
    """Return the post caption (at most 2000 characters), "" if unavailable."""
    try:
        blocks = context.find_elements(
            By.XPATH,
            ".//div[@data-ad-rendering-role='story_message']//div[@dir='auto']",
        )
        texts = [text for text in (block.text.strip() for block in blocks) if text]
    except WebDriverException:
        return ""
    return "\n".join(texts)[:2000]


def _read_comments(context):
    """Return the distinct non-empty comment texts in page order."""
    try:
        blocks = context.find_elements(
            By.XPATH, ".//div[@aria-label='Komentar']//div[@dir='auto']"
        )
        texts = [block.text.strip() for block in blocks]
    except WebDriverException:
        return []
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(text for text in texts if text))


def _extract_post(context):
    """Read author, caption and comments from the article element *context*."""
    author = _read_author(context)
    _click_until_gone(context, MORE_COMMENTS_XPATH)
    _click_until_gone(context, MORE_REPLIES_XPATH)
    return {
        "author": author,
        "caption": _read_caption(context),
        "comments": _read_comments(context),
    }


def _collect_feed_entries(group_url, max_scrolls, max_posts):
    """Scroll the group feed and list the posts to capture, in feed order.

    Each entry is either a permalink string (to be visited later) or a dict
    of already-extracted details for a post that has no permalink. Nothing
    navigates away from the feed here, so the feed elements stay valid while
    they are being inspected.
    """
    entries = []
    seen_links = set()
    seen_elements = set()

    def limit_reached():
        return bool(max_posts) and len(entries) >= max_posts

    last_height = driver.execute_script("return document.body.scrollHeight")
    for scroll_round in range(max_scrolls):
        if limit_reached():
            break
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(4)
        if ensure_logged_in():
            # Re-login left the browser on the home page; go back to the feed.
            driver.get(group_url)
            time.sleep(4)
            last_height = driver.execute_script("return document.body.scrollHeight")
            continue

        post_elements = driver.find_elements(By.XPATH, ARTICLE_XPATH)
        print(f"🔎 Ditemukan {len(post_elements)} postingan pada scroll {scroll_round+1}")

        for idx, post in enumerate(post_elements):
            if limit_reached():
                break
            try:
                # Articles found in an earlier round are returned again.
                if post.id in seen_elements:
                    continue
                seen_elements.add(post.id)

                driver.execute_script("arguments[0].scrollIntoView(true);", post)
                time.sleep(1)

                permalink = _find_permalink(post, group_url)
                if permalink:
                    if permalink not in seen_links:
                        seen_links.add(permalink)
                        entries.append(permalink)
                    continue

                print("⚠️ Tidak ada permalink & tidak bisa generate. Tetap lanjut simpan data.")
                entries.append(_extract_post(post))
            except WebDriverException as e:
                print(f"⚠️ Error baca postingan {idx}: {e}")

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    return entries


def _scrape_permalink(permalink):
    """Open *permalink* and extract the first article found on that page.

    Returns default details (author "Unknown", empty caption and comments)
    when the page or its article cannot be loaded.
    """
    try:
        driver.get(permalink)
        time.sleep(3)
        if ensure_logged_in():
            # Re-login navigated away; load the post again.
            driver.get(permalink)
            time.sleep(3)
        post_context = driver.find_element(By.XPATH, ARTICLE_XPATH)
    except WebDriverException as e:
        print(f"⚠️ Gagal buka permalink {permalink}: {e}")
        return {"author": "Unknown", "caption": "", "comments": []}
    return _extract_post(post_context)


def scrape_group(group_url, group_name, max_scrolls=3, max_posts=None):
    """Scrape the posts of one group.

    Works in two phases: first the feed is scrolled and the permalinks are
    collected, then each permalink is opened on its own. Opening permalinks
    while still iterating over feed elements would invalidate those elements.

    Args:
        group_url: URL of the group feed.
        group_name: label stored in the "group_name" field of every record.
        max_scrolls: maximum number of scroll rounds on the feed.
        max_posts: maximum number of posts to capture; None or 0 means no
            limit.

    Returns:
        A list of dicts with the keys group_name, group_url, post_url,
        author, caption and comments. Posts without a permalink get
        group_url as their post_url.
    """
    print(f"📥 Scraping grup: {group_name} ({group_url})")
    driver.get(group_url)
    time.sleep(4)
    if ensure_logged_in():
        driver.get(group_url)
        time.sleep(4)

    entries = _collect_feed_entries(group_url, max_scrolls, max_posts)

    posts = []
    for idx, entry in enumerate(entries):
        try:
            if isinstance(entry, str):
                post_url, details = entry, _scrape_permalink(entry)
            else:
                post_url, details = group_url, entry

            author = details["author"]
            caption = details["caption"]
            comments = details["comments"]
            print(f"✅ Post captured: {author} | {caption[:60]}... | {len(comments)} komentar")
            posts.append(
                {
                    "group_name": group_name,
                    "group_url": group_url,
                    "post_url": post_url,
                    "author": author,
                    "caption": caption,
                    "comments": comments,
                }
            )
        except WebDriverException as e:
            print(f"⚠️ Error baca postingan {idx}: {e}")

    return posts


# ========== MAIN ==========
def main():
    """Log in, scrape every configured group and write the CSV/JSON output."""
    all_data = []
    try:
        fb_login()
        for g in GROUP_INPUTS:
            group_url = open_group(g)
            if group_url:
                all_data.extend(scrape_group(group_url, g))

        # Save to CSV. "comments" is a list; the csv module writes it using
        # str(), i.e. as the Python list representation.
        with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
            fieldnames = ["group_name", "group_url", "post_url", "author", "caption", "comments"]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_data)

        # Save to JSON.
        with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
            json.dump(all_data, f, ensure_ascii=False, indent=2)

        print(f"✅ Selesai. Data disimpan ke {OUTPUT_CSV} dan {OUTPUT_JSON}")
    finally:
        # Always close the browser, even when login or scraping failed.
        # Shutdown errors are deliberately ignored (best effort).
        try:
            driver.quit()
        except Exception:
            pass


if __name__ == "__main__":
    main()