| | from playwright.async_api import async_playwright |
| | import playwright_stealth |
| | from bs4 import BeautifulSoup |
| | from database import db |
| | from firebase_admin import firestore |
| | import asyncio |
| | import random |
| |
|
async def _apply_stealth(page):
    """Apply playwright-stealth evasions to an async Playwright ``page``.

    Uses ``playwright_stealth.stealth_async`` when the installed package
    exposes it.  Otherwise falls back to the class-based entry point,
    which is awaited as well: a synchronous stealth helper must not be
    used on an async page because its calls would never be awaited.

    NOTE(review): ``Stealth().apply_stealth_async`` is believed to be the
    playwright-stealth 2.x API -- confirm against the pinned version.
    """
    legacy_stealth = getattr(playwright_stealth, "stealth_async", None)
    if legacy_stealth is not None:
        await legacy_stealth(page)
        return
    await playwright_stealth.Stealth().apply_stealth_async(page)


def _doc_id_from_title(title):
    """Derive the document id used in the ``streaming`` collection."""
    # "/" is replaced because the id is built from free text scraped
    # from the page; spaces are normalised to underscores.
    return title.replace(' ', '_').replace('/', '-')


async def start_scrape_generator(target_url: str):
    """Scrape an anime listing page and upsert each title into Firestore.

    This is an async generator: it yields human-readable progress
    strings while it works, so a caller can stream them as they arrive.

    Steps performed:
      1. Launch headless Chromium with a desktop Chrome user agent and
         apply playwright-stealth to the page.
      2. If ``target_url`` is the bare ``https://anichin.cafe`` root,
         redirect to the "recently updated" listing instead.
      3. Load the page, parse it with BeautifulSoup and collect the
         anchors matched by ``.listupd .bsx a[href]``.
      4. For every anchor, write ``title``, ``url`` and a server
         timestamp to ``streaming/<doc_id>`` with ``merge=True``.

    Args:
        target_url: Page to open.

    Yields:
        str: Status messages.  Failures after the browser has started
        are reported as a yielded error line rather than raised.

    NOTE(review): the leading glyphs in the status strings look like
    mis-decoded emoji; they are reproduced as found -- confirm the
    intended characters and the file encoding.
    """
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36"
    )

    async with async_playwright() as p:
        yield "π Menjalankan Browser Stealth (Chromium)..."

        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-setuid-sandbox"],
        )

        # Everything after a successful launch runs under try/finally so
        # the browser is closed even when context/page/stealth setup fails.
        try:
            context = await browser.new_context(user_agent=user_agent)
            page = await context.new_page()
            await _apply_stealth(page)

            if target_url.rstrip('/') == "https://anichin.cafe":
                target_url = "https://anichin.cafe/anime/?order=update"
                yield "βοΈ Mode Crawler: Menuju daftar anime..."

            yield f"π Membuka: {target_url}"

            await page.goto(target_url, wait_until="networkidle", timeout=90000)
            # Extra settle time after network idle, kept from the original.
            await asyncio.sleep(5)

            content = await page.content()
            soup = BeautifulSoup(content, 'html.parser')

            # Only anchors that actually carry an href are usable; the
            # attribute selector avoids a KeyError on item['href'] below.
            items = soup.select('.listupd .bsx a[href]')
            if not items:
                yield "β Gagal bypass Cloudflare atau Selector salah. IP HF lo kemungkinan kena limit."
                return

            yield f"π Ditemukan {len(items)} judul. Mulai sinkronisasi..."
            for item in items:
                link = item['href']
                title_tag = item.select_one('.tt')
                # Fall back to the placeholder when the tag is missing or
                # blank: an empty title would produce an empty document id.
                title = (title_tag.text.strip() if title_tag else "") or "Judul"
                yield f"π¬ Memproses: {title}"

                # NOTE(review): this call is not awaited, so it presumably
                # blocks the event loop for the duration of each write --
                # confirm whether `db` is a synchronous client.
                db.collection('streaming').document(_doc_id_from_title(title)).set({
                    "title": title,
                    "url": link,
                    "updated_at": firestore.SERVER_TIMESTAMP,
                }, merge=True)
            yield "β SEMUA JUDUL BERHASIL DISINKRONISASI!"

        except Exception as e:
            # Boundary handler: surface the failure to the consumer as a
            # status line instead of breaking the stream.
            yield f"β Error: {e}"
        finally:
            await browser.close()
| |
|