weu / scraper.py
Aqso's picture
Update scraper.py
bc11e45 verified
from playwright.async_api import async_playwright
import playwright_stealth # Import utuh biar gak ImportError
from bs4 import BeautifulSoup
from database import db
from firebase_admin import firestore
import asyncio
import random
async def _apply_stealth(page):
    """Apply playwright_stealth evasions to an *async* Playwright page.

    Feature-detects the two async entry points the library is known to
    expose: the module-level ``stealth_async(page)`` coroutine (1.x
    releases) and ``Stealth().apply_stealth_async(page)`` (2.x releases).
    The sync helper is deliberately never used here: on an async page it
    would only create coroutines that are never awaited.

    Returns:
        True if an evasion routine was applied, False if the installed
        playwright_stealth exposes neither entry point.
    """
    legacy_async = getattr(playwright_stealth, "stealth_async", None)
    if legacy_async is not None:
        await legacy_async(page)
        return True

    stealth_cls = getattr(playwright_stealth, "Stealth", None)
    if stealth_cls is not None:
        await stealth_cls().apply_stealth_async(page)
        return True

    return False


async def start_scrape_generator(target_url):
    """Scrape an anime listing page and upsert each title into Firestore.

    This is an async generator: it yields human-readable progress strings
    while it works so the caller can stream them.

    Args:
        target_url: Page to open. The bare site root
            ``https://anichin.cafe`` (with or without a trailing slash) is
            rewritten to the "latest updates" listing.

    Yields:
        str: Status messages. Failures during navigation, parsing or the
        Firestore writes are reported as a ``"❌ Error: ..."`` message
        instead of being raised.

    Raises:
        Exception: Errors from launching the browser or preparing the
            context/page propagate to the caller; the browser is still
            closed in that case.
    """
    async with async_playwright() as p:
        yield "🚀 Menjalankan Browser Stealth (Chromium)..."

        # Extra launch flags for stability inside the Hugging Face container.
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-setuid-sandbox"],
        )
        # Everything after a successful launch is covered by this finally so
        # the browser is closed even if context/page/stealth setup fails.
        try:
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
            )
            page = await context.new_page()

            # Stealth is best-effort: keep going without it rather than crash.
            if not await _apply_stealth(page):
                yield "⚠️ playwright_stealth tidak punya API async yang dikenal; lanjut tanpa stealth."

            try:
                if target_url.rstrip('/') == "https://anichin.cafe":
                    target_url = "https://anichin.cafe/anime/?order=update"
                    yield "⛓️ Mode Crawler: Menuju daftar anime..."

                yield f"🌐 Membuka: {target_url}"

                # Generous timeout: Playwright is slow on the hosted runtime.
                await page.goto(target_url, wait_until="networkidle", timeout=90000)
                await asyncio.sleep(5)

                content = await page.content()
                soup = BeautifulSoup(content, 'html.parser')
                items = soup.select('.listupd .bsx a')

                if not items:
                    yield "❌ Gagal bypass Cloudflare atau Selector salah. IP HF lo kemungkinan kena limit."
                    return

                yield f"📂 Ditemukan {len(items)} judul. Mulai sinkronisasi..."

                loop = asyncio.get_running_loop()
                for item in items:
                    # An anchor without href has nothing to store; skip it
                    # instead of letting a KeyError abort the whole sync.
                    link = item.get('href')
                    if not link:
                        continue

                    title_tag = item.select_one('.tt')
                    title = title_tag.text.strip() if title_tag else ""
                    # An empty title would produce an empty (invalid)
                    # document ID, so fall back to the placeholder.
                    title = title or "Judul"
                    yield f"🎬 Memproses: {title}"

                    # '/' is a path separator in Firestore document IDs.
                    doc_id = title.replace(' ', '_').replace('/', '-')
                    doc_ref = db.collection('streaming').document(doc_id)
                    payload = {
                        "title": title,
                        "url": link,
                        "updated_at": firestore.SERVER_TIMESTAMP,
                    }
                    # The Firestore client call is synchronous network I/O;
                    # run it in a worker thread so the event loop (and the
                    # progress stream) is not blocked during each write.
                    await loop.run_in_executor(
                        None,
                        lambda ref=doc_ref, data=payload: ref.set(data, merge=True),
                    )

                yield "✅ SEMUA JUDUL BERHASIL DISINKRONISASI!"
            except Exception as e:
                # Top-level boundary of the stream: report instead of raising.
                yield f"❌ Error: {e}"
        finally:
            await browser.close()