File size: 2,823 Bytes
7c74c3d
bc11e45
91994f0
 
 
7c74c3d
690f2ec
91994f0
7c74c3d
 
 
bc11e45
 
 
 
 
 
 
7c74c3d
bc11e45
debc4ec
bc11e45
7c74c3d
bc11e45
 
 
 
 
 
 
 
690f2ec
7c74c3d
 
 
 
debc4ec
7c74c3d
bc11e45
 
 
 
7c74c3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc11e45
7c74c3d
 
bc11e45
debc4ec
7c74c3d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from playwright.async_api import async_playwright
import playwright_stealth # Import utuh biar gak ImportError
from bs4 import BeautifulSoup
from database import db
from firebase_admin import firestore
import asyncio
import random

async def _apply_stealth(page):
    """Apply playwright-stealth evasions to an async Playwright page.

    The entry point is looked up on the imported module at call time, so this
    works whether the installed package exposes the function-style
    ``stealth_async`` or the class-style ``Stealth().apply_stealth_async``.

    Args:
        page: The async Playwright page to patch.

    Returns:
        True if an async stealth entry point was found and applied, False if
        the installed package exposes neither.
    """
    legacy_stealth = getattr(playwright_stealth, "stealth_async", None)
    if legacy_stealth is not None:
        await legacy_stealth(page)
        return True

    stealth_cls = getattr(playwright_stealth, "Stealth", None)
    if stealth_cls is not None:
        await stealth_cls().apply_stealth_async(page)
        return True

    # Deliberately no ``stealth_sync`` fallback: on an async page its calls
    # would produce coroutines that are never awaited, so nothing is applied.
    return False


async def start_scrape_generator(target_url):
    """Scrape an anime listing page and sync every title to Firestore.

    Launches headless Chromium, opens ``target_url`` (the bare
    ``https://anichin.cafe`` root is redirected to its update-ordered anime
    list), parses the ``.listupd .bsx a`` entries and upserts each one into
    the ``streaming`` collection.

    Args:
        target_url: URL of the page to scrape.

    Yields:
        Human-readable status strings describing progress, warnings and
        errors.

    Raises:
        Exception: Failures while launching the browser or creating the
            context/page propagate to the caller. Failures during navigation,
            parsing or syncing are caught and yielded as an error message.
    """
    async with async_playwright() as p:
        yield "🚀 Menjalankan Browser Stealth (Chromium)..."

        # Extra launch arguments for stability on the hosting environment.
        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-setuid-sandbox"],
        )

        # Everything after a successful launch runs under this ``finally`` so
        # the browser is closed even if context/page/stealth setup fails.
        try:
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
            )
            page = await context.new_page()

            if not await _apply_stealth(page):
                yield "⚠️ playwright_stealth tidak menyediakan API async; lanjut tanpa stealth."

            try:
                if target_url.rstrip('/') == "https://anichin.cafe":
                    target_url = "https://anichin.cafe/anime/?order=update"
                    yield "⛓️ Mode Crawler: Menuju daftar anime..."

                yield f"🌐 Membuka: {target_url}"

                # Generous timeout: page loads are slow on the hosting
                # environment. The extra sleep gives late scripts time to
                # finish rendering after the network goes idle.
                await page.goto(target_url, wait_until="networkidle", timeout=90000)
                await asyncio.sleep(5)

                content = await page.content()
                soup = BeautifulSoup(content, 'html.parser')

                items = soup.select('.listupd .bsx a')
                if not items:
                    yield "❌ Gagal bypass Cloudflare atau Selector salah. IP HF lo kemungkinan kena limit."
                    return

                yield f"📂 Ditemukan {len(items)} judul. Mulai sinkronisasi..."
                for item in items:
                    title_tag = item.select_one('.tt')
                    # A missing or empty title falls back to the placeholder
                    # so the derived document id is never empty.
                    title = (title_tag.text.strip() if title_tag else "") or "Judul"

                    link = item.get('href')
                    if not link:
                        # Skip this entry instead of aborting the whole sync.
                        yield f"⚠️ Dilewati (tanpa link): {title}"
                        continue

                    yield f"🎬 Memproses: {title}"

                    # '/' would be read as a path separator in a document id.
                    doc_id = title.replace(' ', '_').replace('/', '-')
                    # NOTE(review): this write is synchronous and blocks the
                    # event loop for each item; consider offloading it to a
                    # worker thread if the client is confirmed thread-safe.
                    db.collection('streaming').document(doc_id).set({
                        "title": title,
                        "url": link,
                        "updated_at": firestore.SERVER_TIMESTAMP
                    }, merge=True)
                yield "✅ SEMUA JUDUL BERHASIL DISINKRONISASI!"

            except Exception as e:
                yield f"❌ Error: {str(e)}"
        finally:
            await browser.close()