QJMKWB2 / scraper6.py
QJMKWB's picture
Create scraper6.py
ab8d642 verified
async def start(self):
    """Perform the one-time tunnel and browser setup for this manager.

    Under ``self._lock`` and only while ``self._setup_done`` is false, this:

    1. calls ``setup_and_start_xray()``;
    2. starts Playwright and launches a headless persistent Chromium
       context (profile dir ``/tmp/xray_profile``) that sends its traffic
       through the SOCKS5 proxy at ``127.0.0.1:10808``;
    3. opens a throw-away page that fetches ``https://api.ipify.org`` and
       prints the reported IP as a diagnostic. A failure of this check is
       only printed; it does not abort the setup.

    On success ``self.playwright`` and ``self.context`` are set and
    ``self._setup_done`` becomes true, so later calls return immediately.

    Raises:
        Whatever ``setup_and_start_xray()``, Playwright start-up, the
        browser launch or opening the diagnostic page raises. If the
        browser launch fails, the Playwright driver started here is stopped
        before the exception propagates, so a retry does not pile up
        orphaned driver instances.
    """
    async with self._lock:
        if self._setup_done:
            return

        setup_and_start_xray()

        # Hold the driver in a local until the browser launch succeeds, so
        # that the instance attributes never point at a half-built setup.
        playwright = await async_playwright().start()
        try:
            context = await playwright.chromium.launch_persistent_context(
                "/tmp/xray_profile",
                headless=True,
                proxy={"server": "socks5://127.0.0.1:10808"},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            )
        except BaseException:
            # Launch failed (or was cancelled): release the driver we just
            # started, then let the original exception propagate unchanged.
            await playwright.stop()
            raise

        self.playwright = playwright
        self.context = context

        # --- IP ADDRESS CHECK ---
        # Diagnostic only: prints the IP address reported by api.ipify.org
        # for a request made from the proxied browser context.
        test_page = await self.context.new_page()
        try:
            print("[DEBUG] Overujem IP cez tunel...")
            await test_page.goto("https://api.ipify.org", timeout=15000)
            ip = await test_page.inner_text("body")
            print(f"[DEBUG] Tvoja IP v tuneli je: {ip}")
        except Exception as e:
            # Best effort: a failed check is reported but setup continues.
            print(f"[DEBUG ERROR] Tunel nefunguje: {e}")
        finally:
            await test_page.close()
        # ------------------------

        self._setup_done = True
async def search_movies(query):
    """Query the site's JSON search endpoint through a managed browser page.

    A page is obtained from ``manager.get_page()``, navigated to
    ``{DOMENA}/se/j/json?q=<url-quoted query>``, and the text of the
    document body is parsed as JSON.

    Args:
        query: Search text; it is URL-quoted before being put in the URL.

    Returns:
        The decoded JSON value on success, or an empty list if anything in
        the navigate/read/parse sequence raises (the error is printed).
        The page is closed in every case.
    """
    page = await manager.get_page()
    try:
        search_url = f"{DOMENA}/se/j/json?q={urllib.parse.quote(query)}"
        print(f"[TUNNEL] Vyhľadávam: {query}")

        # Load the endpoint in the real browser to see whether the site
        # lets the request through.
        await page.goto(search_url, wait_until="networkidle", timeout=30000)

        # Grab the raw markup purely for diagnostics.
        markup = await page.content()
        markup_length = len(markup)
        print(f"[DEBUG] Surová dĺžka obsahu: {markup_length}")
        if markup_length < 200:
            print(f"[DEBUG] POZOR! Web vrátil podozrivo málo dát: {markup}")

        # The JSON payload is read back as the visible text of <body>.
        body_text = await page.evaluate("() => document.body.innerText")
        results = json.loads(body_text)
    except Exception as err:
        print(f"[SEARCH ERROR] {err}")
        results = []
    finally:
        await page.close()
    return results