"""Headless Chromium session — sag monitor icin canli sayfa stream'i. Tek-singleton browser + Page. Asistan tool sonucundan productLink alinca `navigate(url)` cagrilir; bagli /browser WS client'lari ekran goruntusunu alir. Read-only: tiklama/scroll yok, sadece otomatik navigate + JPEG stream. """ from __future__ import annotations import asyncio import base64 import logging from typing import Optional from fastapi import WebSocket logger = logging.getLogger(__name__) # Trek sayfa proxy — HF Space IP ban'ini etrafindan dolas TREK_PROXY = "https://video.trek-turkey.com/trek-page-proxy.php?url=" # Stream parametreleri VIEWPORT_W = 1280 VIEWPORT_H = 800 FRAME_INTERVAL_S = 0.4 # 2.5 fps JPEG_QUALITY = 65 NAV_TIMEOUT_MS = 15000 POST_NAV_SLEEP_S = 0.2 # nav sonrasi ilk frame icin kisa bekleme # Bot/tracker/ads/agir resource'lari block et — Trek sayfalari cok hizlanir BLOCKED_DOMAINS = ( "google-analytics.com", "googletagmanager.com", "googlesyndication.com", "doubleclick.net", "facebook.net", "facebook.com", "connect.facebook", "hotjar.com", "static.hotjar", "criteo.com", "criteo.net", "yandex.ru", "mc.yandex", "metrica", "webvisor", "clarity.ms", "bing.com/sct", "linkedin.com/li", "tiktok.com", "snapchat.com", "pinterest.com", "cloudflareinsights.com", "beacon.min.js", ) BLOCKED_RESOURCE_TYPES = ("media", "font") # video/audio/font yok class BrowserSession: def __init__(self): self._pw = None self._browser = None self._page = None self._lock = asyncio.Lock() self._current_url: Optional[str] = None self._nav_seq = 0 # her navigate'te artar, stream loop'a sinyal self._started = False async def ensure_started(self): if self._started: return async with self._lock: if self._started: return try: from playwright.async_api import async_playwright self._pw = await async_playwright().start() self._browser = await self._pw.chromium.launch( headless=True, args=[ "--no-sandbox", "--disable-dev-shm-usage", "--disable-gpu", "--disable-setuid-sandbox", ], ) ctx = await self._browser.new_context( viewport={"width": VIEWPORT_W, "height": VIEWPORT_H}, user_agent=( "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " "AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/120.0.0.0 Safari/537.36" ), ) self._page = await ctx.new_page() # Tracker/ads/heavy resource'lari block et async def _route_filter(route): req = route.request url = req.url if req.resource_type in BLOCKED_RESOURCE_TYPES: return await route.abort() for d in BLOCKED_DOMAINS: if d in url: return await route.abort() return await route.continue_() await self._page.route("**/*", _route_filter) # Acilis ekrani await self._page.set_content( "" "Trek Sesli Asistan — bir urun adi soyleyin" ) self._started = True logger.info("BrowserSession baslatildi (Chromium headless)") except Exception: logger.exception("BrowserSession baslatilamadi") # Cleanup partial state await self._safe_shutdown() raise async def _safe_shutdown(self): try: if self._browser: await self._browser.close() except Exception: pass try: if self._pw: await self._pw.stop() except Exception: pass self._browser = None self._pw = None self._page = None self._started = False async def navigate(self, url: str, fallback_query: str | None = None): """Fire-and-forget: nav baslatilir, beklenmez. Stream loop frame'leri kademeli yakalar. URL 404 dönerse Trek arama sayfasina fallback yapilir.""" if not url: return try: await self.ensure_started() except Exception: return if not self._page: return self._current_url = url self._nav_seq += 1 from urllib.parse import quote, quote_plus def _proxied(u: str) -> str: return TREK_PROXY + quote(u, safe="") if url.startswith("https://www.trekbisiklet.com.tr/"): target = _proxied(url) else: target = url logger.info(f"[browser] navigate -> {url} (via proxy)") async def _do_goto(): try: resp = await self._page.goto(target, timeout=NAV_TIMEOUT_MS, wait_until="commit") # 404/5xx ise Trek aramaya fallback if resp and resp.status >= 400 and fallback_query: search_url = f"https://www.trekbisiklet.com.tr/arama?q={quote_plus(fallback_query)}" logger.info(f"[browser] {resp.status} -> fallback search: {fallback_query!r}") self._current_url = search_url self._nav_seq += 1 await self._page.goto(_proxied(search_url), timeout=NAV_TIMEOUT_MS, wait_until="commit") except Exception as e: logger.warning(f"[browser] nav (bg) {url}: {e}") asyncio.create_task(_do_goto()) @property def current_url(self) -> Optional[str]: return self._current_url async def click(self, nx: float, ny: float): """Normalize edilmis (0-1) koordinatla viewport'ta tikla.""" if not self._page: return try: x = max(0, min(VIEWPORT_W - 1, int(nx * VIEWPORT_W))) y = max(0, min(VIEWPORT_H - 1, int(ny * VIEWPORT_H))) self._nav_seq += 1 # stream loop frame'i hizlandirsin await self._page.mouse.click(x, y) except Exception as e: logger.debug(f"[browser] click hatasi: {e}") async def scroll(self, dy: int): """Sayfayi dy piksel kaydir (pozitif = asagi).""" if not self._page: return try: await self._page.mouse.wheel(0, dy) self._nav_seq += 1 except Exception as e: logger.debug(f"[browser] scroll hatasi: {e}") async def screenshot_jpeg(self) -> Optional[bytes]: if not self._page: return None try: return await self._page.screenshot(type="jpeg", quality=JPEG_QUALITY, full_page=False) except Exception as e: logger.debug(f"[browser] screenshot hatasi: {e}") return None async def stream_to(self, ws: WebSocket): """Tek client icin screenshot stream loop — disconnect olana kadar.""" try: await self.ensure_started() except Exception: try: await ws.send_json({"type": "browser.error", "message": "Chromium baslatilamadi"}) except Exception: pass return last_seq_sent = -1 last_url_sent = None while True: try: # Navigate olduysa kalibre et — frame yollamadan once kucuk bekleme if self._nav_seq != last_seq_sent: last_seq_sent = self._nav_seq if self._current_url != last_url_sent: last_url_sent = self._current_url try: await ws.send_json({"type": "browser.url", "url": self._current_url}) except Exception: return # Sayfa yuklenirken kisa bekleme await asyncio.sleep(POST_NAV_SLEEP_S) jpeg = await self.screenshot_jpeg() if jpeg: b64 = base64.b64encode(jpeg).decode("ascii") try: await ws.send_json({"type": "browser.frame", "jpeg": b64}) except Exception: return await asyncio.sleep(FRAME_INTERVAL_S) except asyncio.CancelledError: raise except Exception: logger.exception("[browser] stream loop hatasi") await asyncio.sleep(1.0) # Singleton _session = BrowserSession() def get_browser_session() -> BrowserSession: return _session async def navigate(url: str): await _session.navigate(url)