Spaces:
Running
Running
"""Headless Chromium session — live page stream for the right-hand monitor.

A single shared browser + Page. When the assistant obtains a productLink from
a tool result, `navigate(url)` is called; connected /browser WS clients then
receive screenshots of the page.
Navigation is automatic and frames are streamed as JPEG. BrowserSession also
defines click() and scroll() helpers for interacting with the page.
"""
| from __future__ import annotations | |
| import asyncio | |
| import base64 | |
| import logging | |
| from typing import Optional | |
| from fastapi import WebSocket | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Trek page proxy — works around the IP ban on the HF Space.
# The target URL is appended (URL-encoded) after "url=".
TREK_PROXY = "https://video.trek-turkey.com/trek-page-proxy.php?url="

# Stream parameters
VIEWPORT_W = 1280
VIEWPORT_H = 800
FRAME_INTERVAL_S = 0.4  # 2.5 fps
JPEG_QUALITY = 65
NAV_TIMEOUT_MS = 15000
POST_NAV_SLEEP_S = 0.2  # short wait after a navigation, before the first frame

# Block bot/tracker/ads/heavy resources — Trek pages load much faster.
# Entries are matched as plain substrings of the full request URL, so they
# may be host names, host fragments or path fragments.
BLOCKED_DOMAINS = (
    "google-analytics.com", "googletagmanager.com", "googlesyndication.com",
    "doubleclick.net", "facebook.net", "facebook.com", "connect.facebook",
    "hotjar.com", "static.hotjar", "criteo.com", "criteo.net",
    "yandex.ru", "mc.yandex", "metrica", "webvisor",
    "clarity.ms", "bing.com/sct", "linkedin.com/li",
    "tiktok.com", "snapchat.com", "pinterest.com",
    "cloudflareinsights.com", "beacon.min.js",
)
# Playwright resource types that are always aborted: no video/audio/font.
BLOCKED_RESOURCE_TYPES = ("media", "font")
class BrowserSession:
    """One headless Chromium page whose screenshots are streamed over WebSockets.

    The browser is started lazily on first use (`ensure_started`). `navigate`
    kicks off a page load in the background, and `stream_to` pushes the URL and
    JPEG frames of the current page to a connected client until it disconnects.
    """

    def __init__(self):
        self._pw = None
        self._browser = None
        self._page = None
        self._lock = asyncio.Lock()
        self._current_url: Optional[str] = None
        # Bumped on every navigate/click/scroll; signals the stream loop.
        self._nav_seq = 0
        self._started = False
        # Strong references to in-flight background navigation tasks. The event
        # loop only keeps weak references, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._bg_tasks: set = set()

    @staticmethod
    async def _route_filter(route):
        """Abort requests for blocked resource types/URLs; let the rest through."""
        req = route.request
        blocked = req.resource_type in BLOCKED_RESOURCE_TYPES or any(
            fragment in req.url for fragment in BLOCKED_DOMAINS
        )
        if blocked:
            await route.abort()
        else:
            await route.continue_()

    async def ensure_started(self):
        """Launch Chromium and create the page if that has not happened yet.

        The started flag is checked once without the lock and again under it,
        so concurrent callers launch at most one browser.

        Raises:
            Exception: whatever Playwright raised during start-up. The error is
                logged and partially created state is torn down first.
        """
        if self._started:
            return
        async with self._lock:
            if self._started:
                return
            try:
                # Imported lazily so the module can be imported without Playwright.
                from playwright.async_api import async_playwright

                self._pw = await async_playwright().start()
                self._browser = await self._pw.chromium.launch(
                    headless=True,
                    args=[
                        "--no-sandbox",
                        "--disable-dev-shm-usage",
                        "--disable-gpu",
                        "--disable-setuid-sandbox",
                    ],
                )
                ctx = await self._browser.new_context(
                    viewport={"width": VIEWPORT_W, "height": VIEWPORT_H},
                    user_agent=(
                        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                        "AppleWebKit/537.36 (KHTML, like Gecko) "
                        "Chrome/120.0.0.0 Safari/537.36"
                    ),
                )
                self._page = await ctx.new_page()
                # Block trackers/ads/heavy resources.
                await self._page.route("**/*", self._route_filter)
                # Splash screen shown until the first navigation.
                await self._page.set_content(
                    "<html><body style='margin:0;display:flex;align-items:center;"
                    "justify-content:center;height:100vh;font-family:sans-serif;"
                    "background:#fff;color:#999;font-size:24px;'>"
                    "Trek Sesli Asistan — bir urun adi soyleyin</body></html>"
                )
                self._started = True
                logger.info("BrowserSession baslatildi (Chromium headless)")
            except Exception:
                logger.exception("BrowserSession baslatilamadi")
                # Clean up partial state so a later call can retry from scratch.
                await self._safe_shutdown()
                raise

    async def _safe_shutdown(self):
        """Best-effort teardown: close browser, stop Playwright, reset state.

        Never raises; failures are logged at debug level only.
        """
        try:
            if self._browser:
                await self._browser.close()
        except Exception as e:
            logger.debug("[browser] browser close failed: %s", e)
        try:
            if self._pw:
                await self._pw.stop()
        except Exception as e:
            logger.debug("[browser] playwright stop failed: %s", e)
        self._browser = None
        self._pw = None
        self._page = None
        self._started = False

    async def navigate(self, url: str, fallback_query: Optional[str] = None):
        """Start loading `url` in the background and return immediately.

        Fire-and-forget: the page load is not awaited; the stream loop picks up
        frames as the page renders. Trek shop URLs are loaded through
        TREK_PROXY, anything else directly.

        Args:
            url: Page to open. Empty/None is ignored.
            fallback_query: If given and the response status is >= 400, the
                Trek search page for this query is opened instead — unless a
                newer `navigate` call has replaced `url` in the meantime.
        """
        if not url:
            return
        try:
            await self.ensure_started()
        except Exception:
            return  # already logged by ensure_started
        page = self._page
        if not page:
            return

        from urllib.parse import quote, quote_plus

        def _proxied(u: str) -> str:
            return TREK_PROXY + quote(u, safe="")

        self._current_url = url
        self._nav_seq += 1

        use_proxy = url.startswith("https://www.trekbisiklet.com.tr/")
        target = _proxied(url) if use_proxy else url
        logger.info(
            "[browser] navigate -> %s (%s)", url, "via proxy" if use_proxy else "direct"
        )

        async def _do_goto():
            try:
                resp = await page.goto(target, timeout=NAV_TIMEOUT_MS, wait_until="commit")
                # On 404/5xx fall back to the Trek search page.
                if not (resp and resp.status >= 400 and fallback_query):
                    return
                if self._current_url != url:
                    # A newer navigate() took over; do not clobber its page.
                    return
                search_url = (
                    f"https://www.trekbisiklet.com.tr/arama?q={quote_plus(fallback_query)}"
                )
                logger.info(
                    "[browser] %s -> fallback search: %r", resp.status, fallback_query
                )
                self._current_url = search_url
                self._nav_seq += 1
                await page.goto(
                    _proxied(search_url), timeout=NAV_TIMEOUT_MS, wait_until="commit"
                )
            except Exception as e:
                logger.warning("[browser] nav (bg) %s: %s", url, e)

        task = asyncio.create_task(_do_goto())
        # Hold a reference until the task completes (see __init__).
        self._bg_tasks.add(task)
        task.add_done_callback(self._bg_tasks.discard)

    def current_url(self) -> Optional[str]:
        """Return the URL most recently requested, or None before any navigation."""
        return self._current_url

    async def click(self, nx: float, ny: float):
        """Click in the viewport at normalized (0-1) coordinates.

        Coordinates are scaled by the viewport size and clamped to its bounds.
        Errors are logged at debug level and swallowed.
        """
        if not self._page:
            return
        try:
            x = max(0, min(VIEWPORT_W - 1, int(nx * VIEWPORT_W)))
            y = max(0, min(VIEWPORT_H - 1, int(ny * VIEWPORT_H)))
            self._nav_seq += 1  # signal the stream loop that the page may change
            await self._page.mouse.click(x, y)
        except Exception as e:
            logger.debug(f"[browser] click hatasi: {e}")

    async def scroll(self, dy: int):
        """Scroll the page by `dy` pixels (positive = down).

        Errors are logged at debug level and swallowed.
        """
        if not self._page:
            return
        try:
            await self._page.mouse.wheel(0, dy)
            self._nav_seq += 1
        except Exception as e:
            logger.debug(f"[browser] scroll hatasi: {e}")

    async def screenshot_jpeg(self) -> Optional[bytes]:
        """Return a JPEG screenshot of the viewport, or None if unavailable.

        None is returned when no page exists or the screenshot call fails.
        """
        if not self._page:
            return None
        try:
            return await self._page.screenshot(
                type="jpeg", quality=JPEG_QUALITY, full_page=False
            )
        except Exception as e:
            logger.debug(f"[browser] screenshot hatasi: {e}")
            return None

    async def stream_to(self, ws: "WebSocket"):
        """Screenshot stream loop for one client; runs until it disconnects.

        Sends `browser.url` whenever the current URL changed and
        `browser.frame` (base64 JPEG) every FRAME_INTERVAL_S seconds. Returns
        when a send fails; cancellation is propagated. If Chromium cannot be
        started, a single `browser.error` message is attempted instead.
        """
        try:
            await self.ensure_started()
        except Exception:
            try:
                await ws.send_json(
                    {"type": "browser.error", "message": "Chromium baslatilamadi"}
                )
            except Exception:
                pass  # client already gone; nothing left to report to
            return

        last_seq_sent = -1
        last_url_sent = None
        while True:
            try:
                # After a navigate/click/scroll: announce a new URL (if any)
                # and give the page a short moment before grabbing a frame.
                if self._nav_seq != last_seq_sent:
                    last_seq_sent = self._nav_seq
                    if self._current_url != last_url_sent:
                        last_url_sent = self._current_url
                        try:
                            await ws.send_json(
                                {"type": "browser.url", "url": self._current_url}
                            )
                        except Exception:
                            return
                    await asyncio.sleep(POST_NAV_SLEEP_S)

                jpeg = await self.screenshot_jpeg()
                if jpeg:
                    b64 = base64.b64encode(jpeg).decode("ascii")
                    try:
                        await ws.send_json({"type": "browser.frame", "jpeg": b64})
                    except Exception:
                        return
                await asyncio.sleep(FRAME_INTERVAL_S)
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.exception("[browser] stream loop hatasi")
                await asyncio.sleep(1.0)
# Singleton: one module-wide session object, created when the module is imported.
_session = BrowserSession()


def get_browser_session() -> BrowserSession:
    """Return the module-level BrowserSession singleton."""
    return _session
async def navigate(url: str, fallback_query: Optional[str] = None):
    """Navigate the shared browser session to `url`.

    Thin module-level wrapper around the singleton's `BrowserSession.navigate`.

    Args:
        url: Page to open.
        fallback_query: Optional search text forwarded to the session, which
            uses it for a search-page fallback when `url` answers with an
            error status. Defaults to None (no fallback), matching the
            previous one-argument behaviour.
    """
    await _session.navigate(url, fallback_query)