BF-Realtime / browser_session.py
SamiKoen
Kalici fix: 404'te otomatik Trek arama sayfasina fallback
3608912
Raw
History Blame Contribute Delete
9.24 kB
"""Headless Chromium session — sag monitor icin canli sayfa stream'i.
Tek-singleton browser + Page. Asistan tool sonucundan productLink alinca
`navigate(url)` cagrilir; bagli /browser WS client'lari ekran goruntusunu alir.
Read-only: tiklama/scroll yok, sadece otomatik navigate + JPEG stream.
"""
from __future__ import annotations
import asyncio
import base64
import logging
from typing import Optional
from fastapi import WebSocket
logger = logging.getLogger(__name__)
# Trek page proxy — works around the IP ban on the HF Space
TREK_PROXY = "https://video.trek-turkey.com/trek-page-proxy.php?url="

# Stream parameters
VIEWPORT_W, VIEWPORT_H = 1280, 800
FRAME_INTERVAL_S = 0.4  # 2.5 fps
JPEG_QUALITY = 65
NAV_TIMEOUT_MS = 15_000
POST_NAV_SLEEP_S = 0.2  # short wait before the first frame after a navigation

# Block bot/tracker/ad/heavy resources — Trek pages load much faster.
# Entries are matched as plain substrings of the request URL.
BLOCKED_DOMAINS = (
    # Google
    "google-analytics.com",
    "googletagmanager.com",
    "googlesyndication.com",
    "doubleclick.net",
    # Facebook
    "facebook.net",
    "facebook.com",
    "connect.facebook",
    # Hotjar / Criteo
    "hotjar.com",
    "static.hotjar",
    "criteo.com",
    "criteo.net",
    # Yandex
    "yandex.ru",
    "mc.yandex",
    "metrica",
    "webvisor",
    # Microsoft / LinkedIn
    "clarity.ms",
    "bing.com/sct",
    "linkedin.com/li",
    # Other social networks
    "tiktok.com",
    "snapchat.com",
    "pinterest.com",
    # Cloudflare
    "cloudflareinsights.com",
    "beacon.min.js",
)
BLOCKED_RESOURCE_TYPES = ("media", "font")  # no video/audio/fonts
class BrowserSession:
    """One headless Chromium page whose screenshots are streamed to WebSocket clients.

    The browser is launched lazily by `ensure_started()`. `navigate()` starts a
    page load in the background, `click()` / `scroll()` forward pointer input,
    and `stream_to()` pushes JPEG frames of the viewport to one client.
    """

    def __init__(self):
        self._pw = None
        self._browser = None
        self._page = None
        self._lock = asyncio.Lock()
        self._current_url: Optional[str] = None
        # Incremented on every navigate/click/scroll; stream_to() compares it
        # with the last value it handled to notice that something changed.
        self._nav_seq = 0
        self._started = False
        # Strong references to in-flight background navigation tasks. The event
        # loop only keeps weak references, so an unreferenced task could be
        # garbage-collected before it finishes.
        self._bg_tasks: set = set()

    async def ensure_started(self):
        """Launch Playwright, Chromium and the page if not already running.

        The started flag is re-checked while holding the lock so concurrent
        callers launch only one browser.

        Raises:
            Exception: whatever the launch raised; partial state is torn down
                via `_safe_shutdown()` before re-raising.
        """
        if self._started:
            return
        async with self._lock:
            if self._started:
                return
            try:
                from playwright.async_api import async_playwright

                self._pw = await async_playwright().start()
                self._browser = await self._pw.chromium.launch(
                    headless=True,
                    args=[
                        "--no-sandbox",
                        "--disable-dev-shm-usage",
                        "--disable-gpu",
                        "--disable-setuid-sandbox",
                    ],
                )
                ctx = await self._browser.new_context(
                    viewport={"width": VIEWPORT_W, "height": VIEWPORT_H},
                    user_agent=(
                        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                        "AppleWebKit/537.36 (KHTML, like Gecko) "
                        "Chrome/120.0.0.0 Safari/537.36"
                    ),
                )
                self._page = await ctx.new_page()

                # Abort tracker/ad/heavy requests; let everything else through.
                async def _route_filter(route):
                    req = route.request
                    if req.resource_type in BLOCKED_RESOURCE_TYPES:
                        return await route.abort()
                    url = req.url
                    if any(d in url for d in BLOCKED_DOMAINS):
                        return await route.abort()
                    return await route.continue_()

                await self._page.route("**/*", _route_filter)

                # Splash screen shown until the first navigation.
                await self._page.set_content(
                    "<html><body style='margin:0;display:flex;align-items:center;"
                    "justify-content:center;height:100vh;font-family:sans-serif;"
                    "background:#fff;color:#999;font-size:24px;'>"
                    "Trek Sesli Asistan — bir urun adi soyleyin</body></html>"
                )
                self._started = True
                logger.info("BrowserSession baslatildi (Chromium headless)")
            except Exception:
                logger.exception("BrowserSession baslatilamadi")
                # Clean up whatever was partially created.
                await self._safe_shutdown()
                raise

    async def _safe_shutdown(self):
        """Close the browser and stop Playwright, never raising.

        Failures are logged at debug level and otherwise ignored so that all
        handles are always reset and a later `ensure_started()` can retry.
        """
        try:
            if self._browser:
                await self._browser.close()
        except Exception as e:
            logger.debug(f"[browser] close hatasi: {e}")
        try:
            if self._pw:
                await self._pw.stop()
        except Exception as e:
            logger.debug(f"[browser] playwright stop hatasi: {e}")
        self._browser = None
        self._pw = None
        self._page = None
        self._started = False

    async def navigate(self, url: str, fallback_query: Optional[str] = None):
        """Start navigating to `url` without waiting for the load to finish.

        The page load runs in a background task; the stream loop picks up the
        frames as the page renders. URLs under https://www.trekbisiklet.com.tr/
        are fetched through TREK_PROXY; any other URL is loaded directly.

        Args:
            url: Target URL. An empty value is ignored.
            fallback_query: If given and the response status is >= 400, the
                page is redirected to the Trek site search for this query.

        Errors (browser start failure, navigation failure) are logged or
        swallowed; nothing is raised to the caller.
        """
        if not url:
            return
        try:
            await self.ensure_started()
        except Exception:
            return
        if not self._page:
            return
        self._current_url = url
        self._nav_seq += 1

        from urllib.parse import quote, quote_plus

        def _proxied(u: str) -> str:
            return TREK_PROXY + quote(u, safe="")

        use_proxy = url.startswith("https://www.trekbisiklet.com.tr/")
        target = _proxied(url) if use_proxy else url
        # Only claim "via proxy" when the proxy is actually used.
        route_note = "via proxy" if use_proxy else "direct"
        logger.info(f"[browser] navigate -> {url} ({route_note})")

        async def _do_goto():
            try:
                resp = await self._page.goto(target, timeout=NAV_TIMEOUT_MS, wait_until="commit")
                # On a 4xx/5xx response fall back to the Trek search page.
                if not (resp and resp.status >= 400 and fallback_query):
                    return
                # A newer navigate() has replaced the target in the meantime;
                # do not overwrite it with this stale navigation's fallback.
                if self._current_url != url:
                    return
                search_url = f"https://www.trekbisiklet.com.tr/arama?q={quote_plus(fallback_query)}"
                logger.info(f"[browser] {resp.status} -> fallback search: {fallback_query!r}")
                self._current_url = search_url
                self._nav_seq += 1
                await self._page.goto(_proxied(search_url), timeout=NAV_TIMEOUT_MS, wait_until="commit")
            except Exception as e:
                logger.warning(f"[browser] nav (bg) {url}: {e}")

        # Keep the task referenced until it completes, then drop it.
        task = asyncio.create_task(_do_goto())
        self._bg_tasks.add(task)
        task.add_done_callback(self._bg_tasks.discard)

    @property
    def current_url(self) -> Optional[str]:
        """The URL most recently requested via `navigate()` (or its search fallback)."""
        return self._current_url

    async def click(self, nx: float, ny: float):
        """Click in the viewport at normalized (0-1) coordinates.

        The coordinates are scaled by the viewport size and clamped to its
        bounds. Errors are logged at debug level and not raised.
        """
        if not self._page:
            return
        try:
            x = max(0, min(VIEWPORT_W - 1, int(nx * VIEWPORT_W)))
            y = max(0, min(VIEWPORT_H - 1, int(ny * VIEWPORT_H)))
            self._nav_seq += 1  # signal the stream loop that the page may change
            await self._page.mouse.click(x, y)
        except Exception as e:
            logger.debug(f"[browser] click hatasi: {e}")

    async def scroll(self, dy: int):
        """Scroll the page by `dy` pixels (positive = down).

        Errors are logged at debug level and not raised.
        """
        if not self._page:
            return
        try:
            await self._page.mouse.wheel(0, dy)
            self._nav_seq += 1
        except Exception as e:
            logger.debug(f"[browser] scroll hatasi: {e}")

    async def screenshot_jpeg(self) -> Optional[bytes]:
        """Return a JPEG screenshot of the viewport, or None if unavailable.

        None is returned when there is no page or the screenshot fails.
        """
        if not self._page:
            return None
        try:
            return await self._page.screenshot(type="jpeg", quality=JPEG_QUALITY, full_page=False)
        except Exception as e:
            logger.debug(f"[browser] screenshot hatasi: {e}")
            return None

    async def stream_to(self, ws: "WebSocket"):
        """Screenshot stream loop for one client; runs until a send fails.

        Sends `browser.url` when the current URL changed, and `browser.frame`
        (base64 JPEG) roughly every FRAME_INTERVAL_S seconds. If the browser
        cannot be started a single `browser.error` message is sent instead.
        Cancellation is propagated; other errors are logged and retried.
        """
        try:
            await self.ensure_started()
        except Exception:
            try:
                await ws.send_json({"type": "browser.error", "message": "Chromium baslatilamadi"})
            except Exception:
                # Best effort: the client may already be gone.
                pass
            return

        last_seq_sent = -1
        last_url_sent = None
        while True:
            try:
                # Something changed since the last frame: announce a new URL
                # if there is one, then wait briefly before grabbing a frame.
                if self._nav_seq != last_seq_sent:
                    last_seq_sent = self._nav_seq
                    if self._current_url != last_url_sent:
                        last_url_sent = self._current_url
                        try:
                            await ws.send_json({"type": "browser.url", "url": self._current_url})
                        except Exception:
                            return
                    # Short pause while the page is loading.
                    await asyncio.sleep(POST_NAV_SLEEP_S)
                jpeg = await self.screenshot_jpeg()
                if jpeg:
                    b64 = base64.b64encode(jpeg).decode("ascii")
                    try:
                        await ws.send_json({"type": "browser.frame", "jpeg": b64})
                    except Exception:
                        return
                await asyncio.sleep(FRAME_INTERVAL_S)
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.exception("[browser] stream loop hatasi")
                await asyncio.sleep(1.0)
# Module-level singleton: the one BrowserSession instance used by
# get_browser_session() and the module-level navigate() helper.
_session = BrowserSession()
def get_browser_session() -> BrowserSession:
    """Return the module-level BrowserSession instance."""
    session = _session
    return session
async def navigate(url: str, fallback_query: Optional[str] = None):
    """Navigate the module-level session to `url`.

    Args:
        url: Target URL, passed to `BrowserSession.navigate`.
        fallback_query: Optional search query forwarded to
            `BrowserSession.navigate`, which uses it for the Trek search
            fallback when the response status is >= 400. Defaults to None,
            which keeps the previous behavior (no fallback).
    """
    await _session.navigate(url, fallback_query)