"""
Playwright-driven simulated user.

Drives the actual browser UI: registers + logs in (the credential exchange
itself goes through plain HTTP requests, see
:meth:`PlaywrightSimulatedUser.login_via_ui`), navigates to ``/annotate``,
applies annotations through real DOM events (clicking radios, filling
textareas, ticking checkboxes), then clicks the Next button to advance.

Designed as a slow-but-real smoke test of the rendered annotation UI: the
same code paths a human user would hit. The annotation values still come
from an :class:`AnnotationStrategy` (typically
:class:`AgentSimulatorStrategy`); Playwright is only the *driver*.

Constraints:

- Single user, sequential. Browser sessions don't parallelize cleanly.
- Requires ``playwright`` + a chromium install
  (``pip install playwright && playwright install chromium``).
- Falls back gracefully if Playwright is unavailable: raises a clear
  ``ImportError`` at construction time.
"""

from __future__ import annotations

import logging
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional

import requests

from .annotation_strategies import AnnotationStrategy, create_strategy
from .competence_profiles import create_competence_profile
from .config import (
    AnnotationStrategyType,
    InteractiveConfig,
    UserConfig,
)
from .interactive_runner import InteractiveSessionRunner
from .timing_models import NoWaitTimingModel, TimingModel
from .user_simulator import AnnotationRecord, UserSimulationResult

logger = logging.getLogger(__name__)


@dataclass
class _DomSubmissionResult:
    """How many DOM inputs we successfully applied vs total annotations."""

    applied: int
    total: int


class PlaywrightSimulatedUser:
    """Browser-driven simulated user.

    Public surface mirrors :class:`SimulatedUser` enough that a smoke runner
    can use either. ``run_simulation()`` returns a
    :class:`UserSimulationResult` so existing reporting works.
    """

    def __init__(
        self,
        user_config: UserConfig,
        server_url: str,
        gold_standards: Optional[Dict[str, Dict[str, Any]]] = None,
        simulate_wait: bool = False,
        interactive_config: Optional[InteractiveConfig] = None,
        headless: bool = True,
        debounce_seconds: float = 1.8,
    ):
        """Set up strategy, timing and HTTP state; the browser is not started.

        Args:
            user_config: Per-user simulation settings (id, strategy, timing,
                competence).
            server_url: Base URL of the server; a trailing ``/`` is stripped.
            gold_standards: Optional gold annotations; defaults to ``{}``.
            simulate_wait: Use :class:`TimingModel` when true, otherwise
                :class:`NoWaitTimingModel`.
            interactive_config: When given and ``enabled``, an
                :class:`InteractiveSessionRunner` is created.
            headless: Passed to the chromium launch in :meth:`start`.
            debounce_seconds: Stored on the instance as ``debounce_seconds``.

        Raises:
            ImportError: If ``playwright`` is not importable.
        """
        # Fail fast at construction time rather than on the first start().
        try:
            from playwright.sync_api import sync_playwright  # noqa: F401
        except ImportError as e:
            raise ImportError(
                "PlaywrightSimulatedUser requires playwright. Install it with:\n"
                " pip install playwright\n"
                " playwright install chromium"
            ) from e

        self.config = user_config
        self.server_url = server_url.rstrip("/")
        self.gold_standards = gold_standards or {}
        self.headless = headless
        self.debounce_seconds = debounce_seconds

        self.competence = create_competence_profile(user_config.competence)
        self.strategy = self._create_strategy()
        self.timing = (
            TimingModel(user_config.timing)
            if simulate_wait
            else NoWaitTimingModel(user_config.timing)
        )

        # API session is mirrored to/from Playwright for /api/* fetches
        self.api_session = requests.Session()

        self.interactive_runner: Optional[InteractiveSessionRunner] = None
        if interactive_config and interactive_config.enabled:
            self.interactive_runner = InteractiveSessionRunner(
                interactive_config, server_url
            )

        self.result = UserSimulationResult(user_id=user_config.user_id)
        self.schemas: List[Dict[str, Any]] = []

        # Lazily set in start()
        self._pw = None
        self._browser = None
        self._context = None
        self._page = None

    # ------------------------------------------------------------------
    # Strategy & lifecycle
    # ------------------------------------------------------------------
    def _create_strategy(self) -> AnnotationStrategy:
        """Build the annotation strategy described by ``self.config``."""
        return create_strategy(
            strategy_type=self.config.strategy,
            llm_config=self.config.llm_config,
            biased_config=self.config.biased_config,
            pattern_config=self.config.pattern_config,
            agent_config=self.config.agent_config,
            user_id=self.config.user_id,
        )

    def start(self) -> None:
        """Start Playwright and open a chromium browser, context and page.

        If any step after the driver start fails, everything created so far
        is torn down again before the exception propagates, so a failed
        start does not leave a driver or browser process behind.
        """
        from playwright.sync_api import sync_playwright

        self._pw = sync_playwright().start()
        try:
            self._browser = self._pw.chromium.launch(headless=self.headless)
            self._context = self._browser.new_context()
            self._page = self._context.new_page()
        except Exception:
            # stop() is best-effort and never raises, so the original error
            # is the one the caller sees.
            self.stop()
            raise

    def stop(self) -> None:
        """Close page, context, browser and driver; never raises."""
        for closer in (self._page, self._context, self._browser):
            if closer is None:
                continue
            try:
                closer.close()
            except Exception as e:
                # Best-effort teardown: keep going, but leave a trace.
                logger.debug("close failed during stop(): %s", e)
        if self._pw is not None:
            try:
                self._pw.stop()
            except Exception as e:
                logger.debug("playwright stop failed: %s", e)
        self._page = self._context = self._browser = self._pw = None

    # ------------------------------------------------------------------
    # Cookie sharing
    # ------------------------------------------------------------------
    def _sync_cookies_to_api(self) -> None:
        """Copy Playwright cookies into the requests session."""
        if self._context is None:
            return
        for cookie in self._context.cookies():
            self.api_session.cookies.set(
                cookie["name"], cookie["value"], domain=cookie.get("domain")
            )

    # ------------------------------------------------------------------
    # Login via UI
    # ------------------------------------------------------------------
    def login_via_ui(self) -> bool:
        """Register + log in.

        Uses the API for the credential exchange (the UI form mode is
        brittle across templates), then loads the cookies into Playwright so
        DOM interactions work in a logged-in session.

        Returns:
            ``True`` once ``/api/current_instance`` answers 200, ``False`` if
            the HTTP login raised or the annotation phase was not reached
            after the retry loop.
        """
        password = "simulated_password_123"
        try:
            self.api_session.post(
                f"{self.server_url}/register",
                data={"action": "signup", "email": self.config.user_id, "pass": password},
                allow_redirects=True,
                timeout=30,
            )
            self.api_session.post(
                f"{self.server_url}/auth",
                data={"action": "login", "email": self.config.user_id, "pass": password},
                allow_redirects=True,
                timeout=30,
            )
        except requests.exceptions.RequestException as e:
            logger.warning("API login failed: %s", e)
            return False

        # Push cookies into Playwright so the page renders as logged-in.
        self._sync_cookies_to_browser()

        # Navigate to /annotate; walk past consent/instructions if needed.
        page = self._page
        try:
            page.goto(
                f"{self.server_url}/annotate",
                wait_until="domcontentloaded",
                timeout=15000,
            )
        except Exception as e:
            logger.warning("page.goto /annotate failed: %s", e)

        for _ in range(10):
            self._sync_cookies_to_api()
            if self._is_in_annotation_phase():
                # Make sure the annotation page is what's rendered
                try:
                    page.wait_for_selector("#next-btn", timeout=5000)
                except Exception:
                    pass
                return True
            # Try a UI advance (consent / instructions screen)
            self._advance_phase_screen()
            time.sleep(0.5)
            try:
                page.goto(
                    f"{self.server_url}/annotate",
                    wait_until="domcontentloaded",
                    timeout=10000,
                )
            except Exception:
                pass

        # The last navigation may have changed the session cookies; refresh
        # the API session before the final phase check.
        self._sync_cookies_to_api()
        return self._is_in_annotation_phase()

    def _sync_cookies_to_browser(self) -> None:
        """Push cookies from the API session into the Playwright context."""
        if self._context is None:
            return
        cookies = [
            {"name": c.name, "value": c.value, "url": self.server_url}
            for c in self.api_session.cookies
        ]
        if cookies:
            try:
                self._context.add_cookies(cookies)
            except Exception as e:
                logger.warning("add_cookies failed: %s", e)

    def _is_in_annotation_phase(self) -> bool:
        """We're in annotation phase iff /api/current_instance returns 200."""
        try:
            r = self.api_session.get(
                f"{self.server_url}/api/current_instance", timeout=10
            )
        except requests.exceptions.RequestException:
            return False
        return r.status_code == 200

    def _advance_phase_screen(self) -> None:
        """Click any visible 'Next' / 'Continue' / 'I agree' button.

        Tries each candidate selector in order and stops after the first
        successful click. Does nothing when no page is open.
        """
        page = self._page
        if page is None:
            return
        candidates = [
            "#next-btn:visible",
            "button:has-text('Continue')",
            "button:has-text('I Agree')",
            "button:has-text('Start')",
            "input[type='submit']:visible",
        ]
        for sel in candidates:
            try:
                loc = page.locator(sel)
                if loc.count() > 0:
                    loc.first.click()
                    return
            except Exception:
                # A failing selector or click just means "try the next one".
                continue

    # ------------------------------------------------------------------
    # API helpers
    # ------------------------------------------------------------------
    def fetch_schemas(self) -> List[Dict[str, Any]]:
        """Fetch ``/api/schemas`` and cache the result on ``self.schemas``.

        Accepts either a bare JSON list, a dict with a ``"schemas"`` entry
        (list or dict of schemas), or a dict whose values are the schemas.

        Returns:
            The list of schemas, or ``[]`` on a non-200 response, a request
            error, an undecodable body, or a payload that is not a list.
        """
        try:
            r = self.api_session.get(f"{self.server_url}/api/schemas", timeout=30)
            if r.status_code != 200:
                return []
            data = r.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on every requests version.
            logger.warning("schema fetch failed: %s", e)
            return []

        if isinstance(data, dict):
            schemas = (
                list(data["schemas"].values())
                if isinstance(data.get("schemas"), dict)
                else data.get("schemas", list(data.values()))
            )
        else:
            schemas = data

        if not isinstance(schemas, list):
            logger.warning(
                "unexpected schema payload type: %s", type(schemas).__name__
            )
            schemas = []

        self.schemas = schemas
        return schemas

    def fetch_current_instance(self) -> Optional[Dict[str, Any]]:
        """Return the JSON body of ``/api/current_instance``.

        Returns:
            The decoded body, or ``None`` on a request error, a non-200
            status, or a body that is not valid JSON.
        """
        try:
            r = self.api_session.get(
                f"{self.server_url}/api/current_instance", timeout=30
            )
            if r.status_code != 200:
                return None
            return r.json()
        except (requests.exceptions.RequestException, ValueError):
            return None

    # ------------------------------------------------------------------
    # DOM annotation application
    # ------------------------------------------------------------------
    def _apply_annotations(self, annotations: Dict[str, Any]) -> _DomSubmissionResult:
        """Translate a wire-format annotation dict into DOM clicks/fills."""
        page = self._page
        applied = 0
        total = 0
        for key, value in annotations.items():
            total += 1
            if ":" not in key:
                continue
            schema, label = key.split(":", 1)
            if label == "text":
                # textarea / textbox
                # name: :::text via generate_element_identifier
                sel = f"[name=\"{schema}:::text\"]"
                if page.locator(sel).count() == 0:
                    sel = f"textarea[name*='{schema}']"
                if page.locator(sel).count() > 0:
                    try:
                        page.locator(sel).first.fill(str(value)[:1000])
                        applied += 1
                    except Exception as e:
                        logger.debug("fill failed for %s: %s", sel, e)
                continue
            # Try radio (name=schema, value=label)
            radio_sel = f"input[type='radio'][name=\"{schema}\"][value=\"{label}\"]"
            if page.locator(radio_sel).count() > 0:
                try:
                    page.locator(radio_sel).first.check(force=True)
                    applied += 1
                    continue
                except Exception:
                    pass
            # Try checkbox / multiselect (name=:::