""" Web Agent Trace Display Type Interactive step-by-step viewer for web agent browsing traces. Renders screenshots with SVG overlay visualizations (click markers, bounding boxes, mouse paths) alongside step details and a filmstrip navigator. Data format: { "steps": [ { "step_index": 0, "screenshot_url": "screenshots/step_000.png", "action_type": "click", "element": {"tag": "input", "text": "Search", "bbox": [340, 45, 680, 75]}, "coordinates": {"x": 510, "y": 60}, "mouse_path": [[200, 300], [350, 200], [510, 60]], "thought": "I need to search for blue wool sweaters", "observation": "Search box is focused", "timestamp": 1.2, "viewport": {"width": 1280, "height": 720} } ], "task_description": "Find and add a blue wool sweater under $50 to cart", "site": "amazon.com" } Supported action_type values: click, type, scroll, hover, select, navigate, wait, done """ import html import json from typing import Dict, Any, List, Optional from .base import BaseDisplay # Action type badge colors ACTION_TYPE_COLORS = { "click": {"bg": "#fff3e0", "border": "#FF9800", "badge": "rgba(255,152,0,0.2)"}, "type": {"bg": "#e8f4fd", "border": "#2196F3", "badge": "rgba(33,150,243,0.2)"}, "scroll": {"bg": "#e8f5e9", "border": "#4CAF50", "badge": "rgba(76,175,80,0.2)"}, "hover": {"bg": "#f3e5f5", "border": "#9C27B0", "badge": "rgba(156,39,176,0.2)"}, "select": {"bg": "#e0f7fa", "border": "#00BCD4", "badge": "rgba(0,188,212,0.2)"}, "navigate": {"bg": "#e8eaf6", "border": "#3F51B5", "badge": "rgba(63,81,181,0.2)"}, "wait": {"bg": "#f5f5f5", "border": "#9E9E9E", "badge": "rgba(158,158,158,0.2)"}, "done": {"bg": "#e8f5e9", "border": "#388E3C", "badge": "rgba(56,142,60,0.2)"}, } DEFAULT_ACTION_COLOR = {"bg": "#f5f5f5", "border": "#9E9E9E", "badge": "rgba(158,158,158,0.2)"} class WebAgentTraceDisplay(BaseDisplay): """ Display type for web agent browsing traces with interactive step navigation, SVG overlay visualizations, and filmstrip thumbnails. """ name = "web_agent_trace" required_fields = ["key"] optional_fields = { "show_overlays": True, "show_filmstrip": True, "show_thought": True, "show_observation": True, "show_element_info": True, "screenshot_max_width": 800, "screenshot_max_height": 600, "filmstrip_size": 80, "auto_playback": False, "playback_step_delay": 2.0, } description = "Web agent trace viewer with screenshots, SVG overlays, and step navigation" supports_span_target = False def render(self, field_config: Dict[str, Any], data: Any) -> str: if not data: return '
No trace data provided
' options = self.get_display_options(field_config) field_key = html.escape(field_config.get("key", ""), quote=True) # Normalize data steps = self._normalize_steps(data) if not steps: return '
No trace steps found
' # Extract task info task_desc = "" site = "" if isinstance(data, dict): task_desc = data.get("task_description", "") site = data.get("site", "") max_w = options.get("screenshot_max_width", 800) max_h = options.get("screenshot_max_height", 600) filmstrip_size = options.get("filmstrip_size", 80) # Serialize steps for JS steps_json = html.escape(json.dumps(steps, ensure_ascii=False), quote=True) css = self._build_css(max_w, max_h, filmstrip_size) # Task header task_html = "" if task_desc: escaped_task = html.escape(str(task_desc)) task_html = f'
Task: {escaped_task}
' if site: escaped_site = html.escape(str(site)) task_html += f'
Site: {escaped_site}
' # Build first step display (JS will handle subsequent navigation) first_step = steps[0] screenshot_html = self._render_screenshot(first_step, max_w, max_h) details_html = self._render_step_details(first_step, 0, len(steps), options) filmstrip_html = self._render_filmstrip(steps, filmstrip_size) if options.get("show_filmstrip", True) else "" # Per-step annotation container per_step_html = '
' # Playback data attributes auto_playback = options.get("auto_playback", False) playback_delay = options.get("playback_step_delay", 2.0) playback_attrs = "" if auto_playback: playback_attrs = f' data-auto-playback="true" data-playback-step-delay="{playback_delay}"' return f'''
{task_html}
Step 0 screenshot
Step 1 of {len(steps)}
{details_html} {per_step_html}
{filmstrip_html}
''' def _normalize_steps(self, data: Any) -> List[Dict[str, Any]]: """Normalize input data to a list of step dicts.""" if isinstance(data, dict): steps = data.get("steps", []) elif isinstance(data, list): steps = data else: return [] normalized = [] for i, step in enumerate(steps): if not isinstance(step, dict): continue normalized.append({ "step_index": step.get("step_index", i), "screenshot_url": step.get("screenshot_url", step.get("screenshot", "")), "action_type": step.get("action_type", "unknown"), "element": step.get("element", {}), "coordinates": step.get("coordinates", {}), "mouse_path": step.get("mouse_path", []), "thought": step.get("thought", ""), "observation": step.get("observation", ""), "timestamp": step.get("timestamp", ""), "viewport": step.get("viewport", {"width": 1280, "height": 720}), "typed_text": step.get("typed_text", step.get("value", "")), "scroll_direction": step.get("scroll_direction", step.get("direction", "")), }) return normalized def _render_screenshot(self, step: Dict, max_w: int, max_h: int) -> str: """Render the screenshot image element.""" url = step.get("screenshot_url", "") if not url: return '
No screenshot available
' escaped_url = html.escape(str(url), quote=True) return f'Step screenshot' def _render_step_details(self, step: Dict, index: int, total: int, options: Dict) -> str: """Render the step details panel content.""" action_type = step.get("action_type", "unknown") safe_type = html.escape(str(action_type), quote=True) colors = ACTION_TYPE_COLORS.get(action_type, DEFAULT_ACTION_COLOR) parts = [] # Action badge parts.append( f'
' f'{safe_type.upper()}
' ) # Timestamp ts = step.get("timestamp", "") if ts: parts.append(f'
t={html.escape(str(ts))}s
') # Thought thought = step.get("thought", "") if thought and options.get("show_thought", True): parts.append( f'
' f'Thought: {html.escape(str(thought))}' f'
' ) # Element info element = step.get("element", {}) if element and options.get("show_element_info", True): elem_parts = [] if isinstance(element, dict): for k in ("tag", "text", "id", "class"): if k in element: elem_parts.append(f'{k}="{html.escape(str(element[k]))}"') if elem_parts: parts.append( f'
' f'Element: {" ".join(elem_parts)}' f'
' ) # Coordinates coords = step.get("coordinates", {}) if coords and isinstance(coords, dict): x = coords.get("x", "") y = coords.get("y", "") if x or y: parts.append( f'
' f'Coords: ({html.escape(str(x))}, {html.escape(str(y))})' f'
' ) # Typed text (for type actions) typed = step.get("typed_text", "") if typed: parts.append( f'
' f'Typed: "{html.escape(str(typed))}"' f'
' ) # Observation obs = step.get("observation", "") if obs and options.get("show_observation", True): parts.append( f'
' f'Observation: {html.escape(str(obs))}' f'
' ) return f'
{"".join(parts)}
' def _render_filmstrip(self, steps: List[Dict], thumb_size: int) -> str: """Render the filmstrip thumbnail navigation bar.""" thumbs = [] for i, step in enumerate(steps): url = step.get("screenshot_url", "") active = "filmstrip-active" if i == 0 else "" if url: escaped_url = html.escape(str(url), quote=True) thumbs.append( f'
' f'Step {i}' f'{i + 1}' f'
' ) else: thumbs.append( f'
' f'
?
' f'{i + 1}' f'
' ) return f'
{"".join(thumbs)}
' def _build_css(self, max_w: int, max_h: int, filmstrip_size: int) -> str: """Build CSS for the web agent viewer.""" return f''' .web-agent-viewer {{ font-family: inherit; }} .web-agent-task, .web-agent-site {{ padding: 6px 12px; margin-bottom: 8px; background: #f8f9fa; border-radius: 4px; font-size: 0.95em; }} .web-agent-main {{ display: flex; gap: 16px; margin-bottom: 12px; }} .screenshot-panel {{ flex: 0 0 auto; max-width: {max_w}px; }} .screenshot-container {{ position: relative; display: inline-block; border: 1px solid #ddd; border-radius: 6px; overflow: hidden; background: #1a1a1a; }} .step-screenshot {{ display: block; max-width: {max_w}px; max-height: {max_h}px; width: auto; height: auto; }} .overlay-layer {{ position: absolute; top: 0; left: 0; width: 100%; height: 100%; pointer-events: none; }} .screenshot-placeholder {{ width: {max_w}px; height: 300px; display: flex; align-items: center; justify-content: center; background: #f0f0f0; color: #999; font-size: 0.9em; }} .step-nav {{ display: flex; align-items: center; justify-content: center; gap: 12px; padding: 8px 0; }} .step-nav button {{ padding: 4px 12px; border: 1px solid #ccc; border-radius: 4px; background: #fff; cursor: pointer; font-size: 0.85em; }} .step-nav button:disabled {{ opacity: 0.4; cursor: default; }} .step-nav button:not(:disabled):hover {{ background: #e8f4fd; border-color: #2196F3; }} .step-counter {{ font-size: 0.9em; font-weight: 600; color: #555; }} .step-details-panel {{ flex: 1; min-width: 250px; max-width: 400px; border: 1px solid #e0e0e0; border-radius: 6px; padding: 12px; background: #fafafa; overflow-y: auto; max-height: {max_h + 50}px; }} .step-details-content {{ display: flex; flex-direction: column; gap: 8px; }} .action-badge {{ display: inline-block; padding: 4px 12px; border-radius: 4px; font-weight: 700; font-size: 0.8em; letter-spacing: 0.5px; }} .step-timestamp {{ color: #888; font-size: 0.8em; }} .step-thought {{ padding: 8px; background: #e8f4fd; border-left: 3px solid #2196F3; border-radius: 4px; font-size: 0.9em; }} .step-element {{ font-size: 0.85em; color: #555; }} .step-element code {{ background: #f0f0f0; padding: 2px 4px; border-radius: 2px; font-size: 0.9em; }} .step-coords {{ font-size: 0.85em; color: #666; }} .step-typed {{ padding: 6px 8px; background: #e8f4fd; border-radius: 4px; font-size: 0.9em; }} .step-observation {{ padding: 8px; background: #e8f5e9; border-left: 3px solid #4CAF50; border-radius: 4px; font-size: 0.9em; }} .filmstrip {{ display: flex; gap: 4px; overflow-x: auto; padding: 8px 4px; background: #f5f5f5; border-radius: 6px; margin-top: 4px; }} .filmstrip-thumb {{ flex: 0 0 auto; width: {filmstrip_size}px; cursor: pointer; border: 2px solid transparent; border-radius: 4px; overflow: hidden; text-align: center; background: #fff; transition: border-color 0.2s; }} .filmstrip-thumb:hover {{ border-color: #90CAF9; }} .filmstrip-thumb.filmstrip-active {{ border-color: #2196F3; }} .filmstrip-thumb img {{ width: 100%; height: {int(filmstrip_size * 0.65)}px; object-fit: cover; display: block; }} .filmstrip-placeholder {{ width: 100%; height: {int(filmstrip_size * 0.65)}px; display: flex; align-items: center; justify-content: center; background: #eee; color: #999; font-size: 0.8em; }} .filmstrip-label {{ font-size: 0.7em; color: #666; padding: 2px 0; }} .overlay-controls {{ display: flex; gap: 12px; padding: 6px 0; font-size: 0.8em; color: #555; }} .overlay-controls label {{ display: flex; align-items: center; gap: 4px; cursor: pointer; }} .web-agent-per-step-annotations {{ margin-top: 12px; padding-top: 12px; border-top: 1px solid #e0e0e0; }} /* SVG overlay styles */ .overlay-click-marker {{ }} .overlay-bbox {{ }} .overlay-mouse-path {{ }} .overlay-scroll {{ }} @keyframes pulse-marker {{ 0%, 100% {{ r: 8; opacity: 1; }} 50% {{ r: 12; opacity: 0.7; }} }} @media (max-width: 768px) {{ .web-agent-main {{ flex-direction: column; }} .step-details-panel {{ max-width: none; max-height: none; }} .screenshot-panel {{ max-width: 100%; }} .step-screenshot {{ max-width: 100%; }} }} ''' def get_css_classes(self, field_config: Dict[str, Any]) -> List[str]: classes = super().get_css_classes(field_config) return classes def get_data_attributes(self, field_config: Dict[str, Any], data: Any) -> Dict[str, str]: attrs = super().get_data_attributes(field_config, data) return attrs