# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. """ Html Design Agent Environment Implementation. Renders HTML with a headless Playwright browser and scores it on four dimensions from AGENTS.md: R = 0.25*R_branding + 0.25*R_spacing + 0.25*R_a11y + 0.25*R_composition Three tasks of increasing difficulty: - level1_accessibility : (easy) add missing alt/aria-label/label attributes - level2_spacing : (medium) fix off-grid spacing to the 8pt system - level3_contrast : (hard) fix colours, spacing AND accessibility together """ from __future__ import annotations import json import math import os from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple from uuid import uuid4 from openenv.core.env_server.interfaces import Environment from openenv.core.env_server.types import State try: from ..models import HtmlDesignAgentAction, HtmlDesignAgentObservation except ImportError: from models import HtmlDesignAgentAction, HtmlDesignAgentObservation # --------------------------------------------------------------------------- # Brand design tokens (single source of truth for all tasks) # --------------------------------------------------------------------------- DESIGN_TOKENS: Dict[str, Any] = { "palette": { "primary": "#1A1A2E", # dark navy – headings, body text "accent": "#E94560", # brand red – CTAs, highlights "white": "#FFFFFF", # page background, reversed text "surface": "#F0F0F5", # card backgrounds "muted": "#646478", # secondary / caption text }, "palette_rgb": { "primary": (26, 26, 46), "accent": (233, 69, 96), "white": (255, 255, 255), "surface": (240, 240, 245), "muted": (100, 100, 120), }, "fonts": ["Inter", "Roboto", "Open Sans", "system-ui", "sans-serif", "-apple-system"], "spacing_scale": [0, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 96, 128], "min_contrast_ratio": 4.5, 
@dataclass
class TaskDefinition:
    """Static description of one design-repair task.

    Fields, in constructor order:
      task_id        -- key under which the task is registered.
      description    -- human-readable instructions for the agent.
      difficulty     -- one of "easy", "medium" or "hard".
      broken_html    -- the flawed HTML document the agent starts from.
      done_threshold -- the episode ends once the total reward reaches
                        this value.
    """

    # Identification and instructions.
    task_id: str
    description: str

    # "easy" | "medium" | "hard"
    difficulty: str

    # Starting document handed to the agent.
    broken_html: str

    # Episode ends when total reward >= this.
    done_threshold: float

Contact Us

""", ), # ------------------------------------------------------------------ # Level 2 – level2_spacing (MEDIUM) # Colours are correct (brand palette used throughout) but every # spacing value is off-grid (13 px, 5 px, 7 px, 10 px, 22 px …). # Agent must understand the 8pt grid system and fix all padding / # margin / gap values while keeping colours and accessibility intact. # ------------------------------------------------------------------ "level2_spacing": TaskDefinition( task_id="level2_spacing", description=( "Medium — fix spacing only: all padding/margin/gap values must " "be multiples of 8 (0,8,16,24,32,48,64…). " "Colours and accessibility are already correct." ), difficulty="medium", done_threshold=0.85, broken_html="""

Build Faster, Ship Smarter

The platform trusted by 10,000+ developers worldwide.

Fast Performance

99.9% Uptime

24/7 Support

""", ), # ------------------------------------------------------------------ # Level 3 – level3_contrast (HARD) # Everything is broken simultaneously: low-contrast colours (#CCC on # white), off-grid spacing, AND missing accessibility attributes. # The agent must fix all three dimensions at once — requiring colour # knowledge (WCAG 4.5:1, brand palette), grid alignment (8pt), and # structural HTML fixes (alt, aria-label, labels). # ------------------------------------------------------------------ "level3_contrast": TaskDefinition( task_id="level3_contrast", description=( "Hard — fix everything: low-contrast colours (use brand palette), " "off-grid spacing (multiples of 8), and missing accessibility " "attributes (alt, aria-label, labels). All four reward dimensions active." ), difficulty="hard", done_threshold=0.88, broken_html="""

Build Faster, Ship Smarter

The platform trusted by 10,000+ developers worldwide.

Fast Performance

Sub-10ms latency.

99.9% Uptime

Always available.

24/7 Support

We've got you covered.

# ---------------------------------------------------------------------------
# Colour-science helpers
# ---------------------------------------------------------------------------

def _srgb_to_linear(c: int) -> float:
    """Convert one 8-bit sRGB channel (0-255) to its linear-light value."""
    scaled = c / 255.0
    if scaled <= 0.04045:
        # Linear toe of the sRGB transfer curve.
        return scaled / 12.92
    return ((scaled + 0.055) / 1.055) ** 2.4


def _relative_luminance(r: int, g: int, b: int) -> float:
    """Return the weighted sum of the linearised R, G and B channels."""
    red_part = 0.2126 * _srgb_to_linear(r)
    green_part = 0.7152 * _srgb_to_linear(g)
    blue_part = 0.0722 * _srgb_to_linear(b)
    return red_part + green_part + blue_part


def _wcag_contrast(rgb1: Tuple[int, int, int], rgb2: Tuple[int, int, int]) -> float:
    """Return the contrast ratio between two RGB colours (always >= 1)."""
    lum_a = _relative_luminance(*rgb1)
    lum_b = _relative_luminance(*rgb2)
    if lum_a >= lum_b:
        brighter, dimmer = lum_a, lum_b
    else:
        brighter, dimmer = lum_b, lum_a
    return (brighter + 0.05) / (dimmer + 0.05)


def _xyz_to_lab(x: float, y: float, z: float) -> Tuple[float, float, float]:
    """Convert XYZ to (L, a, b) after normalising by the fixed white point."""

    def _compress(t: float) -> float:
        # Cube root above the threshold, linear segment below it.
        if t > 0.008856:
            return t ** (1 / 3)
        return 7.787 * t + 16 / 116

    fx = _compress(x / 0.95047)
    fy = _compress(y / 1.00000)
    fz = _compress(z / 1.08883)
    return 116 * fy - 16, 500 * (fx - fy), 200 * (fy - fz)


def _rgb_to_lab(r: int, g: int, b: int) -> Tuple[float, float, float]:
    """Convert an 8-bit sRGB colour to (L, a, b) via linear RGB and XYZ."""
    lin_r = _srgb_to_linear(r)
    lin_g = _srgb_to_linear(g)
    lin_b = _srgb_to_linear(b)
    x = lin_r * 0.4124564 + lin_g * 0.3575761 + lin_b * 0.1804375
    y = lin_r * 0.2126729 + lin_g * 0.7151522 + lin_b * 0.0721750
    z = lin_r * 0.0193339 + lin_g * 0.1191920 + lin_b * 0.9503041
    return _xyz_to_lab(x, y, z)


def _delta_e(rgb1: Tuple[int, int, int], rgb2: Tuple[int, int, int]) -> float:
    """Return the Euclidean distance between two RGB colours in Lab space."""
    lab_a = _rgb_to_lab(*rgb1)
    lab_b = _rgb_to_lab(*rgb2)
    d_l = lab_a[0] - lab_b[0]
    d_a = lab_a[1] - lab_b[1]
    d_b = lab_a[2] - lab_b[2]
    return math.sqrt(d_l ** 2 + d_a ** 2 + d_b ** 2)


def _closest_brand_delta_e(rgb: Tuple[int, int, int]) -> float:
    """Return the smallest Lab distance from *rgb* to any brand palette colour."""
    distances = [
        _delta_e(rgb, brand_rgb)
        for brand_rgb in DESIGN_TOKENS["palette_rgb"].values()
    ]
    return min(distances)
# Collects one {fg, bg, tag} record per text-bearing element.
#
# The computed background-color of an element without its own background is
# 'rgba(0, 0, 0, 0)'. Reading only the first three components of that value
# would treat it as opaque black, so the snippet parses the alpha channel and
# composites the element's and its ancestors' backgrounds over a white canvas
# to obtain the colour that is actually painted behind the text.
# fg and bg are returned as [r, g, b] integer triples.
_JS_GET_TEXT_COLORS = """
() => {
    // Parse 'rgb(r, g, b)' or 'rgba(r, g, b, a)' into [r, g, b, a]; alpha defaults to 1.
    function parseRgba(s) {
        const m = s.match(/rgba?\\((\\d+),\\s*(\\d+),\\s*(\\d+)(?:,\\s*([\\d.]+))?/);
        if (!m) return null;
        const alpha = m[4] === undefined ? 1 : parseFloat(m[4]);
        return [+m[1], +m[2], +m[3], alpha];
    }
    // Walk from el towards the root collecting non-transparent backgrounds,
    // stopping at the first fully opaque one, then blend them over white.
    function effectiveBackground(el) {
        const layers = [];
        for (let node = el; node; node = node.parentElement) {
            const c = parseRgba(window.getComputedStyle(node).backgroundColor);
            if (!c || !(c[3] > 0)) continue;
            layers.push(c);
            if (c[3] >= 1) break;
        }
        let out = [255, 255, 255];
        for (let i = layers.length - 1; i >= 0; i--) {
            const [r, g, b, a] = layers[i];
            out = [
                Math.round(r * a + out[0] * (1 - a)),
                Math.round(g * a + out[1] * (1 - a)),
                Math.round(b * a + out[2] * (1 - a)),
            ];
        }
        return out;
    }
    const sel = 'h1,h2,h3,h4,h5,h6,p,span,a,button,label,li,td,th,caption,small,strong,em';
    const result = [];
    document.querySelectorAll(sel).forEach(el => {
        const fg = parseRgba(window.getComputedStyle(el).color);
        if (fg) result.push({fg: fg.slice(0, 3), bg: effectiveBackground(el), tag: el.tagName});
    });
    return result;
}
"""

# Counts every positive padding/margin value (plus gap values on flex/grid
# containers) and lists those that are not an exact multiple of 8 px.
# Returns {violations: [{tag, prop, value}, ...], total: <values inspected>}.
_JS_GET_SPACING_VIOLATIONS = """
() => {
    const GRID = 8;
    const violations = [];
    let total = 0;
    document.querySelectorAll('*').forEach(el => {
        const s = window.getComputedStyle(el);
        const props = [
            'paddingTop','paddingRight','paddingBottom','paddingLeft',
            'marginTop','marginRight','marginBottom','marginLeft',
        ];
        if (s.display === 'flex' || s.display === 'grid') {
            props.push('gap','rowGap','columnGap');
        }
        props.forEach(p => {
            const v = parseFloat(s[p]);
            if (!isNaN(v) && v > 0) {
                total++;
                if (v % GRID !== 0) violations.push({tag: el.tagName, prop: p, value: v});
            }
        });
    });
    return {violations, total};
}
"""
# Lists human-readable accessibility problems found in the rendered page:
# images without an alt attribute, buttons without an accessible name, and
# visible inputs without a label. Inputs are checked through `input.labels`,
# which includes both <label for="id"> and a <label> wrapping the input.
_JS_GET_A11Y_ISSUES = """
() => {
    const issues = [];
    // Images without alt attribute
    document.querySelectorAll('img').forEach(img => {
        if (!img.hasAttribute('alt')) issues.push('img missing alt attribute');
    });
    // Buttons without accessible name
    document.querySelectorAll('button').forEach(btn => {
        const name = (btn.textContent || '').trim()
            || btn.getAttribute('aria-label')
            || btn.getAttribute('aria-labelledby')
            || btn.getAttribute('title');
        if (!name) issues.push('button missing accessible name (add text or aria-label)');
    });
    // Inputs without label
    const inputSel = 'input:not([type=hidden]):not([type=submit]):not([type=button]):not([type=reset])';
    document.querySelectorAll(inputSel).forEach(input => {
        const hasLabel = input.labels && input.labels.length > 0;
        const hasAria = input.getAttribute('aria-label') || input.getAttribute('aria-labelledby');
        if (!hasLabel && !hasAria) {
            issues.push('input[type=' + (input.type || 'text') + '] missing associated label');
        }
    });
    return issues;
}
"""

# Sums the bounding-box area of every visible element into left/right and
# top/bottom halves of the viewport, keyed by where the element's centre falls.
_JS_GET_COMPOSITION = """
() => {
    const vw = window.innerWidth, vh = window.innerHeight;
    const mx = vw / 2, my = vh / 2;
    let lw = 0, rw = 0, tw = 0, bw = 0;
    document.querySelectorAll('*').forEach(el => {
        const r = el.getBoundingClientRect();
        if (r.width === 0 || r.height === 0) return;
        const s = window.getComputedStyle(el);
        if (s.display === 'none' || s.visibility === 'hidden' || s.opacity === '0') return;
        const area = r.width * r.height;
        const cx = r.left + r.width / 2;
        const cy = r.top + r.height / 2;
        if (cx < mx) lw += area; else rw += area;
        if (cy < my) tw += area; else bw += area;
    });
    return {lw, rw, tw, bw};
}
"""

# Serialises a shallow outline of <body> as JSON: at most four levels deep,
# six children per node, three class names per node.
_JS_DOM_SUMMARY = """
() => {
    function walk(el, depth) {
        if (depth > 3) return null;
        const children = Array.from(el.children)
            .slice(0, 6)
            .map(c => walk(c, depth + 1))
            .filter(Boolean);
        return {
            tag: el.tagName.toLowerCase(),
            cls: Array.from(el.classList).slice(0, 3).join(' '),
            children,
        };
    }
    return JSON.stringify(walk(document.body, 0));
}
"""


# ---------------------------------------------------------------------------
# Playwright evaluator
# ---------------------------------------------------------------------------

class DesignEvaluator:
    """
    Wraps a headless Chromium browser for HTML scoring.
    One instance is shared for the lifetime of the environment.
    """

    def __init__(self) -> None:
        """Start Playwright and launch one headless Chromium instance."""
        from playwright.sync_api import sync_playwright  # lazy import

        self._pw = sync_playwright().start()
        self._browser = self._pw.chromium.launch(headless=True)

    # ------------------------------------------------------------------

    def evaluate(
        self,
        html: str,
        task: TaskDefinition,
    ) -> Tuple[float, Dict[str, float], List[str], str]:
        """
        Render *html* and return (total_reward, breakdown, violations, dom_summary).

        total_reward is the unweighted mean of the four dimension scores,
        rounded to 4 decimals; breakdown holds each score rounded to 3
        decimals; violations concatenates the messages from every scorer;
        dom_summary is the JSON outline of the page, or "" if it could not
        be produced. The page is closed even when scoring raises.
        """
        page = self._browser.new_page(viewport={"width": 1280, "height": 720})
        try:
            page.set_content(html, wait_until="domcontentloaded")

            r_branding, brand_violations = self._score_branding(page)
            r_spacing, spacing_violations = self._score_spacing(page)
            r_a11y, a11y_violations = self._score_a11y(page)
            r_composition, comp_violations = self._score_composition(page)

            breakdown = {
                "branding": round(r_branding, 3),
                "spacing": round(r_spacing, 3),
                "a11y": round(r_a11y, 3),
                "composition": round(r_composition, 3),
            }
            total = (r_branding + r_spacing + r_a11y + r_composition) / 4.0
            violations = (
                brand_violations
                + spacing_violations
                + a11y_violations
                + comp_violations
            )

            # The DOM outline is informational only; never fail scoring over it.
            try:
                dom_summary = page.evaluate(_JS_DOM_SUMMARY) or ""
            except Exception:
                dom_summary = ""

            return round(total, 4), breakdown, violations, dom_summary
        finally:
            page.close()

    # ------------------------------------------------------------------

    def _score_branding(self, page: Any) -> Tuple[float, List[str]]:
        """
        R_branding – three checks per text element:
          1. WCAG contrast ratio between text and background >= 4.5:1.
          2. Text colour within ΔE* 2.0 of a brand palette colour.
          3. Background colour within ΔE* 2.0 of a brand palette colour.

        The score is the fraction of elements passing all three checks.
        Every failed check adds one message to the returned violations, so
        the feedback always explains why an element did not pass.
        Returns (1.0, []) when the page yields no text elements or the
        colour probe fails.
        """
        try:
            pairs = page.evaluate(_JS_GET_TEXT_COLORS)
        except Exception:
            # Best effort: a failed probe must not penalise the submission.
            return 1.0, []
        if not pairs:
            return 1.0, []

        max_delta = DESIGN_TOKENS["max_color_delta_e"]
        min_contrast = DESIGN_TOKENS["min_contrast_ratio"]

        violations: List[str] = []
        passed = 0
        for item in pairs:
            fg = tuple(item["fg"])
            bg = tuple(item["bg"])
            tag = item.get("tag", "?")

            contrast = _wcag_contrast(fg, bg)
            fg_delta = _closest_brand_delta_e(fg)
            bg_delta = _closest_brand_delta_e(bg)

            problems: List[str] = []
            if contrast < min_contrast:
                problems.append(
                    f"<{tag}> contrast {contrast:.1f}:1 < {min_contrast}:1 "
                    f"(fg={fg}, bg={bg})"
                )
            if fg_delta > max_delta:
                problems.append(
                    f"<{tag}> text colour {fg} not in brand palette (ΔE*={fg_delta:.1f})"
                )
            if bg_delta > max_delta:
                problems.append(
                    f"<{tag}> background colour {bg} not in brand palette (ΔE*={bg_delta:.1f})"
                )

            if problems:
                violations.extend(problems)
            else:
                passed += 1

        return passed / len(pairs), violations

    # ------------------------------------------------------------------

    def _score_spacing(self, page: Any) -> Tuple[float, List[str]]:
        """
        R_spacing – all padding/margin/gap values must be multiples of 8.

        The score is 1 - (off-grid values / positive values inspected),
        clamped to 0. At most 10 violations are reported, but every
        off-grid value counts towards the score. Returns (1.0, []) when no
        spacing value was inspected or the probe fails.
        """
        try:
            result = page.evaluate(_JS_GET_SPACING_VIOLATIONS) or {}
        except Exception:
            # Best effort: a failed probe must not penalise the submission.
            return 1.0, []

        total = result.get("total", 0)
        if total == 0:
            return 1.0, []

        bad_items = result.get("violations", [])
        violations = [
            f"<{item['tag']}> {item['prop']}: {item['value']}px is not a multiple of 8"
            for item in bad_items[:10]  # cap reported violations
        ]
        score = max(0.0, 1.0 - (len(bad_items) / total))
        return score, violations