"""Structural reward: DOM tag-sequence similarity + style coverage.""" from __future__ import annotations from difflib import SequenceMatcher from typing import Optional from openenv.server.rewards import extract_html def _get_tag_sequence(html: str) -> list[str]: from bs4 import BeautifulSoup soup = BeautifulSoup(html, "html.parser") return [t.name for t in soup.find_all() if t.name] def _get_css_classes(html: str) -> set[str]: from bs4 import BeautifulSoup soup = BeautifulSoup(html, "html.parser") classes: set[str] = set() for tag in soup.find_all(class_=True): classes.update(tag.get("class", [])) return classes def _get_inline_style_props(html: str) -> set[str]: """Return the set of CSS property names used in inline style attributes.""" from bs4 import BeautifulSoup soup = BeautifulSoup(html, "html.parser") props: set[str] = set() for tag in soup.find_all(style=True): for part in tag.get("style", "").split(";"): part = part.strip() if ":" in part: prop = part.split(":", 1)[0].strip().lower() if prop: props.add(prop) return props def structural_similarity_reward( completions: list[list[dict]], solution: Optional[list[str]] = None, ) -> list[float]: """Score structural similarity between generated and reference HTML. Computes: - Tag-sequence similarity via difflib SequenceMatcher (0–0.5) - Style coverage: * CSS class overlap when reference uses class-based CSS (0–0.5) * Inline style property overlap when ref uses inline CSS (0–0.5) Inline style property overlap penalises blank/unstyled predictions against styled references without hurting the perfect-match case. Args: completions: List of completion message lists. solution: List of reference HTML strings (one per completion). Returns: List of float scores in [0.0, 1.0]. """ results = [] for i, completion in enumerate(completions): content = completion[0]["content"] pred_html = extract_html(content) ref_html = solution[i] if solution and i < len(solution) else "" try: pred_tags = _get_tag_sequence(pred_html) ref_tags = _get_tag_sequence(ref_html) tag_sim = SequenceMatcher(None, pred_tags, ref_tags).ratio() ref_classes = _get_css_classes(ref_html) if ref_classes: pred_classes = _get_css_classes(pred_html) style_score = len(pred_classes & ref_classes) / len(ref_classes) else: # Reference uses inline styles — compare CSS property coverage ref_props = _get_inline_style_props(ref_html) if ref_props: pred_props = _get_inline_style_props(pred_html) style_score = len(pred_props & ref_props) / len(ref_props) else: style_score = 1.0 # ref has no styling at all → neutral score = 0.5 * tag_sim + 0.5 * style_score except Exception: score = 0.0 results.append(score) return results