Spaces:

amaljoe88
/

vision-coder-openenv

Sleeping

App Files Files Community

vision-coder-openenv / src /server /rewards /structural_rewards.py

amaljoe88

deploy: sync 712e5bc -> HF

cf6c0e0 about 1 month ago

raw

history blame contribute delete

3.25 kB

	"""Structural reward: DOM tag-sequence similarity + style coverage."""
	from __future__ import annotations

	from difflib import SequenceMatcher
	from typing import Optional

	from openenv.server.rewards import extract_html


	def _get_tag_sequence(html: str) -> list[str]:
	    """Return the names of all tags found in *html*, in ``find_all()`` order."""
	    # Imported lazily, as in the rest of this module, so that importing the
	    # module itself does not require bs4.
	    from bs4 import BeautifulSoup

	    document = BeautifulSoup(html, "html.parser")
	    names: list[str] = []
	    for element in document.find_all():
	        # Skip anything that does not carry a tag name.
	        if element.name:
	            names.append(element.name)
	    return names


	def _get_css_classes(html: str) -> set[str]:
	    """Return the set of values found in ``class`` attributes within *html*."""
	    from bs4 import BeautifulSoup

	    document = BeautifulSoup(html, "html.parser")
	    # Only elements that actually carry a ``class`` attribute are visited;
	    # each element may contribute several class names.
	    return {
	        class_name
	        for element in document.find_all(class_=True)
	        for class_name in element.get("class", [])
	    }


	def _get_inline_style_props(html: str) -> set[str]:
	    """Return the set of CSS property names used in inline ``style`` attributes.

	    Each ``style`` value is split on ``;`` into declarations, and the text
	    before the first ``:`` of every declaration is taken as the property
	    name (whitespace-trimmed and lower-cased). Declarations without a ``:``
	    or with an empty name are ignored.
	    """
	    from bs4 import BeautifulSoup

	    document = BeautifulSoup(html, "html.parser")
	    found: set[str] = set()
	    for element in document.find_all(style=True):
	        declarations = element.get("style", "").split(";")
	        for declaration in declarations:
	            name, colon, _value = declaration.partition(":")
	            if not colon:
	                # No "name: value" pair in this chunk (e.g. trailing ";").
	                continue
	            name = name.strip().lower()
	            if name:
	                found.add(name)
	    return found


	def structural_similarity_reward(
	    completions: list[list[dict]],
	    solution: Optional[list[str]] = None,
	) -> list[float]:
	    """Score structural similarity between generated and reference HTML.

	    Each score is the equally weighted sum of two components:

	    - Tag-sequence similarity: the ``difflib.SequenceMatcher`` ratio over
	      the tag names of prediction and reference (contributes 0-0.5).
	    - Style coverage (contributes 0-0.5), chosen by what the reference uses:

	      * CSS classes present in the reference: fraction of those classes
	        that also appear in the prediction.
	      * Otherwise, inline style properties present in the reference:
	        fraction of those property names that also appear in the
	        prediction. This penalises blank/unstyled predictions against
	        styled references without hurting the perfect-match case.
	      * Otherwise (reference has no styling at all): full coverage.

	    Args:
	        completions: List of completion message lists. Only the
	            ``"content"`` of the first message of each completion is scored.
	        solution: List of reference HTML strings (one per completion). When
	            it is ``None`` or shorter than ``completions``, the missing
	            reference is treated as the empty string.

	    Returns:
	        List of float scores in [0.0, 1.0], one per completion. A completion
	        that cannot be scored (empty message list, missing ``"content"``
	        key, or an error during extraction/parsing) receives 0.0.
	    """
	    scores: list[float] = []
	    for index, completion in enumerate(completions):
	        has_reference = bool(solution) and index < len(solution)
	        ref_html = solution[index] if has_reference else ""

	        try:
	            # Extraction is inside the try on purpose: a single malformed
	            # completion must score 0.0 rather than abort the whole batch.
	            content = completion[0]["content"]
	            pred_html = extract_html(content)

	            tag_sim = SequenceMatcher(
	                None,
	                _get_tag_sequence(pred_html),
	                _get_tag_sequence(ref_html),
	            ).ratio()

	            ref_classes = _get_css_classes(ref_html)
	            if ref_classes:
	                pred_classes = _get_css_classes(pred_html)
	                style_score = len(pred_classes & ref_classes) / len(ref_classes)
	            else:
	                # Reference has no classes: fall back to comparing inline
	                # CSS property coverage.
	                ref_props = _get_inline_style_props(ref_html)
	                if ref_props:
	                    pred_props = _get_inline_style_props(pred_html)
	                    style_score = len(pred_props & ref_props) / len(ref_props)
	                else:
	                    # Reference has no styling at all: nothing to miss.
	                    style_score = 1.0

	            score = 0.5 * tag_sim + 0.5 * style_score
	        except Exception:
	            # Best-effort scoring: any failure counts as a zero reward.
	            score = 0.0

	        scores.append(score)
	    return scores