Spaces:

amaljoe88
/

vision-coder-openenv

Running

App Files Files Community

vision-coder-openenv / src /agents.py

amaljoe88

deploy: sync 712e5bc -> HF

cf6c0e0 16 days ago

raw

history blame contribute delete

32.5 kB

	"""Developer and Critic agents for VisionCoder OpenEnv.

	All agent logic (tool-call handling, TODO-list critique, episode loop)
	lives here. Prompts are in openenv.prompts.

	Usage:
	from openenv.agents import run_episode, AgentConfig

	config = AgentConfig(api_key=..., api_base=..., model=...)
	results = run_episode(env_client, config, session_id=session_id, ref_b64=ref_b64, dbg=dbg)
	"""
	from __future__ import annotations

	import json
	import logging
	import os
	import re
	from dataclasses import dataclass, field
	from typing import List, Optional, Tuple

	from openai import OpenAI

	from openenv.prompts import (
	DEVELOPER_SYSTEM,
	FIRST_CRITIC_SYSTEM,
	SUBSEQUENT_CRITIC_SYSTEM,
	FALLBACK_HTML,
	)

	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)



	# ---------------------------------------------------------------------------
	# TODO list tracker
	# ---------------------------------------------------------------------------

	# Sort rank per priority tag (lower sorts first). Callers fall back to
	# rank 1 (the same as MEDIUM) for any tag missing from this table.
	_PRIORITY_ORDER = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}


	@dataclass
	class TodoItem:
	    """A single Critic finding tracked across steps."""

	    # Item text, e.g. "DIMENSION — description". TodoList.parse() strips a
	    # leading "PRIORITY | " tag when it recognises one.
	    text: str
	    done: bool = False
	    priority: str = "MEDIUM"  # HIGH / MEDIUM / LOW


	@dataclass
	class TodoList:
	    """Ordered list of Critic findings with parse / merge / format helpers."""

	    items: List[TodoItem] = field(default_factory=list)

	    def all_done(self) -> bool:
	        """Return True only for a non-empty list whose items are all resolved."""
	        return bool(self.items) and all(item.done for item in self.items)

	    def pending_count(self) -> int:
	        """Return the number of unresolved items (0 for an empty list)."""
	        return sum(1 for item in self.items if not item.done)

	    def format_for_critic(self) -> str:
	        """Previous TODO list passed to Critic — includes priority tag for exact copying."""
	        if not self.items:
	            return "(No previous TODO list — this is the first review.)"
	        lines = ["Previous TODO list (copy with EXACT priority and text, update only the markers):"]
	        for item in self.items:
	            marker = "[✓]" if item.done else "[ ]"
	            # Plain "|" separator: this is the character parse() splits on
	            # when the Critic echoes the line back.
	            lines.append(f"{marker} {item.priority} | {item.text}")
	        return "\n".join(lines)

	    def format_for_developer(self) -> str:
	        """Pending items sorted by priority, formatted as actionable critique."""
	        # Items containing these phrases describe something that already
	        # matches, so they are not passed on as work for the Developer.
	        _NOISE_PHRASES = (
	            "matches the reference", "which matches",
	            "is present and correct", "is correct", "matches reference",
	        )
	        pending = [
	            item for item in self.items
	            if not item.done
	            and not any(p in item.text.lower() for p in _NOISE_PHRASES)
	        ]
	        if not pending:
	            return (
	                "The Critic found no remaining issues. Look carefully at the reference "
	                "screenshot for fine details (spacing, colors, missing elements) and refine."
	            )
	        # Sort by priority: HIGH first, cap at 8 so Developer gets focused feedback
	        pending.sort(key=lambda it: _PRIORITY_ORDER.get(it.priority, 1))
	        pending = pending[:8]
	        lines = ["Fix these issues in priority order (Critic feedback):"]
	        for item in pending:
	            lines.append(f"- [{item.priority}] {item.text}")
	        return "\n".join(lines)

	    @classmethod
	    def parse(cls, text: str) -> "TodoList":
	        """Parse a TODO list from Critic output text.

	        Expected item format: "<marker> PRIORITY | DIMENSION — description",
	        where <marker> is "[✓]" (resolved), "[ ]" (pending) or "[+]" (new).
	        Priority tag (HIGH/MEDIUM/LOW) is optional — defaults to MEDIUM if absent.

	        [+] items are always kept pending (can't resolve same step they're discovered).
	        Lines without a marker, items shorter than 10 characters, items that
	        end on a dangling connective (treated as truncated) and duplicates
	        (same first 60 characters, case-insensitive) are dropped.
	        """
	        truncation_endings = (
	            " in", " on", " at", " to", " of", " for", " and", " the",
	            " a", " an", " with", " by", " from", " as", " or", " but",
	        )
	        valid_priorities = {"HIGH", "MEDIUM", "LOW"}
	        # Marker -> resolved flag. Every marker is exactly three characters.
	        markers = {"[✓]": True, "[ ]": False, "[+]": False}

	        result = cls()
	        seen: set = set()
	        for raw_line in text.split("\n"):
	            line = raw_line.strip()
	            marker = line[:3]
	            if marker not in markers:
	                continue
	            done = markers[marker]
	            item_text = line[3:].strip()
	            if len(item_text) < 10:
	                continue
	            if item_text.lower().endswith(truncation_endings):
	                continue
	            # Extract priority if present: "HIGH | LAYOUT — ...". Only the
	            # first "|" is considered, and the tag is stripped only when it
	            # is a known priority, so other "|" uses stay in the text.
	            priority = "MEDIUM"
	            head, sep, tail = item_text.partition("|")
	            if sep:
	                candidate = head.strip().upper()
	                if candidate in valid_priorities:
	                    priority = candidate
	                    item_text = tail.strip()
	            key = item_text.lower()[:60]
	            if key in seen:
	                continue
	            seen.add(key)
	            result.items.append(TodoItem(text=item_text, done=done, priority=priority))
	        return result

	    @classmethod
	    def merge(cls, prev: "TodoList", updated: "TodoList") -> "TodoList":
	        """Merge updated list back — re-adds any pending prev items the Critic forgot.

	        Uses 40-char prefix matching so paraphrased items count as the same issue.
	        Resolved prev items (done=True) are never re-added.
	        New [+] items introduced in this step are capped at 3 by priority so the
	        list doesn't balloon when the model ignores the per-step limit.
	        """
	        prev_prefixes = {item.text.lower()[:40] for item in prev.items}

	        # Separate carried items (also in prev) from genuinely new [+] items
	        carried: list = []
	        new_items: list = []
	        for item in updated.items:
	            if item.text.lower()[:40] in prev_prefixes:
	                carried.append(item)
	            else:
	                new_items.append(item)

	        # Keep at most 3 new items (highest priority first)
	        new_items.sort(key=lambda it: _PRIORITY_ORDER.get(it.priority, 1))
	        new_items = new_items[:3]

	        result = cls(items=carried + new_items)
	        updated_prefixes = {item.text.lower()[:40] for item in result.items}

	        # Re-add any pending prev items the Critic dropped entirely
	        for prev_item in prev.items:
	            if prev_item.done:
	                continue
	            if prev_item.text.lower()[:40] not in updated_prefixes:
	                result.items.append(prev_item)
	        return result


	# ---------------------------------------------------------------------------
	# HTML helpers
	# ---------------------------------------------------------------------------

	def _looks_like_html(text: str) -> bool:
	t = text.strip().lower()
	return t.startswith("<!doctype") or t.startswith("<html")


	def _parse_qwen_xml_tool_call(content: str) -> Optional[Tuple[str, dict]]:
	"""Fallback parser for Qwen3's XML tool call format when vllm hermes parser misses it."""
	if "<tool_call>" not in content:
	return None
	fn_m = re.search(r"<function=(\w+)>", content)
	if not fn_m:
	return None
	func_name = fn_m.group(1)
	args = {
	m.group(1): m.group(2).strip()
	for m in re.finditer(r"<parameter=(\w+)>(.*?)(?:</parameter>\|\Z)", content, re.DOTALL)
	}
	return (func_name, args) if args else None


	def _clean_html_output(content: str) -> str:
	    """Strip residual <tool_call> wrapper or markdown fences from model output.

	    Tried in order:
	    1. a Qwen-style XML tool call carrying an "html" parameter,
	    2. a leading markdown code fence (optionally tagged "html"),
	    3. otherwise *content* is returned unchanged.
	    """
	    parsed = _parse_qwen_xml_tool_call(content)
	    if parsed:
	        _, args = parsed
	        if "html" in args:
	            return args["html"]
	    # Capture the whole fenced body non-greedily, up to the first closing
	    # fence. Whitespace before the opening fence is tolerated.
	    fence = re.match(r"\s*```(?:html)?\s*(.*?)\s*```", content, re.DOTALL)
	    if fence:
	        return fence.group(1)
	    return content


	# ---------------------------------------------------------------------------
	# Developer agent
	# ---------------------------------------------------------------------------

	def developer_turn(
	    client: OpenAI,
	    env_client,  # unused — kept for signature compatibility
	    model: str,
	    ref_b64: str,
	    current_html: str,
	    todo: Optional[TodoList] = None,
	    dbg=None,
	) -> str:
	    """Produce HTML for the reference screenshot with one chat-completion call.

	    The Developer has no tools; rendering happens in the environment after
	    step(). When both a previous HTML and a non-empty TODO list are given,
	    the request becomes a revision: the previous HTML (first 5000 characters)
	    and the Critic feedback are appended to the prompt.

	    Returns:
	        The cleaned model output, or FALLBACK_HTML when the output does not
	        look like an HTML document.
	    """
	    if dbg:
	        feedback_for_log = todo.format_for_developer() if todo else None
	        dbg.log_developer_input(current_html, feedback_for_log)

	    # Choose the closing instruction: revise the earlier attempt, or start fresh.
	    if current_html and todo and todo.items:
	        instruction = (
	            f"\n\nYour previous HTML:\n```html\n{current_html[:5000]}\n```\n\n"
	            f"{todo.format_for_developer()}\n\n"
	            "Output the revised HTML only."
	        )
	    else:
	        instruction = "\n\nGenerate complete HTML with inline CSS. Output the HTML only."

	    prompt_parts: list = [
	        {"type": "text", "text": "Reference screenshot (reproduce this UI):"},
	        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{ref_b64}"}},
	        {"type": "text", "text": instruction},
	    ]
	    chat = [
	        {"role": "system", "content": DEVELOPER_SYSTEM},
	        {"role": "user", "content": prompt_parts},
	    ]

	    completion = client.chat.completions.create(
	        model=model,
	        messages=chat,
	        max_tokens=4096,
	        temperature=0.7,
	    )
	    raw_reply = completion.choices[0].message.content or ""
	    candidate = _clean_html_output(raw_reply)
	    html_out = candidate if _looks_like_html(candidate) else FALLBACK_HTML
	    if dbg:
	        dbg.log_developer_output(html_out)
	    return html_out


	# ---------------------------------------------------------------------------
	# Critic agent
	# ---------------------------------------------------------------------------

	def critic_turn(
	    client: OpenAI,
	    model: str,
	    ref_b64: str,
	    render_curr_b64: str,
	    prev_todo: Optional[TodoList],
	    render_prev_b64: Optional[str] = None,
	    current_html: str = "",
	    dbg=None,
	) -> Tuple[str, TodoList]:
	    """Compare the current render with the reference and update the TODO list.

	    The Developer's HTML source (first 5000 characters) is included when
	    given, so the Critic can write selector-specific CSS fixes rather than
	    abstract visual observations. A missing *prev_todo* makes this a first
	    review and selects the first-review system prompt.

	    Returns:
	        (raw critique text, updated TodoList). With a previous list the parsed
	        result is merged back into it, unless every parsed item is marked done.
	    """
	    first_review = prev_todo is None

	    if dbg:
	        earlier_feedback = prev_todo.format_for_developer() if prev_todo else None
	        dbg.log_critic_input(ref_b64, render_prev_b64, earlier_feedback, render_curr_b64)

	    def _text(body: str) -> dict:
	        return {"type": "text", "text": body}

	    def _image(b64: str) -> dict:
	        return {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}

	    parts: list = [_text("Reference screenshot:"), _image(ref_b64)]

	    # The previous render is only shown together with a previous TODO list.
	    if render_prev_b64 and prev_todo:
	        parts.extend([
	            _text("Previous render (before this step's revision):"),
	            _image(render_prev_b64),
	        ])

	    parts.extend([_text("Current render:"), _image(render_curr_b64)])

	    if current_html:
	        parts.append(_text(
	            f"\nDeveloper's current HTML source (use exact selectors in your FIX instructions):\n"
	            f"```html\n{current_html[:5000]}\n```"
	        ))

	    if first_review:
	        closing = (
	            "\nThis is the first review. Perform a comprehensive visual audit covering "
	            "LAYOUT, STRUCTURE, COLOR, TYPOGRAPHY, SPACING, and TEXT dimensions. "
	            "Output your initial TODO LIST with [+] items only. "
	            "Each item MUST include a → FIX: instruction with exact CSS."
	        )
	    else:
	        closing = (
	            f"\n{prev_todo.format_for_critic()}\n\n"
	            "Update the TODO list based on what you see in the CURRENT RENDER and HTML. "
	            "Mark fixed items [✓], keep unresolved items [ ] (update FIX selector if HTML changed), "
	            "add new issues with [+]. Each item must have a → FIX: instruction. "
	            "Stop after the last item — no STATUS or summary line."
	        )
	    parts.append(_text(closing))

	    system_prompt = FIRST_CRITIC_SYSTEM if first_review else SUBSEQUENT_CRITIC_SYSTEM
	    completion = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": system_prompt},
	            {"role": "user", "content": parts},
	        ],
	        max_tokens=2048,
	        temperature=0.1,
	    )
	    critique_text = completion.choices[0].message.content or ""

	    updated_todo = TodoList.parse(critique_text)
	    # When the Critic marked every listed item [✓] that signal is trusted and
	    # the merge is skipped, so intentionally resolved items are not re-added.
	    if prev_todo and not updated_todo.all_done():
	        updated_todo = TodoList.merge(prev_todo, updated_todo)

	    if dbg:
	        dbg.log_critic_output(critique_text, updated_todo)

	    return critique_text, updated_todo


	# ---------------------------------------------------------------------------
	# Episode config
	# ---------------------------------------------------------------------------

	@dataclass
	class AgentConfig:
	    """Connection and loop settings shared by every episode runner in this module."""

	    api_key: str  # passed to OpenAI(api_key=...)
	    api_base: str  # passed to OpenAI(base_url=...)
	    model: str  # model name sent with every chat-completion request
	    max_steps: int = 5  # upper bound on Developer steps per episode


	# ---------------------------------------------------------------------------
	# Episode runner
	# ---------------------------------------------------------------------------

	@dataclass
	class StepResult:
	    """Outcome of one environment step, as returned by the episode runners."""

	    step: int  # 1-based step number
	    html: str  # HTML submitted to the environment on this step
	    reward: float  # "reward" value from the step response
	    done: bool  # "done" flag from the step response
	    critique: str  # Critic text attached to this step ("" when there is none)
	    todo: Optional[TodoList]  # TODO list attached to this step; None in the Critic-free runners
	    render_full_b64: Optional[str]  # "render_full" value from the step response
	    sub_rewards: Optional[dict]  # "rewards" entry of the response metadata
	    error: Optional[str] = None  # truncated Developer exception message, if the turn failed


	def run_episode(
	    env_client,
	    config: AgentConfig,
	    session_id: str,
	    ref_b64: str,
	    dbg=None,
	    on_step=None,  # optional callback(StepResult) → None, called immediately after env step
	) -> List[StepResult]:
	    """Run one full episode (Developer↔Critic loop) and return per-step results.

	    Terminates when:
	    - max_steps reached or the env reports done=True
	    - Critic marks all TODO items resolved (an empty or unparseable TODO
	      list does NOT count as resolved)
	    - No reward improvement for 2 consecutive steps (plateau)

	    Monotonic reward guarantee: Developer always receives the best-seen HTML as
	    its base, so regressions don't compound. If a step produces lower reward the
	    Developer retries from the best-known state on the next step.

	    Args:
	        env_client: HTTP client exposing post(url, json=...); each step POSTs
	            to "/step".
	        config: API credentials, model name and step budget.
	        session_id: environment session identifier sent with every step.
	        ref_b64: base64-encoded PNG of the reference screenshot.
	        dbg: optional debug logger object.
	        on_step: optional callback invoked with the StepResult right after the
	            environment step, before the Critic runs. The Critic's output is
	            filled into the same object afterwards.

	    Raises:
	        Whatever step_resp.raise_for_status() raises when the env rejects a step.
	    """
	    client = OpenAI(api_key=config.api_key, base_url=config.api_base)

	    best_html = ""
	    best_reward = 0.0
	    no_improve_streak = 0
	    _MAX_NO_IMPROVE = 2

	    todo: Optional[TodoList] = None
	    render_prev: Optional[str] = None
	    results: List[StepResult] = []

	    for step_i in range(config.max_steps):
	        # Guard: Critic resolved everything. all_done() is False for an empty
	        # list, so a critique that yielded no items is not read as success.
	        if todo is not None and todo.all_done():
	            break
	        # Guard: plateau — no improvement for N consecutive steps
	        if no_improve_streak >= _MAX_NO_IMPROVE:
	            print(
	                f"[CRITIC] No improvement for {_MAX_NO_IMPROVE} consecutive steps "
	                f"(best={best_reward:.3f}) — stopping early.",
	                flush=True,
	            )
	            break

	        error: Optional[str] = None

	        # Developer always starts from the best-seen HTML to avoid compounding regressions
	        try:
	            current_html = developer_turn(
	                client, env_client, config.model,
	                ref_b64, best_html, todo, dbg,
	            )
	        except Exception as exc:
	            # Best effort: record the failure and still step the env with the fallback page.
	            error = str(exc)[:120]
	            current_html = FALLBACK_HTML

	        # Step the environment
	        step_resp = env_client.post(
	            "/step",
	            json={"html": current_html, "session_id": session_id},
	        )
	        step_resp.raise_for_status()
	        result = step_resp.json()

	        # "or" fallbacks also cover keys that are present but null.
	        reward = float(result.get("reward") or 0.0)
	        env_done = bool(result.get("done", False))
	        render_full = result.get("render_full")
	        sub_rewards = (result.get("metadata") or {}).get("rewards")

	        # Monotonic tracking — update best only on genuine improvement
	        if reward > best_reward:
	            best_reward = reward
	            best_html = current_html
	            no_improve_streak = 0
	        else:
	            no_improve_streak += 1

	        if dbg:
	            dbg.log_step_result(reward, env_done, render_full, sub_rewards)

	        step_n = step_i + 1

	        sr = StepResult(
	            step=step_n,
	            html=current_html,
	            reward=reward,
	            done=env_done,
	            critique="",
	            todo=todo,
	            render_full_b64=render_full,
	            sub_rewards=sub_rewards,
	            error=error,
	        )

	        # Notify caller immediately so [STEP] prints before [CRITIC]
	        if on_step:
	            on_step(sr)

	        # Critic turn (skip on final env step, and when there is no render to review)
	        if not env_done and render_full:
	            try:
	                critique_text, todo = critic_turn(
	                    client, config.model,
	                    ref_b64, render_full,
	                    prev_todo=todo,
	                    render_prev_b64=render_prev,
	                    current_html=current_html,
	                    dbg=dbg,
	                )
	                sr.critique = critique_text
	                sr.todo = todo
	                preview = critique_text.replace("\n", " ")[:200]
	                print(
	                    f"[CRITIC] step={step_n} reward={reward:.3f} best={best_reward:.3f} → {preview}",
	                    flush=True,
	                )
	            except Exception as exc:
	                logger.warning("Critic failed: %s", exc)
	                todo = None
	        elif not env_done:
	            # Without a render the Critic has nothing to compare; keep the
	            # existing TODO list for the next Developer turn.
	            logger.warning("No render returned at step %d; skipping Critic.", step_n)

	        results.append(sr)
	        render_prev = render_full

	        if env_done:
	            break
	        if todo is not None and todo.all_done():
	            print(
	                f"[CRITIC] All items resolved at step={step_n} reward={reward:.3f} — stopping.",
	                flush=True,
	            )
	            break

	    return results


	# ---------------------------------------------------------------------------
	# Approach B: Long-horizon Developer (no Critic, sees full history)
	# ---------------------------------------------------------------------------

	def developer_turn_long_horizon(
	    client: OpenAI,
	    model: str,
	    ref_b64: str,
	    history: List[Tuple[str, str]],  # list of (render_full_b64, html)
	    dbg=None,
	) -> str:
	    """Generate HTML given the reference plus every earlier attempt.

	    Each history entry contributes its render and the first 2000 characters of
	    its HTML to the prompt. With an empty history the model is asked for a
	    first attempt.

	    Returns:
	        The cleaned model output, or FALLBACK_HTML when it does not look like HTML.
	    """
	    parts: list = [
	        {"type": "text", "text": "Reference screenshot (reproduce this UI):"},
	        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{ref_b64}"}},
	    ]

	    if not history:
	        closing = "\n\nGenerate complete HTML with inline CSS. Output the HTML only."
	    else:
	        for attempt_no, (render_b64, prev_html) in enumerate(history, start=1):
	            parts.extend([
	                {"type": "text", "text": f"\n\nStep {attempt_no} render:"},
	                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{render_b64}"}},
	                {"type": "text", "text": f"Step {attempt_no} HTML:\n```html\n{prev_html[:2000]}\n```"},
	            ])
	        closing = (
	            "\n\nAll your previous attempts are shown above. "
	            "Generate improved HTML that better matches the reference. "
	            "Output the HTML only."
	        )
	    parts.append({"type": "text", "text": closing})

	    completion = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": DEVELOPER_SYSTEM},
	            {"role": "user", "content": parts},
	        ],
	        max_tokens=4096,
	        temperature=0.7,
	    )
	    raw_reply = completion.choices[0].message.content or ""
	    candidate = _clean_html_output(raw_reply)
	    if _looks_like_html(candidate):
	        return candidate
	    return FALLBACK_HTML


	def run_episode_long_dev(
	    env_client,
	    config: AgentConfig,
	    session_id: str,
	    ref_b64: str,
	    dbg=None,
	    on_step=None,
	) -> List[StepResult]:
	    """Approach B: Long-horizon Developer only — full history, no Critic.

	    Every step the Developer sees the reference plus all earlier
	    (render, HTML) pairs. The loop ends after config.max_steps steps or when
	    the env reports done.

	    Args:
	        env_client: HTTP client exposing post(url, json=...).
	        config: API credentials, model name and step budget.
	        session_id: environment session identifier sent with every step.
	        ref_b64: base64-encoded PNG of the reference screenshot.
	        dbg: optional debug object, forwarded to the Developer turn.
	        on_step: optional callback invoked with each StepResult.

	    Raises:
	        Whatever step_resp.raise_for_status() raises when the env rejects a step.
	    """
	    client = OpenAI(api_key=config.api_key, base_url=config.api_base)

	    history: List[Tuple[str, str]] = []
	    results: List[StepResult] = []

	    for step_i in range(config.max_steps):
	        error: Optional[str] = None
	        try:
	            current_html = developer_turn_long_horizon(
	                client, config.model, ref_b64, history, dbg
	            )
	        except Exception as exc:
	            # Best effort: record the failure and step the env with the fallback page.
	            error = str(exc)[:120]
	            current_html = FALLBACK_HTML

	        step_resp = env_client.post(
	            "/step",
	            json={"html": current_html, "session_id": session_id},
	        )
	        step_resp.raise_for_status()
	        result = step_resp.json()

	        # "or" fallbacks also cover keys that are present but null.
	        reward = float(result.get("reward") or 0.0)
	        env_done = bool(result.get("done", False))
	        render_full = result.get("render_full")
	        sub_rewards = (result.get("metadata") or {}).get("rewards")

	        step_n = step_i + 1
	        sr = StepResult(
	            step=step_n,
	            html=current_html,
	            reward=reward,
	            done=env_done,
	            critique="",
	            todo=None,
	            render_full_b64=render_full,
	            sub_rewards=sub_rewards,
	            error=error,
	        )
	        if on_step:
	            on_step(sr)

	        # Only attempts that produced a render are shown back to the Developer.
	        if render_full:
	            history.append((render_full, current_html))

	        results.append(sr)
	        if env_done:
	            break

	    return results


	# ---------------------------------------------------------------------------
	# Approach C: Short-horizon Developer (no Critic, sees only last render)
	# ---------------------------------------------------------------------------

	def developer_turn_short_horizon(
	    client: OpenAI,
	    model: str,
	    ref_b64: str,
	    prev_render_b64: Optional[str],
	    prev_html: Optional[str],
	    dbg=None,
	) -> str:
	    """Generate HTML given the reference and, at most, the latest attempt.

	    The previous render and the first 3000 characters of the previous HTML are
	    shown only when both are present; otherwise the model is asked for a
	    first attempt.

	    Returns:
	        The cleaned model output, or FALLBACK_HTML when it does not look like HTML.
	    """
	    parts: list = [
	        {"type": "text", "text": "Reference screenshot (reproduce this UI):"},
	        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{ref_b64}"}},
	    ]

	    has_previous_attempt = prev_render_b64 and prev_html
	    if has_previous_attempt:
	        revision_request = (
	            f"\n\nYour previous HTML:\n```html\n{prev_html[:3000]}\n```\n\n"
	            "Compare the renders and output improved HTML only."
	        )
	        parts.append({"type": "text", "text": "\n\nYour previous render:"})
	        parts.append(
	            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{prev_render_b64}"}}
	        )
	        parts.append({"type": "text", "text": revision_request})
	    else:
	        parts.append({
	            "type": "text",
	            "text": "\n\nGenerate complete HTML with inline CSS. Output the HTML only.",
	        })

	    completion = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": DEVELOPER_SYSTEM},
	            {"role": "user", "content": parts},
	        ],
	        max_tokens=4096,
	        temperature=0.7,
	    )
	    raw_reply = completion.choices[0].message.content or ""
	    candidate = _clean_html_output(raw_reply)
	    if _looks_like_html(candidate):
	        return candidate
	    return FALLBACK_HTML


	def run_episode_short_dev(
	    env_client,
	    config: AgentConfig,
	    session_id: str,
	    ref_b64: str,
	    dbg=None,
	    on_step=None,
	) -> List[StepResult]:
	    """Approach C: Short-horizon Developer only — sees only last render each step, no Critic.

	    The loop ends after config.max_steps steps or when the env reports done.

	    Args:
	        env_client: HTTP client exposing post(url, json=...).
	        config: API credentials, model name and step budget.
	        session_id: environment session identifier sent with every step.
	        ref_b64: base64-encoded PNG of the reference screenshot.
	        dbg: optional debug object, forwarded to the Developer turn.
	        on_step: optional callback invoked with each StepResult.

	    Raises:
	        Whatever step_resp.raise_for_status() raises when the env rejects a step.
	    """
	    client = OpenAI(api_key=config.api_key, base_url=config.api_base)

	    current_html = ""
	    prev_render: Optional[str] = None
	    results: List[StepResult] = []

	    for step_i in range(config.max_steps):
	        error: Optional[str] = None
	        try:
	            current_html = developer_turn_short_horizon(
	                client, config.model, ref_b64,
	                prev_render,
	                # No previous HTML exists on the very first step.
	                current_html if step_i > 0 else None,
	                dbg,
	            )
	        except Exception as exc:
	            # Best effort: record the failure and step the env with the fallback page.
	            error = str(exc)[:120]
	            current_html = FALLBACK_HTML

	        step_resp = env_client.post(
	            "/step",
	            json={"html": current_html, "session_id": session_id},
	        )
	        step_resp.raise_for_status()
	        result = step_resp.json()

	        # "or" fallbacks also cover keys that are present but null.
	        reward = float(result.get("reward") or 0.0)
	        env_done = bool(result.get("done", False))
	        render_full = result.get("render_full")
	        sub_rewards = (result.get("metadata") or {}).get("rewards")

	        step_n = step_i + 1
	        sr = StepResult(
	            step=step_n,
	            html=current_html,
	            reward=reward,
	            done=env_done,
	            critique="",
	            todo=None,
	            render_full_b64=render_full,
	            sub_rewards=sub_rewards,
	            error=error,
	        )
	        if on_step:
	            on_step(sr)

	        prev_render = render_full  # only keep the latest render
	        results.append(sr)
	        if env_done:
	            break

	    return results


	# ---------------------------------------------------------------------------
	# Approach D: Long-horizon Developer (low-res renders) + simple free-form Critic
	# ---------------------------------------------------------------------------

	# System prompt for the Approach D critic: free-form feedback, no TODO-list format.
	_SIMPLE_CRITIC_SYSTEM = (
	    "You are a UI reviewer. You will be shown a reference screenshot and a current render "
	    "of HTML that is meant to reproduce it.\n\n"
	    "Describe what needs to change the most to make the render match the reference. "
	    "Be concise and specific — mention exact colors, sizes, or elements where helpful. "
	    "You can write a short paragraph or a bullet list. No structured format required."
	)

	# System prompt for the Approach D developer: asks for raw HTML only and
	# spells out the layout rules the output must follow.
	_SIMPLE_DEV_SYSTEM = (
	    "You are a UI-to-code expert. Given a reference screenshot of a web page, "
	    "generate complete HTML with inline CSS that reproduces the layout as accurately as possible.\n\n"
	    "Critical layout rules:\n"
	    "- Always use `* { box-sizing: border-box; margin: 0; padding: 0; }` reset.\n"
	    "- Page and all top-level sections must be full-width: `width: 100%; min-height: 100vh`.\n"
	    "- Never center-constrain the overall page — only constrain inner content containers if the reference does.\n"
	    "- Match background colors, section colors, and typography as precisely as possible.\n\n"
	    "Output ONLY the raw HTML code starting with <!DOCTYPE html>. "
	    "No explanations, no markdown fences — just the HTML."
	)


	def _simple_critic_turn(
	    client: OpenAI,
	    model: str,
	    ref_b64: str,
	    render_full_b64: str,
	) -> str:
	    """Ask the model, in free form, what most needs to change in the render.

	    Sends the reference and the current render in one request.

	    Returns:
	        The model's reply text, or "" when the reply has no content.
	    """
	    review_request = [
	        {"type": "text", "text": "Reference:"},
	        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{ref_b64}"}},
	        {"type": "text", "text": "Current render:"},
	        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{render_full_b64}"}},
	        {"type": "text", "text": "What needs to change the most?"},
	    ]
	    completion = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": _SIMPLE_CRITIC_SYSTEM},
	            {"role": "user", "content": review_request},
	        ],
	        max_tokens=512,
	        temperature=0.1,
	    )
	    reply = completion.choices[0].message.content
	    return reply or ""


	def _developer_turn_d(
	    client: OpenAI,
	    model: str,
	    ref_b64: str,
	    history: List[Tuple[str, str]],  # (render_low_b64, html)
	    critique: Optional[str],
	) -> str:
	    """Approach D developer turn.

	    The prompt holds the reference, every earlier low-res render with the
	    first 2000 characters of its HTML, and the reviewer feedback when there
	    is any.

	    Returns:
	        The cleaned model output, or FALLBACK_HTML when it does not look like HTML.
	    """
	    parts: list = [
	        {"type": "text", "text": "Reference screenshot (reproduce this UI):"},
	        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{ref_b64}"}},
	    ]

	    for attempt_no, (render_low_b64, prev_html) in enumerate(history or (), start=1):
	        parts.extend([
	            {"type": "text", "text": f"\n\nStep {attempt_no} render (low-res preview):"},
	            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{render_low_b64}"}},
	            {"type": "text", "text": f"Step {attempt_no} HTML:\n```html\n{prev_html[:2000]}\n```"},
	        ])

	    # Closing instruction: feedback takes precedence, then plain iteration,
	    # then a first attempt.
	    if critique:
	        closing = f"\n\nReviewer feedback on your last render:\n{critique}\n\nGenerate improved HTML addressing this feedback. Output the HTML only."
	    elif history:
	        closing = "\n\nGenerate improved HTML that better matches the reference. Output the HTML only."
	    else:
	        closing = "\n\nGenerate complete HTML with inline CSS. Output the HTML only."
	    parts.append({"type": "text", "text": closing})

	    completion = client.chat.completions.create(
	        model=model,
	        messages=[
	            {"role": "system", "content": _SIMPLE_DEV_SYSTEM},
	            {"role": "user", "content": parts},
	        ],
	        max_tokens=4096,
	        temperature=0.7,
	    )
	    raw_reply = completion.choices[0].message.content or ""
	    candidate = _clean_html_output(raw_reply)
	    if _looks_like_html(candidate):
	        return candidate
	    return FALLBACK_HTML


	def run_episode_d(
	    env_client,
	    config: AgentConfig,
	    session_id: str,
	    ref_b64: str,
	    dbg=None,
	    on_step=None,
	) -> List[StepResult]:
	    """Approach D: long-horizon dev (low-res renders) + simple free-form critic.

	    Every step the Developer sees the reference, all earlier
	    (low-res render, HTML) pairs and the latest critique. The critic then
	    reviews the full-res render of the new attempt. The loop ends after
	    config.max_steps steps or when the env reports done.

	    Args:
	        env_client: HTTP client exposing post(url, json=...).
	        config: API credentials, model name and step budget.
	        session_id: environment session identifier sent with every step.
	        ref_b64: base64-encoded PNG of the reference screenshot.
	        dbg: accepted for signature parity with the other runners; not used here.
	        on_step: optional callback invoked with each StepResult.

	    Raises:
	        Whatever step_resp.raise_for_status() raises when the env rejects a step.
	    """
	    client = OpenAI(api_key=config.api_key, base_url=config.api_base)

	    history: List[Tuple[str, str]] = []  # (render_low_b64, html)
	    critique: Optional[str] = None
	    results: List[StepResult] = []

	    for step_i in range(config.max_steps):
	        error: Optional[str] = None
	        try:
	            current_html = _developer_turn_d(
	                client, config.model, ref_b64, history, critique
	            )
	        except Exception as exc:
	            # Best effort: record the failure and step the env with the fallback page.
	            error = str(exc)[:120]
	            current_html = FALLBACK_HTML

	        step_resp = env_client.post(
	            "/step",
	            json={"html": current_html, "session_id": session_id},
	        )
	        step_resp.raise_for_status()
	        result = step_resp.json()

	        # "or" fallbacks also cover keys that are present but null.
	        reward = float(result.get("reward") or 0.0)
	        env_done = bool(result.get("done", False))
	        render_full = result.get("render_full")
	        render_low = result.get("render_low")
	        sub_rewards = (result.get("metadata") or {}).get("rewards")

	        step_n = step_i + 1
	        sr = StepResult(
	            step=step_n,
	            html=current_html,
	            reward=reward,
	            done=env_done,
	            # The critique that informed this step's HTML (from the previous step).
	            critique=critique or "",
	            todo=None,
	            render_full_b64=render_full,
	            sub_rewards=sub_rewards,
	            error=error,
	        )
	        if on_step:
	            on_step(sr)

	        # Only attempts that produced a low-res render are shown back to the Developer.
	        if render_low:
	            history.append((render_low, current_html))

	        # Critic turn (skip on final env step)
	        if not env_done and render_full:
	            try:
	                critique = _simple_critic_turn(client, config.model, ref_b64, render_full)
	                preview = critique.replace("\n", " ")[:200]
	                print(f"[CRITIC-D] step={step_n} reward={reward:.2f} → {preview}", flush=True)
	            except Exception as exc:
	                logger.warning("Critic-D failed: %s", exc)
	                critique = None

	        results.append(sr)
	        if env_done:
	            break

	    return results