Spaces:

kgast
/

openenv-hackathon-tesserae

Running

openenv-hackathon-tesserae / openenv /models.py

kabalan

Add structured action types and session state management

a1ca935 about 1 month ago

11 kB

	"""
	Pydantic data models for the Slide Skill OpenEnv environment.

	Action space:
	SlideSkillAction is a discriminated union of action types:

	File-level actions (modify skill markdown files):
	- EditSectionAction: Replace a named section's body in one skill file.
	- ReplaceFileAction: Replace the entire content of one skill file.

	Structured actions (higher-level operations):
	- AppendExampleAction: Append a scored example entry to EXAMPLES.md.
	- EditJsTemplateAction: Define a reusable JS code block injected into
	the generator prompt as a mandatory snippet.
	- SetConstraintAction: Add or remove a hard constraint for the generator.
	- PatchCodeAction: Add a regex find/replace rule applied to generated JS
	after LLM output but before Node.js execution.

	Observation space:
	SlideSkillObservation contains the full evaluator output including all
	seven score dimensions plus qualitative feedback fields.
	"""

	from __future__ import annotations

	from typing import Annotated, Literal

	from pydantic import BaseModel, Field


	# ---------------------------------------------------------------------------
	# Actions
	# ---------------------------------------------------------------------------

	SkillFile = Literal[
	"DESIGN_RULES.md",
	"EXAMPLES.md",
	"SKILL.md",
	"editing.md",
	"pptxgenjs.md",
	]
	"""Skill files the optimizer is allowed to modify."""


	class EditSectionAction(BaseModel):
	"""
	Replace the body of a named markdown section within a skill file.

	The section is identified by its heading text (without the leading #
	characters). The replacement spans from immediately after the heading
	line to (but not including) the next heading of equal or higher level.

	Example:
	action = EditSectionAction(
	file="DESIGN_RULES.md",
	section_heading="Color Palette",
	new_body="- Navy (#0C2340): primary\\n- White: background\\n"
	)
	"""

	action_type: Literal["edit_section"] = "edit_section"
	file: SkillFile = Field(..., description="Which skill file to edit.")
	section_heading: str = Field(
	...,
	description=(
	"Exact heading text (without leading # markers). "
	"Case-sensitive. Must match a heading in the file."
	),
	)
	new_body: str = Field(
	...,
	description="New markdown content for the section body (after the heading line).",
	)


	class ReplaceFileAction(BaseModel):
	"""
	Replace the entire content of a skill file.

	Use this when the optimizer rewrites the file wholesale rather than
	making targeted section edits. This is the mode used by the historical
	optimizer LLM in the Skill Forge loop.
	"""

	action_type: Literal["replace_file"] = "replace_file"
	file: SkillFile = Field(..., description="Which skill file to replace.")
	new_content: str = Field(
	...,
	description="Complete new file content (valid markdown).",
	)


	class AppendExampleAction(BaseModel):
	"""
	Append a scored example entry to EXAMPLES.md.

	Builds a memory of what the evaluator rewards, giving the generator
	better few-shot context over time. The entry is formatted as a markdown
	section with score, verdict, and description.
	"""

	action_type: Literal["append_example"] = "append_example"
	description: str = Field(
	...,
	description="What this example demonstrates (e.g. 'White bg with navy table headers').",
	)
	score: int = Field(
	..., ge=0, le=100,
	description="The score this approach achieved (0-100).",
	)
	verdict: str = Field(
	...,
	description="Evaluator's one-line verdict for this example.",
	)


	class EditJsTemplateAction(BaseModel):
	"""
	Define or update a reusable JS code block injected into the generator prompt.

	Templates are stored per-session and injected as a "## Required Code Blocks"
	section. The generator LLM is instructed to use the exact code for the
	named block. Set js_code to empty string to delete a template.

	Example:
	action = EditJsTemplateAction(
	block_name="footer",
	js_code="slide.addText('McKinsey & Company', {x: 8.5, y: 5.2, ...});"
	)
	"""

	action_type: Literal["edit_js_template"] = "edit_js_template"
	block_name: str = Field(
	...,
	description="Name for this code block (e.g. 'footer', 'table_style', 'title_section').",
	)
	js_code: str = Field(
	...,
	description="JavaScript code snippet. Empty string removes the template.",
	)


	class SetConstraintAction(BaseModel):
	"""
	Add or remove a hard constraint for the generator LLM.

	Constraints are injected into the generator prompt as a "## Hard Constraints"
	section. Each constraint is a single directive like "NEVER use require('react')"
	or "ALWAYS use a white background".
	"""

	action_type: Literal["set_constraint"] = "set_constraint"
	constraint: str = Field(
	...,
	description="The constraint text (e.g. 'NEVER use colored backgrounds for data slides').",
	)
	active: bool = Field(
	True,
	description="True to add/keep the constraint, False to remove it.",
	)


	class PatchCodeAction(BaseModel):
	"""
	Add a regex find/replace rule applied to generated JS after LLM output
	but before Node.js execution.

	Patches are applied in insertion order. Useful for mechanical fixes like
	color corrections, font swaps, or removing problematic API calls.
	Set replacement to None to remove a patch by its description.
	"""

	action_type: Literal["patch_code"] = "patch_code"
	pattern: str = Field(
	...,
	description="Regex pattern to search for in the generated JS code.",
	)
	replacement: str = Field(
	...,
	description="Replacement string (supports regex backreferences like \\1).",
	)
	description: str = Field(
	...,
	description="Human-readable description of what this patch does.",
	)
	active: bool = Field(
	True,
	description="True to add/keep the patch, False to remove it (matched by description).",
	)


	# Discriminated union — action_type is the discriminator field.
	SlideSkillAction = Annotated[
	EditSectionAction
	\| ReplaceFileAction
	\| AppendExampleAction
	\| EditJsTemplateAction
	\| SetConstraintAction
	\| PatchCodeAction,
	Field(discriminator="action_type"),
	]


	# ---------------------------------------------------------------------------
	# Scores
	# ---------------------------------------------------------------------------


	class SlideScores(BaseModel):
	"""Raw scores from the McKinsey evaluator. Each dimension is 0-15 except
	overall_impression which is 0-10. Total is 0-100."""

	background_layout: int = Field(..., ge=0, le=15)
	color_palette: int = Field(..., ge=0, le=15)
	typography: int = Field(..., ge=0, le=15)
	title_quality: int = Field(..., ge=0, le=15)
	data_presentation: int = Field(..., ge=0, le=15)
	structural_elements: int = Field(..., ge=0, le=15)
	overall_impression: int = Field(..., ge=0, le=10)

	@property
	def total(self) -> int:
	return (
	self.background_layout
	+ self.color_palette
	+ self.typography
	+ self.title_quality
	+ self.data_presentation
	+ self.structural_elements
	+ self.overall_impression
	)


	# ---------------------------------------------------------------------------
	# Observation
	# ---------------------------------------------------------------------------


	class SlideSkillObservation(BaseModel):
	"""
	Observation returned to the agent after each step.

	Contains the full evaluator output so the optimizer LLM has all the
	information it needs to write the next skill revision: numeric scores,
	qualitative strengths/weaknesses, and the one-line verdict.
	"""

	scores: SlideScores
	total: int = Field(..., description="Sum of all score dimensions (0-100).")
	strengths: list[str] = Field(
	default_factory=list,
	description="List of what the slide does well, from the evaluator.",
	)
	weaknesses: list[str] = Field(
	default_factory=list,
	description="List of what needs improvement, from the evaluator.",
	)
	one_line_verdict: str = Field(
	..., description="Single-sentence summary from the evaluator."
	)
	reward: float = Field(
	...,
	description=(
	"Score delta vs. previous step, capped to [-0.3, +0.3] and "
	"normalized to [-1.0, +1.0] by dividing by 100. "
	"Capping reduces reward noise from LLM evaluation variance."
	),
	)
	step: int = Field(..., description="Current step index (0-based).")
	done: bool = Field(..., description="True if max_steps reached.")
	jpg_path: str = Field(
	..., description="Absolute path to the generated slide JPG."
	)
	design_rules_content: str = Field(
	...,
	description="Current DESIGN_RULES.md content (after action was applied).",
	)
	examples_content: str = Field(
	...,
	description="Current EXAMPLES.md content (after action was applied).",
	)
	js_templates: dict[str, str] = Field(
	default_factory=dict,
	description="Active JS templates (block_name → js_code).",
	)
	constraints: list[str] = Field(
	default_factory=list,
	description="Active hard constraints for the generator.",
	)
	code_patches: list[dict[str, str]] = Field(
	default_factory=list,
	description="Active code patches (list of {pattern, replacement, description}).",
	)


	# ---------------------------------------------------------------------------
	# State (internal, not exposed to client)
	# ---------------------------------------------------------------------------


	class SlideSkillState(BaseModel):
	"""Internal environment state. Not serialized to the client."""

	session_id: str
	step: int = 0
	prev_total: int = 0 # score from the previous step (for reward calculation)
	session_dir: str = Field(
	...,
	description=(
	"Absolute path to the isolated session directory under /tmp/. "
	"Contains copies of skill files that this session is free to "
	"modify without affecting other sessions."
	),
	)
	js_templates: dict[str, str] = Field(
	default_factory=dict,
	description="Named JS code blocks injected into generator prompt.",
	)
	constraints: list[str] = Field(
	default_factory=list,
	description="Hard constraints injected into generator prompt.",
	)
	code_patches: list[dict[str, str]] = Field(
	default_factory=list,
	description="Regex patches applied to generated JS post-LLM.",
	)