Spaces:

kgast
/

openenv-hackathon-tesserae

Running

openenv-hackathon-tesserae / openenv /slide_skill_environment.py

kabalan

add initial Colab notebook

eea5114 about 1 month ago

7.24 kB

	"""
	Slide Skill Environment — OpenEnv-compatible environment for optimizing
	McKinsey-style PowerPoint slide generation.

	Concurrency model:
	SUPPORTS_CONCURRENT_SESSIONS = True

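	Each session gets an isolated working directory at
	SESSION_ROOT/slide_skill_{session_id}/ (SESSION_ROOT defaults to <repo>/tmp
	and can be overridden via the SLIDE_SKILL_SESSION_ROOT env var, e.g. /tmp).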
	All skill files (DESIGN_RULES.md, EXAMPLES.md, SKILL.md, editing.md,
	pptxgenjs.md) are copied there on reset() and modified in place during
	the session. The shared repo files are never modified.

	Episode timing:
	Each step involves two LLM calls (generator + evaluator) plus Node.js and
	LibreOffice. Expect 60-120 seconds per step. At max_steps=7, a full episode
	runs 7-14 minutes.

	Reward function:
	reward = clip(total_score - prev_total_score, -30, +30) / 100
	Capping at +/-30 points (+/-0.3 reward) dampens LLM evaluation noise.
	"""

	from __future__ import annotations

	import os
	import shutil
	import uuid
	from pathlib import Path
	from typing import ClassVar

	from models import (
	SlideScores,
	SlideSkillAction,
	SlideSkillObservation,
	SlideSkillState,
	)
	from skill_manager import SkillManager
	from slide_generator import SlideGenerator
	from evaluator_adapter import EvaluatorAdapter


	# Repository layout. REPO_ROOT is two directory levels above this file;
	# adjust if the package moves.
	REPO_ROOT = Path(__file__).parent.parent
	BASELINE_DIR = REPO_ROOT.joinpath("skill_files_baseline")
	PPTX_SKILL_DIR = REPO_ROOT.joinpath("pptx")
	TASK_PROMPT_PATH = REPO_ROOT.joinpath("output", "TASK_PROMPT.md")
	REFERENCE_DIR = REPO_ROOT.joinpath("output", "reference")

	# Reward shaping: the score delta is clipped to +/-REWARD_CLIP_POINTS and
	# then divided by REWARD_SCALE, giving a reward in [-0.3, +0.3].
	REWARD_CLIP_POINTS = 30
	REWARD_SCALE = 100.0

	# Episode length; overridable through the SLIDE_SKILL_MAX_STEPS env var.
	MAX_STEPS = int(os.getenv("SLIDE_SKILL_MAX_STEPS", "7"))

	# Root for per-session directories. Falls back to <repo>/tmp locally; set
	# SLIDE_SKILL_SESSION_ROOT so HuggingFace Spaces (read-only app dir) can
	# point it at /tmp instead.
	_default_session_root = str(REPO_ROOT / "tmp")
	SESSION_ROOT = Path(os.getenv("SLIDE_SKILL_SESSION_ROOT", _default_session_root))

	# File names copied into every session: the baseline skill files, and the
	# generic pptx skill/tooling files.
	BASELINE_FILES = ("DESIGN_RULES.md", "EXAMPLES.md")
	PPTX_SKILL_FILES = ("SKILL.md", "editing.md", "pptxgenjs.md")


	class SlideSkillEnvironment:
		"""OpenEnv environment for the Skill Forge optimization loop.

		Keeps one ``SlideSkillState`` per session ID in memory. Each session
		works in its own directory, ``SESSION_ROOT/slide_skill_{session_id}``,
		which ``reset()`` populates and ``close()`` removes. One
		``SlideGenerator`` and one ``EvaluatorAdapter`` are created at
		construction time and used for every session.
		"""

		SUPPORTS_CONCURRENT_SESSIONS: ClassVar[bool] = True

		def __init__(self) -> None:
			"""Create the empty session registry, the generator, and the evaluator."""
			self._sessions: dict[str, SlideSkillState] = {}
			self._generator = SlideGenerator(
				task_prompt_path=TASK_PROMPT_PATH,
				pptx_skill_dir=PPTX_SKILL_DIR,
				reference_dir=REFERENCE_DIR,
			)
			self._evaluator = EvaluatorAdapter(reference_dir=REFERENCE_DIR)

		# ------------------------------------------------------------------
		# Public OpenEnv interface
		# ------------------------------------------------------------------

		def reset(self, session_id: str | None = None) -> str:
			"""
			Initialize or reinitialize a session.

			Creates an isolated working directory under ``SESSION_ROOT`` (any
			existing directory for the same ID is deleted first) and copies both
			the baseline skill files and the generic pptx tooling files into it.

			Args:
				session_id: ID to (re)initialize. A random UUID4 string is
					generated when omitted or empty.

			Returns:
				The session_id of the initialized session.

			Raises:
				ValueError: If session_id contains a path separator, which
					would let the session directory escape ``SESSION_ROOT``.
				FileNotFoundError: If a baseline skill file is missing.
			"""
			session_id = session_id or str(uuid.uuid4())

			# The ID becomes part of a path that is rmtree'd below, so it must
			# name a single path component. Checked before touching the disk.
			dir_name = f"slide_skill_{session_id}"
			if Path(dir_name).name != dir_name:
				raise ValueError(
					f"Invalid session_id {session_id!r}: "
					"must not contain path separators."
				)

			session_dir = SESSION_ROOT / dir_name
			if session_dir.exists():
				shutil.rmtree(session_dir)
			session_dir.mkdir(parents=True)

			# Copy baseline skill files (DESIGN_RULES.md, EXAMPLES.md).
			# These are mandatory: step() reads both back for the observation.
			for fname in BASELINE_FILES:
				src = BASELINE_DIR / fname
				if not src.exists():
					raise FileNotFoundError(
						f"Baseline file missing: {src}. "
						"Commit skill_files_baseline/ to the repo."
					)
				shutil.copy2(src, session_dir / fname)

			# Copy generic pptx skill/tooling files so the agent can edit them.
			# Best effort: a missing tooling file is skipped, not an error.
			for fname in PPTX_SKILL_FILES:
				src = PPTX_SKILL_DIR / fname
				if src.exists():
					shutil.copy2(src, session_dir / fname)

			self._sessions[session_id] = SlideSkillState(
				session_id=session_id,
				step=0,
				prev_total=0,
				session_dir=str(session_dir),
			)
			return session_id

		def step(self, session_id: str, action: SlideSkillAction) -> SlideSkillObservation:
			"""
			Apply an action, run the generation pipeline, evaluate, and return
			an observation.

			Args:
				session_id: Must be a live session (call reset() first).
				action: Any SlideSkillAction variant.

			Returns:
				SlideSkillObservation with scores, feedback, reward, and file contents.

			Raises:
				KeyError: If session_id is not found.
				RuntimeError: If the generation or evaluation pipeline fails.
			"""
			state = self._sessions[session_id]
			session_dir = Path(state.session_dir)

			# 1. Apply the action to the session's skill files / state.
			manager = SkillManager(session_dir, state)
			manager.apply(action)

			# 2. Run the full generation pipeline.
			# Pass state so the generator can inject templates/constraints
			# and apply code patches.
			jpg_path = self._generator.generate(
				session_id=session_id,
				session_dir=session_dir,
				state=state,
			)

			# 3. Evaluate the generated slide.
			eval_result = self._evaluator.evaluate(jpg_path)

			# 4. Compute reward (capped score delta).
			# prev_total is 0 right after reset(), so the first step's delta
			# is the raw total before clipping.
			delta = eval_result["total"] - state.prev_total
			clipped_delta = max(-REWARD_CLIP_POINTS, min(REWARD_CLIP_POINTS, delta))
			reward = clipped_delta / REWARD_SCALE

			# 5. Update state.
			state.step += 1
			state.prev_total = eval_result["total"]
			done = state.step >= MAX_STEPS

			# 6. Read back current file contents for the observation.
			design_rules = (session_dir / "DESIGN_RULES.md").read_text(encoding="utf-8")
			examples = (session_dir / "EXAMPLES.md").read_text(encoding="utf-8")

			scores = SlideScores(**eval_result["scores"])

			# Containers held on the state are copied so the observation does
			# not alias the session's mutable state.
			return SlideSkillObservation(
				scores=scores,
				total=eval_result["total"],
				strengths=eval_result.get("strengths", []),
				weaknesses=eval_result.get("weaknesses", []),
				one_line_verdict=eval_result["one_line_verdict"],
				reward=reward,
				step=state.step,
				done=done,
				jpg_path=str(jpg_path),
				design_rules_content=design_rules,
				examples_content=examples,
				js_templates=dict(state.js_templates),
				constraints=list(state.constraints),
				code_patches=list(state.code_patches),
			)

		def close(self, session_id: str) -> None:
			"""
			Clean up session resources.

			Forgets the session and deletes its working directory if it still
			exists. Unknown session IDs are ignored, so calling close() twice
			is harmless.

			Args:
				session_id: ID of the session to close.
			"""
			state = self._sessions.pop(session_id, None)
			if state is None:
				return
			session_dir = Path(state.session_dir)
			if session_dir.exists():
				shutil.rmtree(session_dir)