"""Schema models describing closed-loop episodes and their aggregate report."""

from __future__ import annotations

from pathlib import Path
from typing import Literal

from agents.hero.schema import HeroEpisodeStats
from agents.master.schema import DMObservation, WorldDefinition
from agents.shared.model_schema import StrictModel


class ClosedLoopEpisodeArtifacts(StrictModel):
    """String paths of the files that belong to one episode directory.

    All paths are stored as plain ``str`` values; use
    :meth:`from_episode_dir` to derive them from a directory using the
    standard file names.
    """

    episode_dir: str
    world_generation_attempts_path: str
    world_definition_path: str
    run_record_path: str
    hero_trace_path: str
    transcript_path: str

    @classmethod
    def from_episode_dir(cls, episode_dir: Path) -> "ClosedLoopEpisodeArtifacts":
        """Build the artifact paths for *episode_dir*.

        Args:
            episode_dir: Directory that holds (or will hold) the episode files.

        Returns:
            An instance whose path fields are ``episode_dir`` joined with the
            fixed artifact file names, each converted to ``str``.
        """
        return cls(
            episode_dir=str(episode_dir),
            world_generation_attempts_path=str(
                episode_dir / "world_generation_attempts.jsonl"
            ),
            world_definition_path=str(episode_dir / "world_definition.json"),
            run_record_path=str(episode_dir / "run_record.json"),
            hero_trace_path=str(episode_dir / "hero_trace.jsonl"),
            transcript_path=str(episode_dir / "transcript.jsonl"),
        )


class ClosedLoopEpisodeRecord(StrictModel):
    """Full record of a single closed-loop episode.

    ``status`` is restricted to the four literal values below. The optional
    fields default to ``None``; which statuses populate them is decided by the
    code that builds the record (not visible in this module).
    """

    episode_id: str
    status: Literal["complete", "failed", "compile_failed", "policy_error"]
    target_ratio: float
    compile_attempts: int
    dm_repair_errors: list[str]
    hero_policy_error: str | None = None
    hero_episode_stats: HeroEpisodeStats | None = None
    declared_difficulty_target: float | None = None
    difficulty_target_matches_target_ratio: bool | None = None
    world_definition: WorldDefinition | None = None
    observation: DMObservation
    artifacts: ClosedLoopEpisodeArtifacts


class ClosedLoopEpisodeSummary(StrictModel):
    """Compact per-episode summary; every field except id/status is optional."""

    episode_id: str
    # NOTE(review): free-form ``str`` here, whereas ClosedLoopEpisodeRecord
    # constrains ``status`` with a Literal -- confirm this is intentional.
    status: str
    reward: float | None = None
    player_won: bool | None = None
    ratio: float | None = None
    compile_error: str | None = None
    hero_policy_error: str | None = None


class ClosedLoopAggregateReport(StrictModel):
    """Aggregate figures over a set of episodes (an episode count plus rates and means)."""

    episodes: int
    compile_valid_rate: float
    policy_error_rate: float
    playable_rate: float
    solve_rate: float
    mean_dense_return: float
    mean_invalid_action_penalty: float
    mean_repeat_noop_penalty: float