from __future__ import annotations

from pathlib import Path
from typing import Literal

from agents.hero.schema import HeroEpisodeStats
from agents.master.schema import DMObservation, WorldDefinition
from agents.shared.model_schema import StrictModel


class ClosedLoopEpisodeArtifacts(StrictModel):
    # Locations of the files written for one episode, each held as a plain
    # string rather than a Path object.
    # NOTE(review): described with comments instead of a class docstring in
    # case StrictModel surfaces docstrings in a generated schema — unconfirmed.
    episode_dir: str
    world_generation_attempts_path: str
    world_definition_path: str
    run_record_path: str
    hero_trace_path: str
    transcript_path: str

    @classmethod
    def from_episode_dir(cls, episode_dir: Path) -> "ClosedLoopEpisodeArtifacts":
        """Build the artifact locations for an episode rooted at *episode_dir*.

        Each artifact sits directly inside ``episode_dir`` under a fixed file
        name. Nothing is created or checked on disk; the paths are only
        joined and converted to strings.

        Args:
            episode_dir: Directory that holds (or will hold) the episode files.

        Returns:
            A new instance whose fields are the stringified paths.
        """
        # Field name -> fixed file name inside the episode directory.
        artifact_filenames = {
            "world_generation_attempts_path": "world_generation_attempts.jsonl",
            "world_definition_path": "world_definition.json",
            "run_record_path": "run_record.json",
            "hero_trace_path": "hero_trace.jsonl",
            "transcript_path": "transcript.jsonl",
        }
        resolved_paths = {
            field_name: str(episode_dir / filename)
            for field_name, filename in artifact_filenames.items()
        }
        return cls(episode_dir=str(episode_dir), **resolved_paths)


class ClosedLoopEpisodeRecord(StrictModel):
    # Detailed record for a single closed-loop episode: outcome status,
    # DM-side and hero-side diagnostics, the DM observation, and the paths of
    # the files associated with the episode.
    # NOTE(review): field meanings below are inferred from names where marked
    # "presumably" — confirm against the code that populates this record.
    episode_id: str
    # Restricted to exactly these four outcome labels.
    status: Literal["complete", "failed", "compile_failed", "policy_error"]
    target_ratio: float
    # Presumably the number of world compile attempts made — TODO confirm.
    compile_attempts: int
    # Required list (no default); pass an empty list when there are no errors.
    dm_repair_errors: list[str]
    # The following fields are optional and default to None.
    hero_policy_error: str | None = None
    hero_episode_stats: HeroEpisodeStats | None = None
    declared_difficulty_target: float | None = None
    # Presumably whether declared_difficulty_target agrees with target_ratio;
    # the comparison itself is not performed in this class.
    difficulty_target_matches_target_ratio: bool | None = None
    world_definition: WorldDefinition | None = None
    # Required even though world_definition above may be None.
    observation: DMObservation
    artifacts: ClosedLoopEpisodeArtifacts


class ClosedLoopEpisodeSummary(StrictModel):
    # Compact per-episode summary: only the id and status are required; every
    # other field is optional and defaults to None.
    episode_id: str
    # Plain str here, unlike the Literal-constrained status on
    # ClosedLoopEpisodeRecord, so the annotation does not restrict the value.
    status: str
    reward: float | None = None
    player_won: bool | None = None
    ratio: float | None = None
    # Single error strings (not lists); None when not set.
    compile_error: str | None = None
    hero_policy_error: str | None = None


class ClosedLoopAggregateReport(StrictModel):
    # Aggregate metrics over a set of episodes. Every field is required; the
    # values are supplied by the caller and nothing is computed in this class.
    # Presumably the number of episodes aggregated — TODO confirm.
    episodes: int
    # NOTE(review): the *_rate fields look like fractions of `episodes`, but
    # their range and denominators are not established in this file — confirm.
    compile_valid_rate: float
    policy_error_rate: float
    playable_rate: float
    solve_rate: float
    # Presumably per-episode means; the averaging happens elsewhere.
    mean_dense_return: float
    mean_invalid_action_penalty: float
    mean_repeat_noop_penalty: float