from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingComparison:
    true_positives: int
    false_positives: int
    false_negatives: int

    @property
    def precision(self) -> float:
        denom = self.true_positives + self.false_positives
        return self.true_positives / denom if denom else 0.0

    @property
    def recall(self) -> float:
        denom = self.true_positives + self.false_negatives
        return self.true_positives / denom if denom else 0.0


class TrainingRunManager:
    """Build training-ready datasets and metrics from deterministic findings and agent outputs."""

    def compare(self, deterministic_findings: set[str], agent_findings: set[str]) -> TrainingComparison:
        tp = len(deterministic_findings & agent_findings)
        fp = len(agent_findings - deterministic_findings)
        fn = len(deterministic_findings - agent_findings)
        return TrainingComparison(true_positives=tp, false_positives=fp, false_negatives=fn)

    def build_preference_record(
        self,
        *,
        prompt: str,
        agent_output: str,
        deterministic_targets: list[str],
        reward: float,
    ) -> dict[str, object]:
        return {
            "prompt": prompt,
            "agent_output": agent_output,
            "deterministic_targets": deterministic_targets,
            "reward": reward,
        }

    def save_records(self, path: Path, records: list[dict[str, object]]) -> None:
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("w", encoding="utf-8") as handle:
            for record in records:
                handle.write(json.dumps(record, sort_keys=True) + "\n")

    def assert_non_regression(
        self,
        *,
        baseline_precision: float,
        baseline_recall: float,
        current_precision: float,
        current_recall: float,
        tolerance: float = 0.01,
    ) -> None:
        min_precision = baseline_precision - tolerance
        min_recall = baseline_recall - tolerance
        if current_precision < min_precision:
            raise ValueError(
                f"Precision regression detected: baseline={baseline_precision:.4f}, current={current_precision:.4f}"
            )
        if current_recall < min_recall:
            raise ValueError(
                f"Recall regression detected: baseline={baseline_recall:.4f}, current={current_recall:.4f}"
            )