# app.py
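# Main runner script for FORGE-v4.
# By default, runs one demo episode (improving_coder vs. the tiered BreakerAgent)
# and prints a structured results report. The --benchmark and --compare flags
# dispatch to the trainer's multi-episode modes instead.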

import sys

from env import FORGEEnv
from memory import CoachMemory
from agents import get_coder_code, coder_version_label
from logger import log_episode, update_summary, print_log_paths, write_episode_report
from config import DEFAULT_CANDIDATES_PER_STEP, STEPS_PER_EPISODE, ensure_runtime_dirs
from policies.factory import build_policy
from trainer import run_benchmark_mode, run_compare_mode


# ──────────────────────────────────────────────
# Demo configuration
# ──────────────────────────────────────────────
# Coder strategy used when --coder is not supplied on the command line.
DEFAULT_CODER_VERSION = "improving_coder"
# Policy name handed to build_policy() when --policy is not supplied.
DEFAULT_POLICY = "heuristic"


def run_demo_episode(
    coder_version: str = DEFAULT_CODER_VERSION,
    policy_name: str = DEFAULT_POLICY,
    candidates_per_step: int = DEFAULT_CANDIDATES_PER_STEP,
    generate_metrics: bool = False,
) -> None:
    """
    Execute one demo episode and print a rich results report.

    Coach memory is cleared before the run, so every demo starts from an
    empty lesson store. After the step loop the episode is recorded through
    log_episode(), update_summary() and write_episode_report(), and a
    summary is printed to stdout.

    Args:
        coder_version: Which coder strategy to use.
            "weak_coder_v1" | "weak_coder_v2" | "improving_coder"
        policy_name: Policy identifier handed to build_policy().
        candidates_per_step: Number of candidate solutions requested from
            the policy on every step.
        generate_metrics: When True, call metrics.generate_charts() after
            the report and list the chart paths it returns.
    """
    # Read the step budget from the config module at call time. The
    # module-level `from config import STEPS_PER_EPISODE` binds the value once
    # at import, so an override applied later (the --steps flag assigns
    # config.STEPS_PER_EPISODE) would otherwise never reach this loop.
    import config

    steps_per_episode = config.STEPS_PER_EPISODE

    _banner()

    ensure_runtime_dirs()
    memory = CoachMemory()
    memory.clear()          # Start fresh for the demo run
    env = FORGEEnv(memory=memory)
    policy = build_policy(policy_name, strategy=coder_version)
    state = env.reset()

    episode = state["episode"]
    print(f"\n{'─'*60}")
    print(f"  Task ID  : {state['task_id']}")
    print(f"  Episode  : {episode}")
    print(f"  Coder    : {coder_version_label(coder_version, episode)}")
    print(f"  Breaker  : {env.breaker.tier_name}  (starts here, tiers up during run)")
    print(f"{'─'*60}")
    print(f"\n  Problem:\n")
    print(f"  {state['problem_description']}")
    print()

    # ── Accumulators ──────────────────────────────────────────────────────
    ep_coder_rewards:   list[float] = []
    ep_breaker_rewards: list[float] = []
    ep_pass_rates:      list[float] = []
    ep_fail_counts:     list[int]   = []
    ep_error_counts:    list[int]   = []
    ep_timeout_counts:  list[int]   = []
    ep_break_rates:     list[float] = []

    for step_num in range(1, steps_per_episode + 1):
        # Build coder action: blank candidates are dropped, and the scripted
        # coder output is used only when the policy produced nothing usable.
        candidates = policy.generate_candidates(state, num_candidates=candidates_per_step)
        candidate_solutions = [candidate.code for candidate in candidates if candidate.code.strip()]
        fallback_code = get_coder_code(coder_version, episode=episode)
        action = {
            "coder_code": candidate_solutions[0] if candidate_solutions else fallback_code,
            "candidate_solutions": candidate_solutions,
            "coder_version": coder_version,
        }

        result = env.step(action)
        state = result["state"]

        cr = result["coder_reward"]
        br = result["breaker_reward"]

        # Accumulate
        ep_coder_rewards.append(cr["total_reward"])
        ep_breaker_rewards.append(br["total_reward"])
        ep_pass_rates.append(cr["pass_rate"])
        ep_fail_counts.append(cr["fail_count"])
        ep_error_counts.append(cr["error_count"])
        # timeout_count is read defensively; a missing key counts as zero.
        ep_timeout_counts.append(cr.get("timeout_count", 0))
        ep_break_rates.append(br["break_rate"])

        _print_step_report(step_num, steps_per_episode, result)

        if result["done"]:
            break

    # ── Episode aggregates (computed once, reused for logs and report) ────
    avg_coder_reward = _mean(ep_coder_rewards)
    avg_breaker_reward = _mean(ep_breaker_rewards)
    avg_pass_rate = _mean(ep_pass_rates)
    avg_break_rate = _mean(ep_break_rates)
    total_fail_count = sum(ep_fail_counts)
    total_error_count = sum(ep_error_counts)
    total_timeout_count = sum(ep_timeout_counts)

    # ── Episode log ───────────────────────────────────────────────────────
    log_episode(
        episode=episode,
        coder_version=coder_version,
        breaker_tier=env.breaker.current_tier,
        avg_coder_reward=avg_coder_reward,
        avg_breaker_reward=avg_breaker_reward,
        avg_pass_rate=avg_pass_rate,
        total_fail_count=total_fail_count,
        total_error_count=total_error_count,
        total_timeout_count=total_timeout_count,
        avg_break_rate=avg_break_rate,
        steps=env.step_count,
    )

    update_summary(
        total_episodes=1,
        coder_version=coder_version,
        final_breaker_tier=env.breaker.current_tier,
        all_coder_rewards=ep_coder_rewards,
        all_breaker_rewards=ep_breaker_rewards,
        all_pass_rates=ep_pass_rates,
        all_break_rates=ep_break_rates,
        coach_memory_summary=memory.summary(),
    )

    write_episode_report(
        episode=episode,
        payload={
            "episode": episode,
            "coder_version": coder_version,
            "policy": policy.name,
            "avg_coder_reward": avg_coder_reward,
            "avg_breaker_reward": avg_breaker_reward,
            "avg_pass_rate": avg_pass_rate,
            "avg_break_rate": avg_break_rate,
            "total_fail_count": total_fail_count,
            "total_error_count": total_error_count,
            "total_timeout_count": total_timeout_count,
            "steps": env.step_count,
        },
    )

    # ── Final report ──────────────────────────────────────────────────────
    print(f"{'═'*60}")
    print("  EPISODE SUMMARY")
    print(f"{'═'*60}")
    print(f"  Coder version       : {coder_version_label(coder_version, episode)}")
    print(f"  Final breaker tier  : {env.breaker.tier_name}")
    print(f"  Avg pass rate       : {avg_pass_rate:.2f}")
    print(f"  Avg coder reward    : {avg_coder_reward:+.4f}")
    print(f"  Avg breaker reward  : {avg_breaker_reward:+.4f}")
    print(f"  Total fail count    : {total_fail_count}")
    print(f"  Total error count   : {total_error_count}")
    # Timeouts are logged and written to the episode report, so the printed
    # summary shows them as well.
    print(f"  Total timeout count : {total_timeout_count}")
    print(f"  Avg break rate      : {avg_break_rate:.2f}")
    print()
    print("  Coach memory summary:")
    summary = memory.summary()
    print(f"    Lessons stored    : {summary.get('total_lessons', 0)}")
    notes = summary.get("recent_coach_notes", [])
    if notes:
        print("    Recent coach notes:")
        for note in notes:
            print(f"      • {note}")
    print()
    print("  Log files updated:")
    print_log_paths()
    if generate_metrics:
        # Imported lazily so the metrics module is only loaded when charts
        # are actually requested.
        from metrics import generate_charts
        chart_paths = generate_charts()
        if chart_paths:
            print("  Charts generated:")
            for key, path in chart_paths.items():
                print(f"    - {key}: {path}")
    print(f"{'═'*60}")


def _mean(values: list) -> float:
    """Return the mean of *values* rounded to 4 decimals, or 0.0 when empty."""
    return round(sum(values) / len(values), 4) if values else 0.0


def _print_step_report(step_num: int, total_steps: int, result: dict) -> None:
    """Print the per-step coder/breaker lines for one env.step() result."""
    state = result["state"]
    cr = result["coder_reward"]
    br = result["breaker_reward"]
    info = result["info"]

    print(f"  ── Step {step_num}/{total_steps}  [breaker: {info['breaker_tier_name']}]")
    print(
        f"     Coder   → pass_rate: {cr['pass_rate']:.2f}  "
        f"| passes: {cr['pass_count']}  "
        f"| fails: {cr['fail_count']}  "
        f"| errors: {cr['error_count']}  "
        f"| reward: {cr['total_reward']:+.2f}"
    )
    print(
        f"     Breaker → break_rate: {br['break_rate']:.2f}  "
        f"| breaks: {br['breaks']}  "
        f"| no-break: {br['passes']}  "
        f"| reward: {br['total_reward']:+.2f}"
    )
    rankings = info.get("candidate_rankings", [])
    if rankings:
        # The first ranking entry is reported as the best candidate.
        best = rankings[0]
        print(
            f"     Candidate ranking → count: {len(rankings)} | "
            f"selected_idx: {info.get('selected_candidate_index', -1)} | "
            f"best pass_rate: {best['pass_rate']:.2f} | "
            f"best runtime_ms: {best['avg_runtime_ms']:.2f}"
        )
    if state.get("recent_breaker_case") is not None:
        print(f"     Recent adversarial input: {state['recent_breaker_case']}")
    print()


# ──────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────

def _banner() -> None:
    """Print the boxed FORGE-v4 title banner, preceded by one blank line."""
    top_edge = "╔══════════════════════════════════════════════════════════╗"
    title_row = "║   FORGE-v4  |  Adversarial Code Generation Environment  ║"
    bottom_edge = "╚══════════════════════════════════════════════════════════╝"
    # The leading empty string produces the blank line above the box.
    print("\n".join(("", top_edge, title_row, bottom_edge)))


def _print_help() -> None:
    """Print the command-line usage text, one option per row, to stdout."""
    usage_rows = (
        "Usage: python app.py [OPTIONS]",
        "",
        "Options:",
        "  --coder VERSION   Coder strategy to use:",
        "                      weak_coder_v1   (bubble sort — slow/weak)",
        "                      weak_coder_v2   (selection sort + abs() bug)",
        "                      improving_coder (adapts each episode)  [default]",
        "  --steps N         Override STEPS_PER_EPISODE for this run",
        "  --policy NAME     Defender policy: heuristic | api | local | offline | model",
        "  --candidates N    Candidate solutions to evaluate per step",
        "  --charts          Generate trend charts in outputs/",
        "  --benchmark N     Run benchmark mode for N episodes (minimum 20)",
        "  --compare         Run baseline heuristic vs model policy comparison",
        "  --help / -h       Show this message",
    )
    for row in usage_rows:
        print(row)


# ──────────────────────────────────────────────
# Entry point
# ──────────────────────────────────────────────

def main() -> None:
    """
    Parse command-line options and dispatch to the selected run mode.

    Options are read directly from sys.argv (see _print_help() for the list).
    Precedence: --help exits first; otherwise --compare, then --benchmark,
    each exit with status 0 after printing their report; with neither flag a
    single demo episode is run. Any invalid option value prints an error
    message and exits with status 1.
    """
    # `from config import STEPS_PER_EPISODE` at the top of this file copied
    # the value into this module's namespace. Assigning only to
    # config.STEPS_PER_EPISODE leaves that copy stale, so the --steps override
    # below rebinds both names.
    global STEPS_PER_EPISODE

    args = sys.argv[1:]

    if "--help" in args or "-h" in args:
        _print_help()
        sys.exit(0)

    coder_version = DEFAULT_CODER_VERSION
    policy_name = DEFAULT_POLICY
    candidates_per_step = DEFAULT_CANDIDATES_PER_STEP

    if "--coder" in args:
        idx = args.index("--coder")
        try:
            coder_version = args[idx + 1]
        except IndexError:
            print("Error: --coder requires a version argument.")
            sys.exit(1)
        valid = ("weak_coder_v1", "weak_coder_v2", "improving_coder")
        if coder_version not in valid:
            print(f"Error: unknown coder version '{coder_version}'. Choose from: {valid}")
            sys.exit(1)

    if "--steps" in args:
        idx = args.index("--steps")
        try:
            steps = int(args[idx + 1])
            # Zero or negative budgets would produce an episode with no steps.
            if steps < 1:
                raise ValueError(steps)
        except (IndexError, ValueError):
            print("Error: --steps requires an integer >= 1.")
            sys.exit(1)

        import config

        # Update the config module for code that reads the attribute at call
        # time, and this module's own binding for code that uses the
        # from-imported name.
        # NOTE(review): modules that did `from config import STEPS_PER_EPISODE`
        # themselves keep their original value — confirm env/trainer read it
        # through the config module.
        config.STEPS_PER_EPISODE = steps
        STEPS_PER_EPISODE = steps

    if "--policy" in args:
        idx = args.index("--policy")
        try:
            policy_name = args[idx + 1].strip().lower()
            # "mock" is accepted here although the help/error text does not list it.
            if policy_name not in ("heuristic", "api", "local", "offline", "mock", "model"):
                raise ValueError(policy_name)
        except (IndexError, ValueError):
            print("Error: --policy must be one of: heuristic, api, local, offline, model.")
            sys.exit(1)

    if "--candidates" in args:
        idx = args.index("--candidates")
        try:
            # Values below 1 are clamped to 1 rather than rejected.
            candidates_per_step = max(1, int(args[idx + 1]))
        except (IndexError, ValueError):
            print("Error: --candidates requires an integer >= 1.")
            sys.exit(1)

    if "--compare" in args:
        # Compare mode always uses the "model" policy and 20 episodes; the
        # --policy value is not consulted here.
        report = run_compare_mode(
            model_policy_name="model",
            episodes=20,
            candidates_per_step=candidates_per_step,
            verbose=False,
        )
        improvement = report["improvement"]
        print("Comparison complete")
        print(f"  Pass-rate delta      : {improvement['pass_rate_delta']:+.4f}")
        print(f"  Defender reward delta: {improvement['defender_reward_delta']:+.4f}")
        print(f"  Adversary reward delta: {improvement['adversary_reward_delta']:+.4f}")
        print(f"  Tier Progression Delta: {improvement['max_tier_delta']:+d}")
        print("  Judge assets exported to outputs/")
        sys.exit(0)

    if "--benchmark" in args:
        idx = args.index("--benchmark")
        try:
            benchmark_episodes = int(args[idx + 1])
        except (IndexError, ValueError):
            print("Error: --benchmark requires an integer argument.")
            sys.exit(1)

        # NOTE(review): the help text states a minimum of 20 episodes, which
        # is not enforced here — presumably run_benchmark_mode handles it;
        # confirm.
        report = run_benchmark_mode(
            policy_name=policy_name,
            episodes=benchmark_episodes,
            candidates_per_step=candidates_per_step,
            verbose=False,
        )
        print("Benchmark complete")
        print(f"  Episodes: {report['episodes']}")
        for row in report.get("rows", []):
            print(
                f"  Ep {row['episode']:>3} | pass={row['pass_rate']:.2f} "
                f"| defender={row['defender_reward']:+.2f} "
                f"| adversary={row['adversary_reward']:+.2f} "
                f"| rank={row['chosen_candidate_rank']} "
                f"| tier={row['tier_progression']}"
            )
        print("  Judge assets exported to outputs/")
        sys.exit(0)

    run_demo_episode(
        coder_version=coder_version,
        policy_name=policy_name,
        candidates_per_step=candidates_per_step,
        generate_metrics=("--charts" in args),
    )


if __name__ == "__main__":
    main()