"""Run feature ablation study for word segmentation.

Runs leave-one-out and additive experiments, collecting results into a summary.

Usage:
    python src/run_ablation.py
"""

import csv
import shlex
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path

# Repository root: two directory levels above this file. The path is resolved
# first so that a relative ``__file__`` (e.g. when the script is launched from
# inside its own directory) still yields the real project root instead of ".".
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# CSV file that receives one row per experiment.
RESULTS_FILE = PROJECT_ROOT / "results" / "word_segmentation" / "ablation_study.csv"

# Leave-one-out: every experiment switches off exactly one feature group and
# keeps the rest enabled. Keys are experiment names, values are the override
# string passed to the training script.
LEAVE_ONE_OUT = {
    f"all-{group}": f"model.features.{group}=false"
    for group in ("type", "morphology", "left", "right", "bigram", "trigram")
}

# Additive: start from a form-only baseline and switch feature groups back on
# one step at a time. Each override string disables every group that is NOT
# yet enabled, always listed in the canonical order below.
_FEATURE_GROUPS = ("type", "morphology", "left", "right", "bigram", "trigram")


def _all_off_except(*enabled):
    """Return the override string disabling every group not in *enabled*."""
    return " ".join(
        f"model.features.{group}=false"
        for group in _FEATURE_GROUPS
        if group not in enabled
    )


ADDITIVE = {
    "form_only":              _all_off_except(),
    "form+left":              _all_off_except("left"),
    "form+left+right":        _all_off_except("left", "right"),
    "form+ctx+bigram":        _all_off_except("left", "right", "bigram"),
    "form+ctx+bi+type":       _all_off_except("left", "right", "bigram", "type"),
    "form+ctx+bi+type+morph": _all_off_except("left", "right", "bigram", "type", "morphology"),
    # full model (all features) is the baseline -- already run
}


def parse_metrics(output: str) -> dict:
    """Parse evaluation metrics from the training script's text output.

    Each line is checked against the known labels; the first label found on a
    line decides how that line is parsed:

    * ``Syllable-level Accuracy:`` -> ``syl_accuracy`` (float)
    * ``Word-level Precision:``    -> ``word_precision`` (float)
    * ``Word-level Recall:``       -> ``word_recall`` (float)
    * ``Word-level F1:``           -> ``word_f1`` (float)
    * ``Features: ... (N templates)`` -> ``num_templates`` (int)

    Args:
        output: Captured stdout/stderr text of a training run.

    Returns:
        A dict holding only the metrics that were found and could be parsed.
        A line with a recognised label but an unparsable value is skipped
        instead of raising, so one odd log line cannot abort a whole study.
    """
    float_labels = (
        ("Syllable-level Accuracy:", "syl_accuracy"),
        ("Word-level Precision:", "word_precision"),
        ("Word-level Recall:", "word_recall"),
        ("Word-level F1:", "word_f1"),
    )

    metrics = {}
    for line in output.split("\n"):
        for label, key in float_labels:
            if label in line:
                # The value is whatever follows the last colon on the line.
                try:
                    metrics[key] = float(line.rsplit(":", 1)[-1].strip())
                except ValueError:
                    pass
                break
        else:
            if "templates)" in line and "Features:" in line:
                # Take the parenthesis closest to "templates)" so that
                # parentheses inside the feature list itself are ignored.
                head = line[: line.rfind("templates)")]
                count_text = head[head.rfind("(") + 1:].strip()
                try:
                    metrics["num_templates"] = int(count_text)
                except ValueError:
                    pass
    return metrics


def run_experiment(name: str, overrides: str) -> dict:
    """Run a single training experiment and collect its metrics.

    The training script ``src/train_word_segmentation.py`` is launched from
    ``PROJECT_ROOT`` with the given overrides plus an ``output=...`` override
    that points at a per-experiment directory.

    Args:
        name: Experiment label; also used as the last path component of the
            ``output=`` override.
        overrides: Whitespace-separated ``key=value`` overrides appended to
            the training command. May be empty.

    Returns:
        A dict with ``name``, ``time_seconds`` and the metric keys
        ``word_f1``, ``syl_accuracy``, ``word_precision``, ``word_recall``
        and ``num_templates``. When no word-level F1 is found in the output
        the run is reported as failed and all metric values are 0; metrics
        missing from an otherwise successful run also default to 0.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"  Experiment: {name}")
    print(f"  Overrides: {overrides}")
    print(banner)

    # Use the interpreter running this script (so an active virtualenv is
    # honoured) and pass an argument list instead of a shell string.
    cmd = [
        sys.executable or "python",
        "src/train_word_segmentation.py",
        *shlex.split(overrides),
        f"output=models/word_segmentation/ablation/{name}",
    ]

    # perf_counter is monotonic, so the duration is immune to clock changes.
    start = time.perf_counter()
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        errors="replace",  # never crash on undecodable bytes in the logs
        cwd=str(PROJECT_ROOT),
    )
    elapsed = time.perf_counter() - start

    output = result.stdout + result.stderr
    # Show only the tail of the output to keep the console readable.
    print(output[-500:])

    if result.returncode != 0:
        print(f"  [warn] training script exited with code {result.returncode}")

    metrics = parse_metrics(output)
    metrics["name"] = name
    metrics["time_seconds"] = round(elapsed, 1)

    metric_keys = (
        "word_f1",
        "syl_accuracy",
        "word_precision",
        "word_recall",
        "num_templates",
    )
    if "word_f1" in metrics:
        # Fill in anything the parser did not find so that the formatting
        # below (and later consumers) never hit a missing key.
        for key in metric_keys:
            metrics.setdefault(key, 0)
        print(f"  => Word F1: {metrics['word_f1']:.4f}  Syl Acc: {metrics['syl_accuracy']:.4f}  ({elapsed:.0f}s)")
    else:
        print(f"  => FAILED ({elapsed:.0f}s)")
        for key in metric_keys:
            metrics[key] = 0

    return metrics


def _save_results(results: list) -> None:
    """Write every collected result row to RESULTS_FILE, replacing old content."""
    fieldnames = ["name", "num_templates", "syl_accuracy", "word_precision", "word_recall", "word_f1", "time_seconds"]
    with open(RESULTS_FILE, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows({k: r.get(k, "") for k in fieldnames} for r in results)


def _print_summary(results: list) -> None:
    """Print a table of all results with the F1 delta against the first row."""
    print("\n" + "=" * 80)
    # Column widths mirror the row format below, including the delta column.
    print(f"{'Experiment':<30} {'Templates':>9} {'Syl Acc':>8} {'Word F1':>8} {'Delta':>10} {'Time':>6}")
    print("-" * 80)

    # The first result is the full-model baseline every delta refers to.
    full_f1 = results[0]["word_f1"] if results else 0

    for r in results:
        delta = r["word_f1"] - full_f1
        delta_str = f"({delta:+.4f})" if r["name"] != "full" else ""
        print(f"{r['name']:<30} {r.get('num_templates', '?'):>9} {r['syl_accuracy']:>8.4f} {r['word_f1']:>8.4f} {delta_str:>10} {r['time_seconds']:>5.0f}s")

    print("=" * 80)


def main():
    """Run the baseline, leave-one-out and additive experiments.

    Results are written to RESULTS_FILE after every experiment, so an
    interrupted or crashed study keeps the rows collected so far. A summary
    table is printed once all experiments have finished.
    """
    RESULTS_FILE.parent.mkdir(parents=True, exist_ok=True)

    # (section title, {experiment name: overrides}); the baseline runs first
    # because the summary computes deltas relative to the first result.
    sections = (
        ("FULL MODEL (baseline)", {"full": ""}),
        ("LEAVE-ONE-OUT EXPERIMENTS", LEAVE_ONE_OUT),
        ("ADDITIVE EXPERIMENTS", ADDITIVE),
    )

    all_results = []
    for title, experiments in sections:
        print("\n" + "#" * 60)
        print(f"  {title}")
        print("#" * 60)
        for name, overrides in experiments.items():
            all_results.append(run_experiment(name, overrides))
            # Persist incrementally: each run may take a long time.
            _save_results(all_results)

    print(f"\nResults saved to {RESULTS_FILE}")

    _print_summary(all_results)


# Run the study only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()