import csv
from collections import Counter, defaultdict
from pathlib import Path

# Absolute path of the "survey_outputs" directory that sits next to this file;
# the CSV files summarized by this script are read from here.
OUTPUT_DIR = Path(__file__).resolve().parent / "survey_outputs"


def load_rows(output_dir=None):
    """Load every row from the ``*.csv`` files in a directory.

    Files are read in sorted path order. Each row is the dict produced by
    ``csv.DictReader``, with one extra key, ``"_source_file"``, set to the
    name of the file the row came from.

    Args:
        output_dir: Directory to scan for CSV files. Defaults to the
            module-level ``OUTPUT_DIR`` when omitted or ``None``.

    Returns:
        A list of row dicts from all files, in file order then row order.
    """
    directory = OUTPUT_DIR if output_dir is None else Path(output_dir)
    rows = []
    for path in sorted(directory.glob("*.csv")):
        # newline="" leaves line-ending handling to the csv module, so quoted
        # fields that contain line breaks are parsed intact. "utf-8-sig"
        # decodes plain UTF-8 unchanged but drops a leading byte-order mark,
        # which would otherwise be glued onto the first column name.
        with path.open("r", encoding="utf-8-sig", newline="") as f:
            for row in csv.DictReader(f):
                row["_source_file"] = path.name
                rows.append(row)
    return rows


def summarize(rows):
    """Tally survey choices overall and per baseline.

    A row whose ``"chosen_source"`` value is missing or empty is treated as
    unanswered and skipped. Every other row counts toward the total; it is a
    vote for "ours" when the choice equals ``"ours"`` and a baseline vote
    otherwise.

    Args:
        rows: Iterable of mappings that may carry ``"chosen_source"`` and
            ``"baseline"`` keys.

    Returns:
        A tuple ``(total, ours, baseline, by_baseline)`` where the first
        three are integer counts and ``by_baseline`` maps each baseline name
        to a ``Counter`` of the choices recorded for it. Rows with a missing
        or blank baseline are grouped under ``"unknown"``.
    """
    total = 0
    ours = 0
    by_baseline = defaultdict(Counter)

    for row in rows:
        choice = row.get("chosen_source")
        if not choice:
            continue
        total += 1
        if choice == "ours":
            ours += 1
        # Use `or` rather than a .get() default so that a present-but-blank
        # value ("" or the None that csv.DictReader fills in for short rows)
        # also falls back to "unknown". This keeps every key a string, so the
        # keys can be sorted together.
        baseline_name = row.get("baseline") or "unknown"
        by_baseline[baseline_name][choice] += 1

    # Every answered row that is not "ours" is a baseline vote.
    return total, ours, total - ours, by_baseline


def main():
    """Print overall and per-baseline preference counts for the survey CSVs.

    Prints a notice and returns early when ``OUTPUT_DIR`` does not exist.
    Otherwise loads all rows, summarizes them, and prints the answered,
    "ours" and baseline totals followed by one line per baseline name in
    sorted order.
    """
    if not OUTPUT_DIR.exists():
        print(f"No outputs found. Missing {OUTPUT_DIR}.")
        return

    total, ours, baseline, by_baseline = summarize(load_rows())

    print("Overall")
    print(f"- answered: {total}")
    print(f"- ours: {ours}")
    print(f"- baseline: {baseline}")

    print("\nBy baseline")
    for baseline_name in sorted(by_baseline):
        votes = by_baseline[baseline_name]
        ours_part = f"- {baseline_name}: ours {votes.get('ours', 0)} | "
        # The right-hand figure counts choices equal to the baseline's name.
        baseline_part = f"{baseline_name} {votes.get(baseline_name, 0)}"
        print(ours_part + baseline_part)


# Run the summary only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()