"""Summarize pairwise survey results stored as CSV files in ``survey_outputs``.

Each CSV row is read as one response. The ``chosen_source`` column holds
the picked option ("ours" or anything else, which is tallied as a baseline
vote) and the ``baseline`` column names the baseline the row is grouped
under.
"""

import csv
from collections import Counter, defaultdict
from pathlib import Path

OUTPUT_DIR = Path(__file__).resolve().parent / "survey_outputs"


def load_rows():
    """Read every ``*.csv`` file in ``OUTPUT_DIR`` into a list of row dicts.

    Files are processed in sorted path order. Each row dict produced by
    ``csv.DictReader`` is tagged with a ``_source_file`` key holding the
    name of the file it came from.

    Returns:
        A list of row dicts, in file order and then row order.
    """
    rows = []
    for path in sorted(OUTPUT_DIR.glob("*.csv")):
        # newline="" lets the csv module handle line endings itself, so
        # quoted fields containing embedded newlines are parsed correctly.
        with path.open("r", encoding="utf-8", newline="") as f:
            for row in csv.DictReader(f):
                row["_source_file"] = path.name
                rows.append(row)
    return rows


def summarize(rows):
    """Tally answered rows overall and per baseline.

    A row counts as answered when its ``chosen_source`` value is truthy.
    A choice of ``"ours"`` increments the ``ours`` tally; any other value
    increments the ``baseline`` tally.

    Args:
        rows: Iterable of dict-like rows with ``chosen_source`` and
            ``baseline`` keys (either may be missing, ``None`` or empty).

    Returns:
        A tuple ``(total, ours, baseline, by_baseline)`` where
        ``by_baseline`` maps each baseline name to a ``Counter`` of the
        ``chosen_source`` values seen for it.
    """
    total = 0
    ours = 0
    baseline = 0
    by_baseline = defaultdict(Counter)

    for row in rows:
        choice = row.get("chosen_source")
        if not choice:
            # Unanswered (missing, None or empty) rows are ignored.
            continue

        total += 1
        if choice == "ours":
            ours += 1
        else:
            baseline += 1

        # csv.DictReader yields None for short rows and "" for empty cells,
        # so a plain .get() default is not enough. Normalizing here keeps
        # every key a non-empty string, which also keeps sorting the keys
        # safe (None cannot be ordered against str).
        baseline_name = row.get("baseline") or "unknown"
        by_baseline[baseline_name][choice] += 1

    return total, ours, baseline, by_baseline


def main():
    """Print the overall and per-baseline vote counts for all survey CSVs."""
    if not OUTPUT_DIR.exists():
        print(f"No outputs found. Missing {OUTPUT_DIR}.")
        return

    rows = load_rows()
    total, ours, baseline, by_baseline = summarize(rows)

    print("Overall")
    print(f"- answered: {total}")
    print(f"- ours: {ours}")
    print(f"- baseline: {baseline}")

    print("\nBy baseline")
    for baseline_name in sorted(by_baseline):
        c = by_baseline[baseline_name]
        ours_votes = c.get("ours", 0)
        # Mirror the overall tally: every non-"ours" choice is a baseline
        # vote, even if its label does not match the baseline name exactly.
        baseline_votes = sum(c.values()) - ours_votes
        print(
            f"- {baseline_name}: ours {ours_votes} | "
            f"{baseline_name} {baseline_votes}"
        )


if __name__ == "__main__":
    main()