| """ |
| Cross-run comparison — track how changes to θ affect scores over time. |
| |
| Usage: |
| uv run python scripts/compare.py |
| uv run python scripts/compare.py --runs baseline v2_with_freetier |
| """ |
|
|
| import json |
| import argparse |
| from collections import Counter |
| from pathlib import Path |
|
|
| PROJECT_ROOT = Path(__file__).resolve().parent.parent |
| RESULTS_DIR = PROJECT_ROOT / "results" |
|
|
|
|
| def load_run(tag): |
| d = RESULTS_DIR / tag |
| with open(d / "raw_results.json") as f: |
| results = json.load(f) |
| with open(d / "meta.json") as f: |
| meta = json.load(f) |
| return meta, results |
|
|
|
|
| def summarize(results): |
| valid = [r for r in results if "score" in r] |
| if not valid: |
| return {} |
| scores = [r["score"] for r in valid] |
| actions = [r["action"] for r in valid] |
| n = len(valid) |
| return { |
| "n": n, |
| "avg": round(sum(scores) / n, 1), |
| "positive": actions.count("positive"), |
| "neutral": actions.count("neutral"), |
| "negative": actions.count("negative"), |
| "pos_pct": round(100 * actions.count("positive") / n), |
| "attractions": Counter(a for r in valid for a in r.get("attractions", [])).most_common(5), |
| "concerns": Counter(c for r in valid for c in r.get("concerns", [])).most_common(5), |
| } |
|
|
|
|
| def main(): |
| parser = argparse.ArgumentParser() |
| parser.add_argument("--runs", nargs="*", default=None) |
| args = parser.parse_args() |
|
|
| if args.runs: |
| tags = args.runs |
| else: |
| tags = sorted(d.name for d in RESULTS_DIR.iterdir() |
| if d.is_dir() and (d / "meta.json").exists()) |
|
|
| if not tags: |
| print("No runs found.") |
| return |
|
|
| print(f"{'='*75}") |
| print(f"COMPARISON — {len(tags)} RUNS") |
| print(f"{'='*75}\n") |
|
|
| summaries = [] |
| for tag in tags: |
| meta, results = load_run(tag) |
| s = summarize(results) |
| s["tag"] = tag |
| s["entity"] = Path(meta.get("entity", "?")).name |
| s["date"] = meta.get("timestamp", "?")[:10] |
| summaries.append(s) |
|
|
| print(f"{'Tag':<28} {'Date':<12} {'Entity':<22} {'Avg':>5} {'✅':>5} {'🤔':>5} {'❌':>5}") |
| print("-" * 85) |
| for s in summaries: |
| print(f"{s['tag']:<28} {s['date']:<12} {s['entity']:<22} " |
| f"{s['avg']:>5.1f} {s['positive']:>4} {s['neutral']:>4} {s['negative']:>4}") |
|
|
| if len(summaries) >= 2: |
| prev, curr = summaries[-2], summaries[-1] |
| delta = curr["avg"] - prev["avg"] |
| arrow = "↑" if delta > 0 else "↓" if delta < 0 else "→" |
| print(f"\nDelta ({prev['tag']} → {curr['tag']}): {arrow} {delta:+.1f}") |
|
|
| prev_a = set(a for a, _ in prev.get("attractions", [])) |
| curr_a = set(a for a, _ in curr.get("attractions", [])) |
| if curr_a - prev_a: |
| print(f" New attractions: {curr_a - prev_a}") |
| if prev_a - curr_a: |
| print(f" Lost attractions: {prev_a - curr_a}") |
|
|
| prev_c = set(c for c, _ in prev.get("concerns", [])) |
| curr_c = set(c for c, _ in curr.get("concerns", [])) |
| if curr_c - prev_c: |
| print(f" New concerns: {curr_c - prev_c}") |
| if prev_c - curr_c: |
| print(f" Resolved concerns: {prev_c - curr_c}") |
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|