File size: 3,246 Bytes
"""
Cross-run comparison — track how changes to θ affect scores over time.
Usage:
uv run python scripts/compare.py
uv run python scripts/compare.py --runs baseline v2_with_freetier
"""
import json
import argparse
from collections import Counter
from pathlib import Path
# Project root: two directory levels above this script file.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Directory under which each run tag has its own sub-directory.
RESULTS_DIR = PROJECT_ROOT.joinpath("results")
def load_run(tag, results_dir=None):
    """Load the metadata and raw results stored for one run.

    Args:
        tag: Name of the run's sub-directory.
        results_dir: Directory that contains the run sub-directories.
            Defaults to the module-level ``RESULTS_DIR``.

    Returns:
        A ``(meta, results)`` tuple holding the parsed contents of
        ``meta.json`` and ``raw_results.json`` respectively.

    Raises:
        FileNotFoundError: If either file does not exist.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    base = RESULTS_DIR if results_dir is None else Path(results_dir)
    run_dir = base / tag
    # Decode explicitly as UTF-8 so non-ASCII text in the JSON is read the
    # same way on every platform, whatever the locale's default encoding is.
    with open(run_dir / "raw_results.json", encoding="utf-8") as f:
        results = json.load(f)
    with open(run_dir / "meta.json", encoding="utf-8") as f:
        meta = json.load(f)
    return meta, results
def summarize(results):
    """Aggregate per-record results into summary statistics.

    Only records with a non-null ``"score"`` are counted. A counted record
    may lack ``"action"``, ``"attractions"`` or ``"concerns"`` (or have them
    set to null); it still contributes to ``n`` and ``avg``.

    Args:
        results: Iterable of result dicts.

    Returns:
        ``{}`` when no record has a score. Otherwise a dict with:
        ``n`` (number of scored records), ``avg`` (mean score rounded to one
        decimal), ``positive`` / ``neutral`` / ``negative`` (counts of each
        action value), ``pos_pct`` (rounded percentage of positive actions),
        and ``attractions`` / ``concerns`` (the five most common items as
        ``(item, count)`` pairs).
    """
    scored = [r for r in results if r.get("score") is not None]
    if not scored:
        return {}

    n = len(scored)
    total = sum(r["score"] for r in scored)
    # One pass over the actions instead of a separate list scan per label.
    action_counts = Counter(r.get("action") for r in scored)
    # "or []" also covers keys that are present but explicitly null.
    attractions = Counter(a for r in scored for a in r.get("attractions") or [])
    concerns = Counter(c for r in scored for c in r.get("concerns") or [])

    positive = action_counts["positive"]
    return {
        "n": n,
        "avg": round(total / n, 1),
        "positive": positive,
        "neutral": action_counts["neutral"],
        "negative": action_counts["negative"],
        "pos_pct": round(100 * positive / n),
        "attractions": attractions.most_common(5),
        "concerns": concerns.most_common(5),
    }
def _discover_tags():
    """Return the sorted names of run directories that contain a meta.json."""
    if not RESULTS_DIR.is_dir():
        # No results directory yet: report "no runs" instead of letting
        # iterdir() raise.
        return []
    return sorted(
        d.name
        for d in RESULTS_DIR.iterdir()
        if d.is_dir() and (d / "meta.json").exists()
    )


def _top_names(summary, key):
    """Return the item names from a summary's ``(name, count)`` list for key."""
    return {name for name, _ in summary.get(key, [])}


def _print_delta(prev, curr):
    """Print the score change and top-item changes between two summaries."""
    label = f"\nDelta ({prev['tag']} → {curr['tag']}):"
    if "avg" in prev and "avg" in curr:
        delta = curr["avg"] - prev["avg"]
        arrow = "↑" if delta > 0 else "↓" if delta < 0 else "→"
        print(f"{label} {arrow} {delta:+.1f}")
    else:
        # At least one run had no scored results, so there is no average
        # to compare.
        print(f"{label} n/a (a run has no scored results)")

    sections = (
        ("attractions", "New attractions", "Lost attractions"),
        ("concerns", "New concerns", "Resolved concerns"),
    )
    for key, gained_label, dropped_label in sections:
        before = _top_names(prev, key)
        after = _top_names(curr, key)
        if after - before:
            print(f" {gained_label}: {after - before}")
        if before - after:
            print(f" {dropped_label}: {before - after}")


def main():
    """Print a comparison table of runs and the delta between the last two.

    Runs are taken from ``--runs`` when given; otherwise every sub-directory
    of ``RESULTS_DIR`` that contains a ``meta.json`` is used, sorted by name.
    Prints "No runs found." and returns when there is nothing to compare.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--runs", nargs="*", default=None)
    args = parser.parse_args()

    # An omitted or empty --runs falls back to every run found on disk.
    tags = args.runs if args.runs else _discover_tags()
    if not tags:
        print("No runs found.")
        return

    print(f"{'='*75}")
    print(f"COMPARISON — {len(tags)} RUNS")
    print(f"{'='*75}\n")

    summaries = []
    for tag in tags:
        meta, results = load_run(tag)
        s = summarize(results)
        s["tag"] = tag
        # "or" also covers keys that are present but null in meta.json.
        s["entity"] = Path(meta.get("entity") or "?").name
        s["date"] = str(meta.get("timestamp") or "?")[:10]
        summaries.append(s)

    print(f"{'Tag':<28} {'Date':<12} {'Entity':<22} {'Avg':>5} {'✅':>5} {'🤔':>5} {'❌':>5}")
    print("-" * 85)
    for s in summaries:
        # summarize() returns {} for a run without scored results, so the
        # numeric fields may be absent from the summary.
        avg = f"{s['avg']:>5.1f}" if "avg" in s else f"{'n/a':>5}"
        print(f"{s['tag']:<28} {s['date']:<12} {s['entity']:<22} "
              f"{avg} {s.get('positive', 0):>4} {s.get('neutral', 0):>4} "
              f"{s.get('negative', 0):>4}")

    if len(summaries) >= 2:
        _print_delta(summaries[-2], summaries[-1])
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|