Spaces:

xleaps
/

sgo

Running

sgo / scripts /compare.py

Eric Xu

Initial release: Semantic Gradient Optimization framework

9415028 5 days ago

3.25 kB

	"""
	Cross-run comparison — track how changes to θ affect scores over time.

	Usage:
	uv run python scripts/compare.py
	uv run python scripts/compare.py --runs baseline v2_with_freetier
	"""

	import json
	import argparse
	from collections import Counter
	from pathlib import Path

	PROJECT_ROOT = Path(__file__).resolve().parent.parent
	RESULTS_DIR = PROJECT_ROOT / "results"


	def load_run(tag):
	d = RESULTS_DIR / tag
	with open(d / "raw_results.json") as f:
	results = json.load(f)
	with open(d / "meta.json") as f:
	meta = json.load(f)
	return meta, results


	def summarize(results):
	valid = [r for r in results if "score" in r]
	if not valid:
	return {}
	scores = [r["score"] for r in valid]
	actions = [r["action"] for r in valid]
	n = len(valid)
	return {
	"n": n,
	"avg": round(sum(scores) / n, 1),
	"positive": actions.count("positive"),
	"neutral": actions.count("neutral"),
	"negative": actions.count("negative"),
	"pos_pct": round(100 * actions.count("positive") / n),
	"attractions": Counter(a for r in valid for a in r.get("attractions", [])).most_common(5),
	"concerns": Counter(c for r in valid for c in r.get("concerns", [])).most_common(5),
	}


	def main():
	parser = argparse.ArgumentParser()
	parser.add_argument("--runs", nargs="*", default=None)
	args = parser.parse_args()

	if args.runs:
	tags = args.runs
	else:
	tags = sorted(d.name for d in RESULTS_DIR.iterdir()
	if d.is_dir() and (d / "meta.json").exists())

	if not tags:
	print("No runs found.")
	return

	print(f"{'='*75}")
	print(f"COMPARISON — {len(tags)} RUNS")
	print(f"{'='*75}\n")

	summaries = []
	for tag in tags:
	meta, results = load_run(tag)
	s = summarize(results)
	s["tag"] = tag
	s["entity"] = Path(meta.get("entity", "?")).name
	s["date"] = meta.get("timestamp", "?")[:10]
	summaries.append(s)

	print(f"{'Tag':<28} {'Date':<12} {'Entity':<22} {'Avg':>5} {'✅':>5} {'🤔':>5} {'❌':>5}")
	print("-" * 85)
	for s in summaries:
	print(f"{s['tag']:<28} {s['date']:<12} {s['entity']:<22} "
	f"{s['avg']:>5.1f} {s['positive']:>4} {s['neutral']:>4} {s['negative']:>4}")

	if len(summaries) >= 2:
	prev, curr = summaries[-2], summaries[-1]
	delta = curr["avg"] - prev["avg"]
	arrow = "↑" if delta > 0 else "↓" if delta < 0 else "→"
	print(f"\nDelta ({prev['tag']} → {curr['tag']}): {arrow} {delta:+.1f}")

	prev_a = set(a for a, _ in prev.get("attractions", []))
	curr_a = set(a for a, _ in curr.get("attractions", []))
	if curr_a - prev_a:
	print(f" New attractions: {curr_a - prev_a}")
	if prev_a - curr_a:
	print(f" Lost attractions: {prev_a - curr_a}")

	prev_c = set(c for c, _ in prev.get("concerns", []))
	curr_c = set(c for c, _ in curr.get("concerns", []))
	if curr_c - prev_c:
	print(f" New concerns: {curr_c - prev_c}")
	if prev_c - curr_c:
	print(f" Resolved concerns: {prev_c - curr_c}")


	if __name__ == "__main__":
	main()