simplexuq-code / scripts /make_tables.py

Initial anonymous code release

fc329a3 verified 25 days ago

4.62 kB

	"""Generate paper-style summary tables from saved benchmark results."""
	import argparse
	import json
	from pathlib import Path

	# Display label for every method key found in the result summaries.
	# The insertion order of this mapping is also the left-to-right column
	# order of the generated tables.
	METHOD_LABELS = {
	    "global": "Global",
	    "partition": "Mondrian",
	    "twostage": "TwoStage",
	    "fullcp": "FullCP",
	    "jackknife_plus": "Jackknife+",
	    "oneshot": "OneShot",
	    "trainres": "TrainRes",
	    "weighted": "Weighted",
	    "oracle": "Oracle",
	}

	# Method keys in column order, derived from the mapping above so the two
	# can never drift apart.
	METHOD_ORDER = list(METHOD_LABELS)

	# Rows of the "Synthetic Summary" table: (result filename, row label)
	# pairs, listed in row order. main() passes this list as `specs` to
	# write_markdown_table, which resolves each filename against the results
	# directory.
	# NOTE(review): the dagger in the D6 label presumably refers to a footnote
	# in the paper -- confirm.
	SYNTHETIC_SPECS = [
	("d1_homogeneous.json", "D1 Homogeneous"),
	("d2_pure_scale.json", "D2 Pure scale"),
	("d3_discrete_groups_aligned.json", "D3 Discrete aligned"),
	("d4_model_bias.json", "D4 Bias"),
	("d5_heavy_tail.json", "D5 Heavy tail"),
	("d6_high_k.json", "D6† High-K"),
	]

	# Rows of the "Real-Data Summary" table: (result filename, row label)
	# pairs, listed in row order. main() passes this list as `specs` to
	# write_markdown_table.
	REAL_SPECS = [
	("exp2_2_softmax_cifar10_strata_entropy_fixed.json", "CIFAR-10"),
	("exp2_3_hyperspectral_samson_nmf_all_methods.json", "Samson"),
	("exp2_5_topics_K10_all_methods.json", "Topics"),
	("exp2_6_affective_text.json", "AffectiveText"),
	("exp2_4_age_ldl_K10_image_knn_main.json", "UTKFace"),
	("real_bulk_deconv.json", "PBMC"),
	]

	# Supplementary result files per table row, keyed by row label. The keys
	# must match the label strings used in the spec lists exactly, because
	# write_markdown_table looks rows up with `label in extras`. Files are
	# consulted in the order listed here.
	EXTRA_FILES = {
	"D1 Homogeneous": ["d1_homogeneous_exact.json"],
	"D3 Discrete aligned": ["d3_discrete_groups_aux.json"],
	"D5 Heavy tail": ["d5_heavy_tail_aux.json"],
	"D6† High-K": ["d6_high_k_aux.json", "d6_high_k_exact_appendix.json"],
	"PBMC": ["real_bulk_deconv_fullcp.json", "real_bulk_deconv_aux.json", "real_bulk_deconv_trainres.json"],
	"UTKFace": ["exp2_4_age_ldl_K10_image_knn_fullcp_2k.json"],
	}


	def load_json(path: Path) -> dict:
	    """Read and parse the JSON document stored at ``path``.

	    The file is decoded as UTF-8 explicitly rather than with the platform's
	    locale encoding, so non-ASCII content parses the same on every system.

	    Args:
	        path: Location of the JSON file.

	    Returns:
	        The parsed document.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If the content is not valid JSON.
	    """
	    with open(path, encoding="utf-8") as f:
	        return json.load(f)


	def extract_summary(data: dict) -> dict:
	    """Return the per-method summary block of a loaded result document.

	    The block is looked up under ``"summary"`` first and ``"aggregated"``
	    second; the first key that is present wins.

	    Raises:
	        KeyError: If neither key is present in ``data``.
	    """
	    for block_key in ("summary", "aggregated"):
	        if block_key in data:
	            return data[block_key]
	    raise KeyError("Missing summary/aggregated block")


	def metric_cell(summary: dict, method: str) -> str:
	    """Format one method's metrics as the text of a single table cell.

	    Args:
	        summary: Mapping from method key to that method's metric block.
	            Each metric read here is itself a mapping with a ``"mean"``
	            entry.
	        method: Method key to look up in ``summary``.

	    Returns:
	        ``"--"`` when ``method`` is absent from ``summary``. Otherwise
	        ``"coverage / disparity / worst-stratum"`` with three decimals each,
	        followed by ``" / radius"`` when a ``mean_radius`` value is
	        available. A worst-stratum value that cannot be determined is
	        rendered as ``"--"`` instead of failing.

	    Raises:
	        KeyError: If the method block lacks ``marginal_coverage`` or
	            ``max_disparity``.
	    """
	    if method not in summary:
	        return "--"
	    stats = summary[method]

	    cov = stats["marginal_coverage"]["mean"]
	    disp = stats["max_disparity"]["mean"]

	    worst = stats["worst_stratum_coverage"]["mean"] if "worst_stratum_coverage" in stats else None
	    if worst is None and "stratified_coverage" in stats:
	        # Fall back to the smallest per-stratum mean; an empty mapping
	        # leaves the value undetermined rather than raising ValueError.
	        worst = min(
	            (v["mean"] for v in stats["stratified_coverage"].values()),
	            default=None,
	        )

	    radius = stats["mean_radius"]["mean"] if "mean_radius" in stats else None

	    parts = [
	        f"{cov:.3f}",
	        f"{disp:.3f}",
	        # Formatting None with ".3f" raises TypeError, so use a placeholder.
	        "--" if worst is None else f"{worst:.3f}",
	    ]
	    if radius is not None:
	        parts.append(f"{radius:.3f}")
	    return " / ".join(parts)


	def write_markdown_table(
	    out_path: Path,
	    title: str,
	    specs: list[tuple[str, str]],
	    results_dir: Path,
	    extras: dict[str, list[str]] | None = None,
	) -> None:
	    """Render one Markdown summary table and write it to ``out_path``.

	    Each ``(filename, label)`` pair in ``specs`` becomes one row, provided
	    ``results_dir / filename`` exists; result files that are missing are
	    skipped so a partial set of results still yields a table. A row holds
	    the label followed by one ``metric_cell`` entry per method in
	    ``METHOD_ORDER``.

	    Args:
	        out_path: Destination Markdown file; overwritten if it exists.
	        title: Text of the level-1 heading.
	        specs: ``(result filename, row label)`` pairs, in row order.
	        results_dir: Directory containing the JSON result files.
	        extras: Optional mapping from row label to further result files.
	            Their summaries are layered over the primary summary in the
	            order listed, so a method key present in a later file replaces
	            the earlier entry. Extra files that do not exist are skipped.

	    Raises:
	        KeyError: If a loaded file has no summary block, or a reported
	            method lacks a required metric.
	    """
	    extras = extras or {}
	    rows = []
	    for filename, label in specs:
	        path = results_dir / filename
	        if not path.exists():
	            continue
	        summary = extract_summary(load_json(path))
	        for extra_name in extras.get(label, []):
	            extra_path = results_dir / extra_name
	            if extra_path.exists():
	                # Merge the per-method entries. A set display of the two
	                # dicts would raise TypeError because dicts are unhashable.
	                summary = {**summary, **extract_summary(load_json(extra_path))}
	        rows.append([label] + [metric_cell(summary, method) for method in METHOD_ORDER])

	    headers = ["Task"] + [METHOD_LABELS[m] for m in METHOD_ORDER]
	    lines = [f"# {title}", "", "Cells report `coverage / disparity / worst-stratum / radius`.", ""]
	    # Plain pipes: a backslash before the pipe would be written verbatim and
	    # break the Markdown table syntax.
	    lines.append("| " + " | ".join(headers) + " |")
	    lines.append("|" + "|".join(["---"] * len(headers)) + "|")
	    lines.extend("| " + " | ".join(row) + " |" for row in rows)
	    # Row labels may contain non-ASCII characters, so fix the encoding
	    # instead of relying on the platform default.
	    out_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
	    print(f"Saved {out_path}")


	def main():
	    """Parse command-line options and write both summary tables.

	    ``--results-dir`` is the directory the JSON results are read from and
	    ``--output-dir`` is where the Markdown files are written; both default
	    to ``results/tables``. The output directory is created if needed.
	    """
	    cli = argparse.ArgumentParser()
	    cli.add_argument("--results-dir", default="results/tables")
	    cli.add_argument("--output-dir", default="results/tables")
	    opts = cli.parse_args()

	    source_dir = Path(opts.results_dir)
	    target_dir = Path(opts.output_dir)
	    target_dir.mkdir(parents=True, exist_ok=True)

	    # (output filename, heading, row specs) for each table, in write order.
	    tables = (
	        ("paper_table_synthetic_summary.md", "Synthetic Summary", SYNTHETIC_SPECS),
	        ("paper_table_real_summary.md", "Real-Data Summary", REAL_SPECS),
	    )
	    for out_name, heading, table_specs in tables:
	        write_markdown_table(
	            target_dir / out_name,
	            heading,
	            table_specs,
	            source_dir,
	            extras=EXTRA_FILES,
	        )


	# Generate the tables only when this file is executed as a script.
	if __name__ == "__main__":
	    main()