"""Generate paper-style summary tables from saved benchmark results."""
import argparse
import json
from pathlib import Path
# Column header shown for each method key.  The insertion order of this
# mapping is significant: it is the left-to-right column order of the
# generated tables.
METHOD_LABELS = {
    "global": "Global",
    "partition": "Mondrian",
    "twostage": "TwoStage",
    "fullcp": "FullCP",
    "jackknife_plus": "Jackknife+",
    "oneshot": "OneShot",
    "trainres": "TrainRes",
    "weighted": "Weighted",
    "oracle": "Oracle",
}

# Method keys in column order, taken from the label mapping so that every
# ordered method is guaranteed to have a header.
METHOD_ORDER = list(METHOD_LABELS)
# Rows of the synthetic table, top to bottom, as (results file name, row label).
SYNTHETIC_SPECS: list[tuple[str, str]] = [
    ("d1_homogeneous.json", "D1 Homogeneous"),
    ("d2_pure_scale.json", "D2 Pure scale"),
    ("d3_discrete_groups_aligned.json", "D3 Discrete aligned"),
    ("d4_model_bias.json", "D4 Bias"),
    ("d5_heavy_tail.json", "D5 Heavy tail"),
    ("d6_high_k.json", "D6† High-K"),
]

# Rows of the real-data table, top to bottom, as (results file name, row label).
REAL_SPECS: list[tuple[str, str]] = [
    ("exp2_2_softmax_cifar10_strata_entropy_fixed.json", "CIFAR-10"),
    ("exp2_3_hyperspectral_samson_nmf_all_methods.json", "Samson"),
    ("exp2_5_topics_K10_all_methods.json", "Topics"),
    ("exp2_6_affective_text.json", "AffectiveText"),
    ("exp2_4_age_ldl_K10_image_knn_main.json", "UTKFace"),
    ("real_bulk_deconv.json", "PBMC"),
]
# Supplementary results files, keyed by row label.  When a file exists, its
# summary entries are overlaid on the row's main summary; files listed later
# take precedence over earlier ones.
EXTRA_FILES: dict[str, list[str]] = {
    "D1 Homogeneous": ["d1_homogeneous_exact.json"],
    "D3 Discrete aligned": ["d3_discrete_groups_aux.json"],
    "D5 Heavy tail": ["d5_heavy_tail_aux.json"],
    "D6† High-K": [
        "d6_high_k_aux.json",
        "d6_high_k_exact_appendix.json",
    ],
    "PBMC": [
        "real_bulk_deconv_fullcp.json",
        "real_bulk_deconv_aux.json",
        "real_bulk_deconv_trainres.json",
    ],
    "UTKFace": ["exp2_4_age_ldl_K10_image_knn_fullcp_2k.json"],
}
def load_json(path: Path) -> dict:
    """Read the JSON document stored at ``path`` and return the parsed value.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so a results file containing non-ASCII text loads the
    same way on every system.

    Args:
        path: Location of the JSON file.

    Returns:
        The value produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)
def extract_summary(data: dict) -> dict:
    """Return the results block stored in a loaded results document.

    The value under ``"summary"`` is preferred; the value under
    ``"aggregated"`` is used only when ``"summary"`` is absent.

    Raises:
        KeyError: If ``data`` contains neither key.
    """
    # Keys are tried in priority order; the first one present wins.
    for block_key in ("summary", "aggregated"):
        if block_key in data:
            return data[block_key]
    raise KeyError("Missing summary/aggregated block")
def metric_cell(summary: dict, method: str) -> str:
    """Format the table cell for ``method`` from a results summary.

    The cell is ``coverage / disparity / worst-stratum`` followed by
    ``/ radius`` when a mean radius is available, each value printed with
    three decimals.  The worst-stratum value comes from
    ``worst_stratum_coverage`` when present, otherwise from the smallest mean
    in ``stratified_coverage``.  If neither yields a value (including an
    empty ``stratified_coverage`` mapping) that field is rendered as ``--``
    instead of failing while formatting ``None``.

    Args:
        summary: Mapping from method key to that method's metric entries.
        method: Method key to look up.

    Returns:
        The formatted cell, or ``"--"`` when ``method`` is not in ``summary``.

    Raises:
        KeyError: If the method's entry lacks ``marginal_coverage`` or
            ``max_disparity``, which are treated as mandatory.
    """
    if method not in summary:
        return "--"

    stats = summary[method]
    cov = stats["marginal_coverage"]["mean"]
    disp = stats["max_disparity"]["mean"]

    worst = stats["worst_stratum_coverage"]["mean"] if "worst_stratum_coverage" in stats else None
    if worst is None and "stratified_coverage" in stats:
        # default=None keeps an empty strata mapping from raising ValueError.
        worst = min(
            (stratum["mean"] for stratum in stats["stratified_coverage"].values()),
            default=None,
        )

    radius = stats["mean_radius"]["mean"] if "mean_radius" in stats else None

    fields = [
        f"{cov:.3f}",
        f"{disp:.3f}",
        # A missing worst-stratum value keeps its slot so columns stay aligned.
        "--" if worst is None else f"{worst:.3f}",
    ]
    if radius is not None:
        fields.append(f"{radius:.3f}")
    return " / ".join(fields)
def write_markdown_table(
    out_path: Path,
    title: str,
    specs: list[tuple[str, str]],
    results_dir: Path,
    extras: dict[str, list[str]] | None = None,
) -> None:
    """Build a Markdown summary table and write it to ``out_path``.

    One row is produced per ``(filename, label)`` entry in ``specs`` whose
    file exists under ``results_dir``; entries with a missing file are
    skipped.  Each row holds the label followed by one ``metric_cell`` per
    method in ``METHOD_ORDER``.

    Args:
        out_path: Destination Markdown file.
        title: Text of the top-level heading.
        specs: ``(results file name, row label)`` pairs, in row order.
        results_dir: Directory holding the results files.
        extras: Optional mapping from row label to supplementary file names.
            Each existing file's summary is overlaid on the row's summary,
            entry by entry, with later files overriding earlier ones.

    The output is written as UTF-8 regardless of the platform's locale
    encoding, because row labels may contain non-ASCII characters.
    """
    rows = []
    for filename, label in specs:
        path = results_dir / filename
        if not path.exists():
            continue
        summary = extract_summary(load_json(path))
        if extras and label in extras:
            for extra_name in extras[label]:
                extra_path = results_dir / extra_name
                if extra_path.exists():
                    # Shallow overlay: an extra file replaces whole entries.
                    summary = {**summary, **extract_summary(load_json(extra_path))}
        rows.append([label] + [metric_cell(summary, method) for method in METHOD_ORDER])

    headers = ["Task"] + [METHOD_LABELS[m] for m in METHOD_ORDER]
    lines = [f"# {title}", "", "Cells report `coverage / disparity / worst-stratum / radius`.", ""]
    lines.append("| " + " | ".join(headers) + " |")
    lines.append("|" + "|".join(["---"] * len(headers)) + "|")
    lines.extend("| " + " | ".join(row) + " |" for row in rows)

    out_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"Saved {out_path}")
def main():
    """Parse the command-line options and write both summary tables.

    ``--results-dir`` is where results files are read from and
    ``--output-dir`` is where the Markdown tables are written; both default
    to ``results/tables``.  The output directory is created if needed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--results-dir", default="results/tables")
    parser.add_argument("--output-dir", default="results/tables")
    options = parser.parse_args()

    source_dir = Path(options.results_dir)
    target_dir = Path(options.output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # (output file name, heading, row specs) for each table, in write order.
    tables = (
        ("paper_table_synthetic_summary.md", "Synthetic Summary", SYNTHETIC_SPECS),
        ("paper_table_real_summary.md", "Real-Data Summary", REAL_SPECS),
    )
    for file_name, heading, row_specs in tables:
        write_markdown_table(
            target_dir / file_name,
            heading,
            row_specs,
            source_dir,
            extras=EXTRA_FILES,
        )


if __name__ == "__main__":
    main()
|