recap-t2i-evaluation-code-2026 / eval_code /scripts /pack_recap_ed_metrics.py
Authors
Initial anonymous NeurIPS 2026 E&D code and results release
7f59fb7 verified
#!/usr/bin/env python3
"""Pack small recap E&D metric artifacts into a release-friendly directory."""
from __future__ import annotations
import argparse
import csv
import json
import shutil
from pathlib import Path
from typing import Any
# Base directory that manifest paths are made relative to (see rel_or_abs).
# NOTE(review): the value is a placeholder token — presumably substituted for
# the real checkout path before release; confirm before running.
ROOT: Path = Path("<PROJECT_ROOT>")

# Base directory holding the metric JSON files read by the pack_* functions.
# NOTE(review): also a placeholder token; set to the real cache location.
NVME: Path = Path("<LOCAL_CACHE>")
# (encoder label, surface, run directory relative to the embedding cache).
# Every encoder contributes an "ours" run followed by a "ref" run, so the table
# is generated from the encoder list instead of being spelled out row by row.
EMBEDDING_RUNS = [
    (encoder_label, surface, f"{encoder_dir}/{run_name}")
    for encoder_label, encoder_dir in (
        ("Qwen3-Embedding-4B", "qwen3-embedding-4b"),
        ("Qwen3-Embedding-8B", "qwen3-embedding-8b"),
        ("E5-Mistral-7B", "e5-mistral-7b-instruct"),
        ("BGE-M3-official", "bge-m3-official"),
    )
    for surface, run_name in (
        ("ours", "datacomp_ours_50k"),
        ("ref", "datacomp_ref_llava15_50k"),
    )
]
# (protocol label, surface, support JSON path relative to the support cache).
# Each protocol is evaluated against the "ours" and then the "ref" surface; the
# file name encodes the prompt mode ("raw" or "query") and the surface.
SUPPORT_RUNS = [
    (
        protocol,
        surface,
        f"{encoder_dir}/2026-04-25/diffusiondb_{prompt_mode}_to_{surface}_50k.support.json",
    )
    for protocol, encoder_dir, prompt_mode in (
        ("Qwen3-Embedding-4B raw/raw", "qwen3-embedding-4b", "raw"),
        ("Qwen3-Embedding-4B query/doc", "qwen3-embedding-4b", "query"),
        ("E5-Mistral raw/raw", "e5-mistral-7b-instruct", "raw"),
        ("E5-Mistral query/doc", "e5-mistral-7b-instruct", "query"),
        ("BGE-M3 raw/corpus", "bge-m3-official", "raw"),
        ("BGE-M3 query/corpus", "bge-m3-official", "query"),
    )
    for surface in ("ours", "ref")
]
def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Returns:
        Namespace with a single attribute, ``output_dir``: the directory the
        bundle is written to (a string; defaults to the dated metrics folder).
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--output-dir",
        default="artifacts/recap-ed/metrics-2026-04-25",
    )
    options = cli.parse_args()
    return options
def load_json(path: Path) -> dict[str, Any]:
    """Read ``path`` as UTF-8 text and return the decoded JSON document.

    Args:
        path: Location of the JSON file.

    Returns:
        The parsed JSON value (callers in this file expect an object/dict).

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
def write_tsv(path: Path, rows: list[dict[str, Any]], fields: list[str]) -> None:
    """Write ``rows`` to ``path`` as a tab-separated table with a header line.

    Args:
        path: Destination file; missing parent directories are created.
        rows: One mapping per table row, keyed by column name.
        fields: Column names, in output order.

    Raises:
        ValueError: Raised by ``csv.DictWriter`` when a row contains a key
            that is not listed in ``fields``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with open(path, "w", encoding="utf-8", newline="") as stream:
        table = csv.DictWriter(stream, fieldnames=fields, delimiter="\t")
        table.writeheader()
        for record in rows:
            table.writerow(record)
def rel_or_abs(path: Path) -> str:
    """Return ``path`` as a string, relative to ``ROOT`` when it lies under it.

    Args:
        path: Path to render for the manifest.

    Returns:
        The ``ROOT``-relative form if ``path`` is inside ``ROOT``; otherwise
        the path unchanged.
    """
    try:
        shown = path.relative_to(ROOT)
    except ValueError:
        # relative_to raises ValueError when path is not located under ROOT.
        shown = path
    return str(shown)
def pack_embedding(out_dir: Path, manifest: dict[str, Any]) -> None:
    """Collect per-run Vendi and covariance-geometry numbers into one TSV.

    For every entry of ``EMBEDDING_RUNS`` this reads the run's Vendi JSON and
    its geometry JSON, formats the selected values with six decimals, and
    finally writes ``embedding/caption_embedding_profile.tsv`` under
    ``out_dir``.

    Args:
        out_dir: Root of the output bundle.
        manifest: Bundle manifest; the two JSON paths read for each run are
            appended to ``manifest["sources"]``.

    Raises:
        OSError: If a required JSON file cannot be read.
        KeyError: If the Vendi JSON lacks ``summary.vendi`` or one of its
            ``mean`` / ``ci95_low`` / ``ci95_high`` entries.
    """
    vendi_stats = ("mean", "ci95_low", "ci95_high")
    geometry_keys = ("cov_effective_rank", "cov_participation_ratio", "cov_top1_mass")
    columns = [
        "encoder",
        "surface",
        "rows",
        *(f"vendi_{stat}" for stat in vendi_stats),
        *geometry_keys,
    ]

    table: list[dict[str, Any]] = []
    for encoder, surface, rel in EMBEDDING_RUNS:
        run_dir = NVME / "caption-embeddings" / rel
        vendi_path = run_dir / "vendi_partition_b4096_seed0.json"

        # Prefer the stand-alone geometry file; fall back to the copy stored
        # next to the embeddings when it does not exist.
        run_rel = Path(rel)
        geometry_path = (
            NVME / "caption-geometry" / run_rel.parent / f"{run_rel.name}.geometry.json"
        )
        if not geometry_path.exists():
            geometry_path = run_dir / "geometry_seed0.json"

        vendi = load_json(vendi_path)
        geometry = load_json(geometry_path)
        # The metrics are either nested under "metrics" or stored at top level.
        geometry_metrics = geometry.get("metrics", geometry)
        vendi_summary = vendi["summary"]["vendi"]

        record: dict[str, Any] = {
            "encoder": encoder,
            "surface": surface,
            "rows": vendi.get("source_rows"),
        }
        for stat in vendi_stats:
            record[f"vendi_{stat}"] = f"{vendi_summary[stat]:.6f}"
        for key in geometry_keys:
            # NOTE(review): a missing geometry metric is written as 0.000000,
            # which is indistinguishable from a real zero — confirm intended.
            record[key] = f"{geometry_metrics.get(key, 0):.6f}"
        table.append(record)

        manifest["sources"].append(rel_or_abs(vendi_path))
        manifest["sources"].append(rel_or_abs(geometry_path))

    write_tsv(out_dir / "embedding" / "caption_embedding_profile.tsv", table, columns)
def pack_support(out_dir: Path, manifest: dict[str, Any]) -> None:
    """Collect prompt-to-caption support metrics into one TSV.

    For every entry of ``SUPPORT_RUNS`` this reads the support JSON, copies the
    row counts and ``k``, formats the four metrics with six decimals, and
    finally writes ``embedding/prompt_caption_support.tsv`` under ``out_dir``.

    Args:
        out_dir: Root of the output bundle.
        manifest: Bundle manifest; each JSON path read is appended to
            ``manifest["sources"]``.

    Raises:
        OSError: If a support JSON file cannot be read.
        KeyError: If a file lacks ``metrics`` or one of the four metric keys.
    """
    metric_names = ("coverage", "density", "nn_cosine_mean", "nn_distance_p95")
    columns = ["protocol", "surface", "prompt_rows", "caption_rows", "k", *metric_names]

    table: list[dict[str, Any]] = []
    for protocol, surface, rel in SUPPORT_RUNS:
        support_path = NVME / "prompt-caption-support" / rel
        payload = load_json(support_path)
        metrics = payload["metrics"]

        record: dict[str, Any] = {
            "protocol": protocol,
            "surface": surface,
            # The JSON's query/gallery counts are reported as prompt/caption rows.
            "prompt_rows": payload.get("query_rows"),
            "caption_rows": payload.get("gallery_rows"),
            "k": payload.get("k"),
        }
        for name in metric_names:
            record[name] = f"{metrics[name]:.6f}"
        table.append(record)

        manifest["sources"].append(rel_or_abs(support_path))

    write_tsv(out_dir / "embedding" / "prompt_caption_support.tsv", table, columns)
def pack_cpu(out_dir: Path, manifest: dict[str, Any]) -> None:
    """Copy the small CPU-side survey files into ``out_dir / "cpu"``.

    Args:
        out_dir: Root of the output bundle; the ``cpu`` subdirectory is created
            if needed.
        manifest: Bundle manifest; every source path is appended to
            ``manifest["sources"]`` and every copy to
            ``manifest["packed_files"]``.

    Raises:
        OSError: If a source file is missing or cannot be copied.
    """
    cpu_dir = out_dir / "cpu"
    cpu_dir.mkdir(parents=True, exist_ok=True)

    survey_cache = NVME / "caption-survey"
    inputs = (
        ROOT / "artifacts/caption-survey/cpu_remaining_2026-04-24/paired_delta_ci.tsv",
        survey_cache / "local_long_1m.json",
        survey_cache / "hf_manifest_1m.json",
    )
    for origin in inputs:
        # Files are flattened into cpu/ under their original file name.
        copied = cpu_dir / origin.name
        shutil.copy2(origin, copied)
        manifest["sources"].append(rel_or_abs(origin))
        manifest["packed_files"].append(rel_or_abs(copied))
def write_readme(out_dir: Path) -> None:
    """Write the bundle's fixed ``README.md`` into ``out_dir``.

    The text is static: it lists the expected bundle contents and the
    interpretation caveats. An existing README.md is overwritten.

    Args:
        out_dir: Root of the output bundle; must already exist.
    """
    readme_text = """# Recap E&D Metric Artifacts
Date: 2026-04-25
This directory contains small, paper-facing metric artifacts for the recap E&D draft.
Large intermediate embedding arrays, VLM response JSONL files, and source image data are
not copied here. The manifest records local source paths for reproducibility.
Contents:
- `cpu/paired_delta_ci.tsv`: paired CPU lexical/surface metric deltas with CIs.
- `cpu/local_long_1m.json`: local long-caption corpus survey summaries.
- `cpu/hf_manifest_1m.json`: public-reference corpus survey summaries.
- `cbu/claimed_cbu_ci.tsv`: caption-level bootstrap CIs for claimed CBU density.
- `cbu/grounded_cbu_ci.tsv`: caption-level bootstrap CIs for exact-unit grounded CBU audits.
- `cbu/grounded_cbu_category_ci.tsv`: category-level grounded CBU audit CIs.
- `embedding/caption_embedding_profile.tsv`: Vendi and covariance-geometry profiles.
- `embedding/prompt_caption_support.tsv`: PRDC-style prompt-in-caption support metrics.
Boundary:
- Text-only metrics describe caption/supervision structure.
- `GroundedCBU` is a sampled VLM proxy audit, not a human-certified faithfulness score.
- Embedding metrics are encoder-sensitive and should be reported as profiles, not a single scalar quality score.
"""
    readme_path = out_dir / "README.md"
    with readme_path.open("w", encoding="utf-8") as handle:
        handle.write(readme_text)
def main() -> int:
    """Build the metric bundle and write its manifest.

    Runs the three packers and the README writer against the chosen output
    directory, records every file found there in the manifest, writes
    ``manifest.json``, and prints a short JSON summary to stdout.

    Returns:
        ``0`` on success; failures propagate as exceptions.
    """
    args = parse_args()
    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    manifest: dict[str, Any] = {
        "date": "2026-04-25",
        "purpose": "paper-facing recap E&D metric artifact bundle",
        "sources": [],
        "packed_files": [],
    }

    pack_cpu(out_dir, manifest)
    pack_embedding(out_dir, manifest)
    pack_support(out_dir, manifest)
    write_readme(out_dir)

    # Scan the whole output tree so that every file in the bundle is listed,
    # including files this script did not write itself (the README mentions
    # cbu/ tables — presumably placed there by another step; TODO confirm).
    scanned = [
        rel_or_abs(path)
        for path in sorted(out_dir.rglob("*"))
        if path.is_file() and path.name != "manifest.json"
    ]
    # pack_cpu already recorded its copies and the scan finds them again, so
    # merge the two lists without duplicates. dict.fromkeys keeps the first
    # occurrence of each entry and preserves order.
    manifest["packed_files"] = list(
        dict.fromkeys([*manifest["packed_files"], *scanned])
    )

    (out_dir / "manifest.json").write_text(
        json.dumps(manifest, indent=2), encoding="utf-8"
    )
    summary = {"output_dir": str(out_dir), "files": len(manifest["packed_files"])}
    print(json.dumps(summary, indent=2))
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)