#!/usr/bin/env python3
"""Build a clean Hugging Face artifact bundle from current repo outputs."""

from __future__ import annotations

import argparse
import json
import shutil
import sys
from pathlib import Path
from typing import Iterable, List

ROOT = Path(__file__).resolve().parent.parent
# Put the repository root on sys.path before importing from the ``scripts``
# package below.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from scripts.hf_training_contract import ARTIFACT_ROOT_DEFAULT, build_artifact_layout  # noqa: E402
from scripts.generate_defense_packet import _latest_summary  # noqa: E402

RESULTS_ROOT = ROOT / "training" / "kan_bench_results"

# Documents copied into the bundle's ``docs`` directory when they exist.
DEFAULT_DOCS = [
    ROOT / "docs" / "defense_packet.md",
    ROOT / "docs" / "defense_packet.json",
    ROOT / "docs" / "patent_evidence_packet.md",
    ROOT / "docs" / "patents" / "CROSS_REFERENCE_MATRIX.md",
    ROOT / "docs" / "patents" / "REPO_PATENT_INVENTORY.md",
]


def _relative_to_root(path: Path) -> str:
    """Return *path* relative to the repo root, or absolute if outside it.

    ``Path.relative_to`` raises ``ValueError`` for paths that are not under
    ``ROOT``; falling back to the resolved absolute path keeps bundling
    usable when the artifact root lives outside the repository.
    """
    resolved = path.resolve()
    try:
        return str(resolved.relative_to(ROOT.resolve()))
    except ValueError:
        return str(resolved)


def _copy_many(paths: Iterable[Path], dest: Path, keep_parent: bool = False) -> List[str]:
    """Copy every existing file in *paths* into *dest*.

    Args:
        paths: Candidate source files. Entries that are not regular files
            (missing, or directories) are skipped silently.
        dest: Destination directory; created (with parents) if needed.
        keep_parent: When true, the copied file is named
            ``<parent dir name>__<file name>`` instead of ``<file name>``.

    Returns:
        The copied target paths as strings, relative to the repo root when
        the target lies inside it and absolute otherwise.
    """
    copied: List[str] = []
    dest.mkdir(parents=True, exist_ok=True)
    for path in paths:
        # is_file() rather than exists(): shutil.copy2 cannot copy a directory.
        if not path.is_file():
            continue
        filename = f"{path.parent.name}__{path.name}" if keep_parent else path.name
        target = dest / filename
        shutil.copy2(path, target)
        copied.append(_relative_to_root(target))
    return copied


def _latest_verification_summaries() -> List[Path]:
    """Collect the latest summary path for each known verification prefix.

    ``_latest_summary`` is called once per prefix; prefixes for which it
    returns ``None`` are skipped. The first element of its result is joined
    onto ``ROOT`` (presumably a repo-relative path -- confirm against
    ``scripts.generate_defense_packet``).
    """
    out: List[Path] = []
    for prefix in ["smoke", "benchmark", "benchmark-matrix", "ablation", "patent-evidence"]:
        latest = _latest_summary(prefix)
        if latest is not None:
            rel, _ = latest
            out.append(ROOT / rel)
    return out


def build_bundle(root: str = ARTIFACT_ROOT_DEFAULT) -> dict:
    """Recreate the artifact bundle under *root* and write its manifest.

    Any existing bundle directory is deleted first, then results,
    checkpoints, docs and verification summaries that exist on disk are
    copied in and a ``manifest.json`` listing them is written.

    Args:
        root: Artifact root passed to ``build_artifact_layout``.

    Returns:
        A dict with the keys ``results``, ``checkpoints``, ``docs``,
        ``verification`` and ``manifest``, each mapping to a list of copied
        (or written) file paths as strings.

    Raises:
        ValueError: If the resolved bundle directory is the repository root
            or one of its ancestors, since wiping it would delete the repo.
    """
    layout = build_artifact_layout(root).ensure()

    # The bundle directory is removed wholesale below, so refuse locations
    # whose deletion would take the repository with them.
    bundle_root = layout.root.resolve()
    repo_root = ROOT.resolve()
    if bundle_root == repo_root or bundle_root in repo_root.parents:
        raise ValueError(
            f"Refusing to delete artifact root {bundle_root}: it contains the repository"
        )

    # Start from a clean directory so stale files never leak into the bundle.
    if layout.root.exists():
        shutil.rmtree(layout.root)
    layout = build_artifact_layout(root).ensure()

    docs_dir = layout.root / "docs"
    verify_dir = layout.root / "verification"

    result_files = [
        RESULTS_ROOT / "text2cypher_v4_results.json",
        RESULTS_ROOT / "spider2_v2_results.json",
        RESULTS_ROOT / "unified_dialect_results.json",
        RESULTS_ROOT / "swebench_results.json",
        RESULTS_ROOT / "sota_comparison_table.json",
    ]
    checkpoint_files = [
        RESULTS_ROOT / "sota_text2cypher_checkpoint.pt",
        RESULTS_ROOT / "sota_spider2_checkpoint.pt",
        RESULTS_ROOT / "sota_unified_checkpoint.pt",
        RESULTS_ROOT / "sota_swebench_checkpoint.pt",
        RESULTS_ROOT / "sota_gaia_checkpoint.pt",
    ]

    copied = {
        "results": _copy_many(result_files, layout.results_dir),
        "checkpoints": _copy_many(checkpoint_files, layout.checkpoints_dir),
        "docs": _copy_many(DEFAULT_DOCS, docs_dir),
        # Summaries from different runs may share a file name; keep_parent
        # prefixes each with its parent directory name.
        "verification": _copy_many(
            _latest_verification_summaries(), verify_dir, keep_parent=True
        ),
    }

    manifest = {
        "artifact_root": _relative_to_root(layout.root),
        "results": copied["results"],
        "checkpoints": copied["checkpoints"],
        "docs": copied["docs"],
        "verification": copied["verification"],
    }
    manifest_path = layout.root / "manifest.json"
    manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    copied["manifest"] = [_relative_to_root(manifest_path)]
    return copied


def main() -> int:
    """Parse CLI arguments, build the bundle and print the result as JSON."""
    parser = argparse.ArgumentParser(description="Package current repo artifacts for HF upload")
    parser.add_argument("--artifact-root", default=ARTIFACT_ROOT_DEFAULT)
    args = parser.parse_args()

    payload = build_bundle(args.artifact_root)
    print(json.dumps(payload, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())