# Source: ane-kan-runtime/scripts/package_hf_artifacts.py
# Uploaded by JohnGenetica in commit 201cf4d (verified): "Deploy ANE KAN runtime Space"
#!/usr/bin/env python3
"""Build a clean Hugging Face artifact bundle from current repo outputs."""
from __future__ import annotations
import argparse
import json
import shutil
import sys
from pathlib import Path
from typing import Iterable, List
# Repository root, taken as the directory two levels above this file.
ROOT = Path(__file__).resolve().parent.parent

# Put the repository root at the front of the import path (once) so the
# ``scripts.*`` imports that follow resolve when this file is run directly.
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
from scripts.hf_training_contract import ARTIFACT_ROOT_DEFAULT, build_artifact_layout
from scripts.generate_defense_packet import _latest_summary
# Directory the result JSON files and checkpoints are read from.
RESULTS_ROOT = ROOT.joinpath("training", "kan_bench_results")

# Documents copied into the bundle's ``docs`` directory by ``build_bundle``.
DEFAULT_DOCS = [
    ROOT.joinpath("docs", "defense_packet.md"),
    ROOT.joinpath("docs", "defense_packet.json"),
    ROOT.joinpath("docs", "patent_evidence_packet.md"),
    ROOT.joinpath("docs", "patents", "CROSS_REFERENCE_MATRIX.md"),
    ROOT.joinpath("docs", "patents", "REPO_PATENT_INVENTORY.md"),
]
def _copy_many(paths: Iterable[Path], dest: Path, keep_parent: bool = False) -> List[str]:
copied: List[str] = []
dest.mkdir(parents=True, exist_ok=True)
for path in paths:
if not path.exists():
continue
filename = f"{path.parent.name}__{path.name}" if keep_parent else path.name
target = dest / filename
shutil.copy2(path, target)
copied.append(str(target.resolve().relative_to(ROOT.resolve())))
return copied
def _latest_verification_summaries() -> List[Path]:
    """Return the summary file reported by ``_latest_summary`` per category.

    ``_latest_summary`` is asked once for each verification prefix. Prefixes
    for which it returns ``None`` are dropped; otherwise the first element of
    the returned pair is treated as a path relative to ``ROOT`` and the second
    element is ignored.

    Returns:
        Paths under ``ROOT``, in prefix order. Their existence is not checked
        here.
    """
    categories = ("smoke", "benchmark", "benchmark-matrix", "ablation", "patent-evidence")
    hits = (hit for hit in map(_latest_summary, categories) if hit is not None)
    return [ROOT / rel_path for rel_path, _ in hits]
def _repo_relative(path: Path) -> str:
    """Return *path* relative to ``ROOT``, or its absolute form if outside it."""
    resolved = path.resolve()
    try:
        return str(resolved.relative_to(ROOT.resolve()))
    except ValueError:
        # The path is not inside the repository (e.g. an external artifact root).
        return str(resolved)


def build_bundle(root: str = ARTIFACT_ROOT_DEFAULT) -> dict:
    """Rebuild the artifact bundle under *root* from scratch.

    Any existing bundle directory is deleted, the layout is recreated, and the
    known result files, checkpoints, documents and verification summaries are
    copied in. Source files that do not exist are skipped. A ``manifest.json``
    listing everything copied is written at the bundle root.

    Args:
        root: Artifact root handed to ``build_artifact_layout``.

    Returns:
        A dict with the keys ``results``, ``checkpoints``, ``docs``,
        ``verification`` and ``manifest``, each a list of path strings
        (relative to ``ROOT`` when inside the repository, absolute otherwise).

    Raises:
        ValueError: If the bundle root is the repository root or one of its
            ancestors, because wiping it would delete the repository.
    """
    # NOTE(review): ``ensure()`` presumably creates the layout directories and
    # returns the layout itself - confirm in scripts.hf_training_contract.
    layout = build_artifact_layout(root).ensure()

    # The bundle directory is wiped below, so never let it be the repository
    # itself or a directory that contains the repository.
    bundle_root = layout.root.resolve()
    repo_root = ROOT.resolve()
    if bundle_root == repo_root or bundle_root in repo_root.parents:
        raise ValueError(
            f"refusing to wipe artifact root {bundle_root}: it contains the repository"
        )

    # Start from an empty bundle so stale files from earlier runs never leak in.
    if layout.root.exists():
        shutil.rmtree(layout.root)
    layout = build_artifact_layout(root).ensure()

    docs_dir = layout.root / "docs"
    verify_dir = layout.root / "verification"
    result_files = [
        RESULTS_ROOT / "text2cypher_v4_results.json",
        RESULTS_ROOT / "spider2_v2_results.json",
        RESULTS_ROOT / "unified_dialect_results.json",
        RESULTS_ROOT / "swebench_results.json",
        RESULTS_ROOT / "sota_comparison_table.json",
    ]
    checkpoint_files = [
        RESULTS_ROOT / "sota_text2cypher_checkpoint.pt",
        RESULTS_ROOT / "sota_spider2_checkpoint.pt",
        RESULTS_ROOT / "sota_unified_checkpoint.pt",
        RESULTS_ROOT / "sota_swebench_checkpoint.pt",
        RESULTS_ROOT / "sota_gaia_checkpoint.pt",
    ]
    copied = {
        "results": _copy_many(result_files, layout.results_dir),
        "checkpoints": _copy_many(checkpoint_files, layout.checkpoints_dir),
        "docs": _copy_many(DEFAULT_DOCS, docs_dir),
        # keep_parent prefixes each summary with its directory name so that
        # same-named summaries from different directories do not collide.
        "verification": _copy_many(_latest_verification_summaries(), verify_dir, keep_parent=True),
    }

    # The manifest records the bundle root plus every copied file; it does not
    # list itself.
    manifest = {"artifact_root": _repo_relative(layout.root), **copied}
    manifest_path = layout.root / "manifest.json"
    manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    copied["manifest"] = [_repo_relative(manifest_path)]
    return copied
def main() -> int:
    """Command-line entry point: build the bundle and print a JSON report.

    Reads the optional ``--artifact-root`` argument (defaulting to
    ``ARTIFACT_ROOT_DEFAULT``), passes it to ``build_bundle`` and prints the
    returned mapping to standard output as indented JSON.

    Returns:
        ``0`` once the report has been printed.
    """
    cli = argparse.ArgumentParser(description="Package current repo artifacts for HF upload")
    cli.add_argument("--artifact-root", default=ARTIFACT_ROOT_DEFAULT)
    options = cli.parse_args()

    report = build_bundle(options.artifact_root)
    rendered = json.dumps(report, indent=2)
    print(rendered)
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())