# tar the run dir with the model, onnx, eval.json, and a manifest. prints the
# sha256 and a scp line for the user to copy.
import hashlib
import json
import tarfile
from pathlib import Path


def _sha256_file(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is read in 1 MiB chunks so it is never held in memory whole.
    """
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            h.update(chunk)
    return h.hexdigest()


def _manifest_entry(path: Path, run_dir: Path) -> dict:
    """Build the manifest record (relative path, size, sha256) for one file."""
    return {
        # POSIX separators so the manifest matches the tar member names,
        # which always use "/" regardless of the host OS.
        "path": path.relative_to(run_dir).as_posix(),
        "size": path.stat().st_size,
        "sha256": _sha256_file(path),
    }


def pack(run_dir: Path, dist_dir: Path, run_id: str) -> Path:
    """Bundle the whitelisted artifacts of *run_dir* into a gzipped tarball.

    Side effects: creates *dist_dir* if needed, writes ``manifest.json`` into
    *run_dir*, writes ``mumble-cleanup-<run_id>.tar.gz`` into *dist_dir*, and
    prints the tarball's SHA-256 together with copy/verify command lines.

    Whitelisted entries that do not exist are skipped silently.

    Args:
        run_dir: Directory holding the run's outputs.
        dist_dir: Directory the tarball is written to.
        run_id: Identifier embedded in the tarball file name.

    Returns:
        Path of the tarball that was written.
    """
    run_dir = Path(run_dir)
    dist_dir = Path(dist_dir)
    dist_dir.mkdir(parents=True, exist_ok=True)

    # whitelist what we ship. excludes optimizer state, training_args, etc.
    include = [
        run_dir / "model",
        run_dir / "onnx",
        run_dir / "eval.json",
        run_dir / "training_history.json",
        run_dir / "train_summary.json",
    ]
    # Resolve existence once so the manifest and the tarball agree on content.
    present = [p for p in include if p.exists()]

    manifest = {"files": []}
    for p in present:
        if p.is_file():
            manifest["files"].append(_manifest_entry(p, run_dir))
            continue
        # rglob order is filesystem-dependent; sort so the manifest is
        # reproducible from one machine to the next.
        for child in sorted(p.rglob("*")):
            if child.is_file():
                manifest["files"].append(_manifest_entry(child, run_dir))

    manifest_path = run_dir / "manifest.json"
    manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")

    tarball = dist_dir / f"mumble-cleanup-{run_id}.tar.gz"
    print(f"[pack] writing {tarball}")
    with tarfile.open(tarball, "w:gz") as tar:
        tar.add(manifest_path, arcname="manifest.json")
        for p in present:
            # Directories are added recursively by tarfile.
            tar.add(p, arcname=p.relative_to(run_dir).as_posix())

    digest = _sha256_file(tarball)
    print(f"[pack] tarball sha256: {digest}")
    print()
    print("scp it down with:")
    print(f" vastai copy :/workspace/cleanup/{tarball} ./dist/{tarball.name}")
    print()
    print("verify locally with:")
    print(f" sha256sum dist/{tarball.name} # expect: {digest}")
    return tarball