| |
"""Package + upload the MapGS/MapTokenGS bundle to a PRIVATE HF repo.

Code, checkpoints, and the README are uploaded directly. The ~98 GB of processed
clips consist of ~390k small files (which exceeds HF file limits), so they are
tarred into shards of at most SHARD_GB gigabytes and uploaded one at a time;
each tar is deleted after its upload to stay within local free disk. The token
is read from $HF_TOKEN (never hard-coded)."""
| import os, glob, tarfile, sys |
| from huggingface_hub import HfApi, create_repo |
|
|
# Hub credentials and destination repo. HF_TOKEN is mandatory: a missing
# variable raises KeyError as soon as this module is loaded. HF_REPO is optional.
TOKEN = os.environ["HF_TOKEN"]
REPO = os.environ.get("HF_REPO", "ChenmingWu/mapgs-maptokengs")

# Local layout: ROOT is the tree main() uploads from (code, runs/, data/);
# STAGE is the scratch directory where shard tars are written before upload.
ROOT = "/mnt/william"
STAGE = f"{ROOT}/_upload"

# Target upper bound per data shard, in decimal gigabytes (main() multiplies
# this by 1e9 to get a byte budget).
SHARD_GB = 18.0

# Single authenticated client shared by every upload call in this script.
api = HfApi(token=TOKEN)
|
|
|
|
def log(*parts):
    """Print *parts* like ``print`` does, flushing stdout immediately.

    The flush keeps progress lines visible in real time when output is piped
    or redirected to a file.
    """
    print(*parts, flush=True)
|
|
|
|
def dir_size(d):
    """Return the combined size in bytes of every file beneath directory *d*.

    The tree is walked recursively with ``os.walk``. A file whose size cannot
    be read (``OSError``) is skipped rather than aborting the walk, so the
    result is a best-effort total. A path that does not exist yields 0.
    """
    total = 0
    for parent, _subdirs, names in os.walk(d):
        for name in names:
            path = os.path.join(parent, name)
            try:
                nbytes = os.path.getsize(path)
            except OSError:
                # Unreadable or vanished entry: leave it out of the total.
                continue
            total += nbytes
    return total
|
|
|
|
def _plan_shards(clip_dirs, cap_bytes):
    """Group consecutive clip directories into shards of at most *cap_bytes*.

    Packing is greedy and order-preserving: a shard is closed as soon as the
    next clip would push it over the cap. A single clip larger than the cap
    still gets a shard of its own rather than being dropped.

    Returns a list of shards, each a non-empty list of directory paths.
    """
    shards, current, current_bytes = [], [], 0
    for clip in clip_dirs:
        clip_bytes = dir_size(clip)
        if current and current_bytes + clip_bytes > cap_bytes:
            shards.append(current)
            current, current_bytes = [], 0
        current.append(clip)
        current_bytes += clip_bytes
    if current:
        shards.append(current)
    return shards


def _upload_shard(index, total, dirs):
    """Tar *dirs* into STAGE, upload the tar to ``data/``, then delete it.

    The local tar is removed whether or not packing/upload succeeds, so a
    failed run never leaves a multi-GB file behind in STAGE. Any exception
    from tarring or uploading propagates to the caller unchanged.
    """
    name = f"data_shard_{index:03d}.tar"
    tarp = f"{STAGE}/{name}"
    try:
        # Plain (uncompressed) tar; members are stored relative to ROOT.
        with tarfile.open(tarp, "w") as tf:
            for d in dirs:
                tf.add(d, arcname=os.path.relpath(d, ROOT))
        gb = os.path.getsize(tarp) / 1e9
        api.upload_file(path_or_fileobj=tarp, path_in_repo=f"data/{name}",
                        repo_id=REPO, repo_type="model")
    finally:
        try:
            os.remove(tarp)
        except FileNotFoundError:
            # tarfile.open failed before the file was created; nothing to clean.
            pass
    log(f"uploaded {name} ({gb:.1f} GB, {len(dirs)} clips) [{index + 1}/{total}]")


def main():
    """Create the private repo and upload README, code, checkpoints, and data.

    Steps, in order:
      1. Create REPO as a private model repo (no error if it already exists).
      2. Upload ROOT/README_HF.md as README.md.
      3. Upload the code folders under ROOT, filtered by allow/ignore patterns.
      4. Upload every ROOT/runs/*.safetensors whose file name does not contain
         "maptokengs_fixed".
      5. Pack the clip directories under ROOT/data/unified into tar shards of
         at most SHARD_GB (decimal GB) and upload them one at a time.

    Any failure from the Hub client or the filesystem propagates and aborts
    the run; the shard tar being processed at that moment is still deleted.
    """
    create_repo(REPO, private=True, repo_type="model", exist_ok=True, token=TOKEN)
    log(f"repo ready (private): {REPO}")

    # README
    api.upload_file(path_or_fileobj=f"{ROOT}/README_HF.md", path_in_repo="README.md",
                    repo_id=REPO, repo_type="model")
    log("uploaded README.md")

    # Code: only the listed folders, minus caches, VCS data, tars, and media.
    api.upload_folder(folder_path=ROOT, repo_id=REPO, repo_type="model", path_in_repo=".",
                      allow_patterns=["mapgs/**", "maptokengs/**", "scripts/**", "_tokengs_repo/**"],
                      ignore_patterns=["**/__pycache__/**", "**/*.pyc", "**/.git/**", "**/*.tar",
                                       "_tokengs_repo/assets/**", "**/*.gif"])
    log("uploaded code (mapgs, maptokengs, scripts, _tokengs_repo)")

    # Checkpoints, uploaded one file at a time in sorted order.
    cks = [p for p in sorted(glob.glob(f"{ROOT}/runs/*.safetensors"))
           if "maptokengs_fixed" not in os.path.basename(p)]
    for p in cks:
        api.upload_file(path_or_fileobj=p, path_in_repo=f"runs/{os.path.basename(p)}",
                        repo_id=REPO, repo_type="model")
        log("uploaded ckpt", os.path.basename(p))

    # Data: clip directories sit three levels below data/unified.
    os.makedirs(STAGE, exist_ok=True)
    clip_dirs = sorted(d for d in glob.glob(f"{ROOT}/data/unified/*/*/*") if os.path.isdir(d))
    log(f"clip dirs: {len(clip_dirs)} — computing sizes / packing into shards")
    shards = _plan_shards(clip_dirs, SHARD_GB * 1e9)
    log(f"{len(shards)} data shards")

    for i, dirs in enumerate(shards):
        _upload_shard(i, len(shards), dirs)

    log("=== UPLOAD COMPLETE ===")
|
|
|
|
# Script entry point: run the upload only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|