#!/usr/bin/env python3
from __future__ import annotations

import argparse
import os
from pathlib import Path

from huggingface_hub import HfApi

# Named upload profiles: each maps to a list of allow-patterns (glob syntax)
# relative to --folder-path.
PATTERN_SETS = {
    "meta": [
        "README.md",
        "run_logs/hf_upload_20260310.log",
        "artifacts/twin_dual_push_128_stepcmp_2k_20260311/**",
        "artifacts/twin_dual_push_128_stepcmp_2k_20260311_debug/**",
        "artifacts/twin_split_expert_bringup_20260310/**",
        "openpi/run_logs/*.log",
        "openpi/scripts/check_parallel_warmstart_equivalence.py",
        "openpi/scripts/check_split_expert_invariants.py",
        "openpi/scripts/eval_twin_val_loss_pytorch.py",
        "openpi/scripts/init_parallel_pi05_from_single_pytorch.py",
        "openpi/scripts/prune_stepcmp_checkpoints.py",
        "openpi/scripts/run_twin_dual_push_128_packed_5k.sh",
        "openpi/scripts/run_twin_dual_push_128_stepcmp_2k.sh",
        "openpi/scripts/run_twin_handover_packed_10k.sh",
        "openpi/scripts/train_pytorch.py",
        "openpi/scripts/collect_twin_dual_push_128_stepcmp_metrics.py",
        "openpi/scripts/upload_stepcmp_bundle_to_hf.py",
        "openpi/src/openpi/models/pi0_config.py",
        "openpi/src/openpi/models/utils/fsq_tokenizer.py",
        "openpi/src/openpi/models_pytorch/gemma_pytorch.py",
        "openpi/src/openpi/models_pytorch/pi0_pytorch.py",
        "openpi/src/openpi/training/config.py",
        "openpi/src/openpi/training/data_loader.py",
    ],
    "debug_smoke_ckpts": [
        "openpi/checkpoints/debug_pi05_split_communicating_pytorch_smoke/**",
        "openpi/checkpoints/debug_pi05_split_independent_pytorch_smoke/**",
    ],
    "shared_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_baseline_pytorch_5k/dual_push_128_stepcmp_shared_2k/**",
    ],
    "head_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_parallel_pytorch_5k/dual_push_128_stepcmp_head_only_2k/**",
    ],
    "split_ind_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_split_expert_independent_pytorch_5k/dual_push_128_stepcmp_split_ind_2k/**",
    ],
    "split_comm_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_split_expert_communicating_pytorch_5k/dual_push_128_stepcmp_split_comm_2k/**",
    ],
}

DEFAULT_PROFILES = [
    "meta",
    "debug_smoke_ckpts",
    "shared_ckpt",
    "head_ckpt",
    "split_ind_ckpt",
    "split_comm_ckpt",
]


def _load_token() -> str:
    """Read the HF token from HF_TOKEN, or from the file named by HF_TOKEN_FILE."""
    token = os.environ.get("HF_TOKEN")
    token_file = os.environ.get("HF_TOKEN_FILE")
    if not token and token_file:
        path = Path(token_file)
        if path.exists():
            token = path.read_text().strip()
            # Optionally delete the token file once it has been consumed.
            if os.environ.get("HF_TOKEN_FILE_DELETE_AFTER_READ") == "1":
                path.unlink(missing_ok=True)
    if not token:
        raise RuntimeError("HF_TOKEN or HF_TOKEN_FILE must be set")
    return token


def _resolve_patterns(profiles: list[str]) -> list[str]:
    """Expand profile names into a flat list of allow-patterns."""
    patterns: list[str] = []
    for profile in profiles:
        if profile not in PATTERN_SETS:
            raise ValueError(f"Unknown profile: {profile}")
        patterns.extend(PATTERN_SETS[profile])
    # Preserve order while dropping duplicates.
    return list(dict.fromkeys(patterns))


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--repo-id", default="lsnu/pi05tests-openpi-multiarm")
    parser.add_argument("--folder-path", default="/workspace/pi05tests")
    parser.add_argument("--repo-type", default="model")
    parser.add_argument("--num-workers", type=int, default=8)
    parser.add_argument(
        "--profile",
        action="append",
        choices=sorted(PATTERN_SETS),
        help="Upload a named subset. May be repeated.",
    )
    args = parser.parse_args()

    token = _load_token()
    folder_path = Path(args.folder_path).resolve()
    if not folder_path.is_dir():
        raise FileNotFoundError(folder_path)

    # With no --profile flags, upload every profile.
    profiles = args.profile or DEFAULT_PROFILES
    patterns = _resolve_patterns(profiles)

    api = HfApi(token=token)
    print(
        f"upload_large_folder repo_id={args.repo_id} repo_type={args.repo_type} "
        f"folder_path={folder_path} profiles={','.join(profiles)}"
    )
    for pattern in patterns:
        print(f"allow_pattern={pattern}")
    api.upload_large_folder(
        repo_id=args.repo_id,
        folder_path=folder_path,
        repo_type=args.repo_type,
        allow_patterns=patterns,
        num_workers=args.num_workers,
        print_report=True,
    )


if __name__ == "__main__":
    main()