| | |
| |
|
| | from __future__ import annotations |
| |
|
| | import argparse |
| | import os |
| | from pathlib import Path |
| |
|
| | from huggingface_hub import HfApi |
| |
|
| |
|
# Named groups of allow-patterns for `HfApi.upload_large_folder`. Keys are the
# profile names accepted by the `--profile` CLI flag; values are glob patterns
# interpreted relative to the upload folder root.
PATTERN_SETS = {
    # Lightweight metadata: docs, run logs, scripts, and the source files
    # touched by the step-comparison experiments.
    "meta": [
        "README.md",
        "run_logs/hf_upload_20260310.log",
        "artifacts/twin_dual_push_128_stepcmp_2k_20260311/**",
        "artifacts/twin_dual_push_128_stepcmp_2k_20260311_debug/**",
        "artifacts/twin_split_expert_bringup_20260310/**",
        "openpi/run_logs/*.log",
        "openpi/scripts/check_parallel_warmstart_equivalence.py",
        "openpi/scripts/check_split_expert_invariants.py",
        "openpi/scripts/eval_twin_val_loss_pytorch.py",
        "openpi/scripts/init_parallel_pi05_from_single_pytorch.py",
        "openpi/scripts/prune_stepcmp_checkpoints.py",
        "openpi/scripts/run_twin_dual_push_128_packed_5k.sh",
        "openpi/scripts/run_twin_dual_push_128_stepcmp_2k.sh",
        "openpi/scripts/run_twin_handover_packed_10k.sh",
        "openpi/scripts/train_pytorch.py",
        "openpi/scripts/collect_twin_dual_push_128_stepcmp_metrics.py",
        "openpi/scripts/upload_stepcmp_bundle_to_hf.py",
        "openpi/src/openpi/models/pi0_config.py",
        "openpi/src/openpi/models/utils/fsq_tokenizer.py",
        "openpi/src/openpi/models_pytorch/gemma_pytorch.py",
        "openpi/src/openpi/models_pytorch/pi0_pytorch.py",
        "openpi/src/openpi/training/config.py",
        "openpi/src/openpi/training/data_loader.py",
    ],
    # Small debug/smoke-test checkpoints.
    "debug_smoke_ckpts": [
        "openpi/checkpoints/debug_pi05_split_communicating_pytorch_smoke/**",
        "openpi/checkpoints/debug_pi05_split_independent_pytorch_smoke/**",
    ],
    # Full checkpoint trees, one profile per 2k step-comparison variant so
    # the heavy uploads can be run (or retried) independently.
    "shared_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_baseline_pytorch_5k/dual_push_128_stepcmp_shared_2k/**",
    ],
    "head_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_parallel_pytorch_5k/dual_push_128_stepcmp_head_only_2k/**",
    ],
    "split_ind_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_split_expert_independent_pytorch_5k/dual_push_128_stepcmp_split_ind_2k/**",
    ],
    "split_comm_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_split_expert_communicating_pytorch_5k/dual_push_128_stepcmp_split_comm_2k/**",
    ],
}
# Default: upload every profile. Derived from PATTERN_SETS (dicts preserve
# insertion order) so the two can never drift apart when profiles change;
# previously this was a hand-maintained duplicate list of the same keys.
DEFAULT_PROFILES = list(PATTERN_SETS)
| |
|
| |
|
| | def _load_token() -> str: |
| | token = os.environ.get("HF_TOKEN") |
| | token_file = os.environ.get("HF_TOKEN_FILE") |
| | if not token and token_file: |
| | path = Path(token_file) |
| | if path.exists(): |
| | token = path.read_text().strip() |
| | if os.environ.get("HF_TOKEN_FILE_DELETE_AFTER_READ") == "1": |
| | path.unlink(missing_ok=True) |
| | if not token: |
| | raise RuntimeError("HF_TOKEN or HF_TOKEN_FILE must be set") |
| | return token |
| |
|
| |
|
def _resolve_patterns(profiles: list[str]) -> list[str]:
    """Flatten the named profiles into one de-duplicated pattern list.

    Order is preserved: patterns appear in profile order, first occurrence
    wins when the same pattern is listed by multiple profiles.

    Raises:
        ValueError: if a profile name is not a key of ``PATTERN_SETS``.
    """
    # A dict doubles as an insertion-ordered set for de-duplication.
    merged: dict[str, None] = {}
    for name in profiles:
        if name not in PATTERN_SETS:
            raise ValueError(f"Unknown profile: {name}")
        for entry in PATTERN_SETS[name]:
            merged[entry] = None
    return list(merged)
| |
|
| |
|
def main() -> None:
    """Upload selected subsets of a local folder to the Hugging Face Hub.

    CLI flags select the repo, local folder, and which named pattern profiles
    to include; by default every profile in ``DEFAULT_PROFILES`` is uploaded.
    The resolved allow-patterns are echoed before the upload starts.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--repo-id", default="lsnu/pi05tests-openpi-multiarm")
    parser.add_argument("--folder-path", default="/workspace/pi05tests")
    parser.add_argument("--repo-type", default="model")
    parser.add_argument("--num-workers", type=int, default=8)
    parser.add_argument(
        "--profile",
        action="append",
        choices=sorted(PATTERN_SETS),
        help="Upload a named subset. May be repeated.",
    )
    args = parser.parse_args()

    token = _load_token()

    root = Path(args.folder_path).resolve()
    if not root.is_dir():
        raise FileNotFoundError(root)

    # argparse leaves --profile as None when never passed.
    selected = args.profile if args.profile else DEFAULT_PROFILES
    allow = _resolve_patterns(selected)
    api = HfApi(token=token)

    # Echo the effective configuration so run logs are self-describing.
    print(
        f"upload_large_folder repo_id={args.repo_id} repo_type={args.repo_type} "
        f"folder_path={root} profiles={','.join(selected)}"
    )
    for allow_pattern in allow:
        print(f"allow_pattern={allow_pattern}")

    api.upload_large_folder(
        repo_id=args.repo_id,
        folder_path=root,
        repo_type=args.repo_type,
        allow_patterns=allow,
        num_workers=args.num_workers,
        print_report=True,
    )
| |
|
| |
|
# Script entry point: only run the upload when executed directly, not on import.
if __name__ == "__main__":
    main()
| |
|