# pi05tests-openpi-multiarm / openpi/scripts/upload_stepcmp_bundle_to_hf.py
# Uploaded by user "lsnu" via the upload-large-folder tool (commit aea1653, verified).
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
from pathlib import Path
from huggingface_hub import HfApi
# Named upload profiles: each key maps to a list of glob-style allow-patterns
# (relative to --folder-path) passed to HfApi.upload_large_folder. Checkpoint
# trees use "**" to include everything under the directory.
PATTERN_SETS = {
    # Scripts, configs, logs, and small artifacts — everything except checkpoints.
    "meta": [
        "README.md",
        "run_logs/hf_upload_20260310.log",
        "artifacts/twin_dual_push_128_stepcmp_2k_20260311/**",
        "artifacts/twin_dual_push_128_stepcmp_2k_20260311_debug/**",
        "artifacts/twin_split_expert_bringup_20260310/**",
        "openpi/run_logs/*.log",
        "openpi/scripts/check_parallel_warmstart_equivalence.py",
        "openpi/scripts/check_split_expert_invariants.py",
        "openpi/scripts/eval_twin_val_loss_pytorch.py",
        "openpi/scripts/init_parallel_pi05_from_single_pytorch.py",
        "openpi/scripts/prune_stepcmp_checkpoints.py",
        "openpi/scripts/run_twin_dual_push_128_packed_5k.sh",
        "openpi/scripts/run_twin_dual_push_128_stepcmp_2k.sh",
        "openpi/scripts/run_twin_handover_packed_10k.sh",
        "openpi/scripts/train_pytorch.py",
        "openpi/scripts/collect_twin_dual_push_128_stepcmp_metrics.py",
        "openpi/scripts/upload_stepcmp_bundle_to_hf.py",
        "openpi/src/openpi/models/pi0_config.py",
        "openpi/src/openpi/models/utils/fsq_tokenizer.py",
        "openpi/src/openpi/models_pytorch/gemma_pytorch.py",
        "openpi/src/openpi/models_pytorch/pi0_pytorch.py",
        "openpi/src/openpi/training/config.py",
        "openpi/src/openpi/training/data_loader.py",
    ],
    # Small smoke-test checkpoints for the split-expert debug runs.
    "debug_smoke_ckpts": [
        "openpi/checkpoints/debug_pi05_split_communicating_pytorch_smoke/**",
        "openpi/checkpoints/debug_pi05_split_independent_pytorch_smoke/**",
    ],
    # One profile per 2k step-comparison checkpoint variant, so each large
    # tree can be uploaded (or retried) independently.
    "shared_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_baseline_pytorch_5k/dual_push_128_stepcmp_shared_2k/**",
    ],
    "head_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_parallel_pytorch_5k/dual_push_128_stepcmp_head_only_2k/**",
    ],
    "split_ind_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_split_expert_independent_pytorch_5k/dual_push_128_stepcmp_split_ind_2k/**",
    ],
    "split_comm_ckpt": [
        "openpi/checkpoints/pi05_twin_dual_push_128_packed_split_expert_communicating_pytorch_5k/dual_push_128_stepcmp_split_comm_2k/**",
    ],
}
# When no --profile is given, upload every profile in this order.
DEFAULT_PROFILES = ["meta", "debug_smoke_ckpts", "shared_ckpt", "head_ckpt", "split_ind_ckpt", "split_comm_ckpt"]
def _load_token() -> str:
token = os.environ.get("HF_TOKEN")
token_file = os.environ.get("HF_TOKEN_FILE")
if not token and token_file:
path = Path(token_file)
if path.exists():
token = path.read_text().strip()
if os.environ.get("HF_TOKEN_FILE_DELETE_AFTER_READ") == "1":
path.unlink(missing_ok=True)
if not token:
raise RuntimeError("HF_TOKEN or HF_TOKEN_FILE must be set")
return token
def _resolve_patterns(profiles: list[str]) -> list[str]:
    """Flatten the allow-patterns for *profiles* into one de-duplicated list.

    First-occurrence order is preserved across profiles.

    Raises:
        ValueError: on the first profile name missing from PATTERN_SETS.
    """
    # Dict used as an insertion-ordered set.
    collected: dict[str, None] = {}
    for name in profiles:
        if name not in PATTERN_SETS:
            raise ValueError(f"Unknown profile: {name}")
        for pattern in PATTERN_SETS[name]:
            collected.setdefault(pattern, None)
    return list(collected)
def main() -> None:
    """Parse CLI options and push the selected pattern profiles to the Hub."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--repo-id", default="lsnu/pi05tests-openpi-multiarm")
    parser.add_argument("--folder-path", default="/workspace/pi05tests")
    parser.add_argument("--repo-type", default="model")
    parser.add_argument("--num-workers", type=int, default=8)
    parser.add_argument(
        "--profile",
        action="append",
        choices=sorted(PATTERN_SETS),
        help="Upload a named subset. May be repeated.",
    )
    args = parser.parse_args()

    hf_token = _load_token()

    root = Path(args.folder_path).resolve()
    if not root.is_dir():
        raise FileNotFoundError(root)

    # No --profile flags means "upload everything".
    selected = args.profile if args.profile else DEFAULT_PROFILES
    allow = _resolve_patterns(selected)

    # Echo the effective configuration so the run log records exactly what
    # was requested.
    print(
        f"upload_large_folder repo_id={args.repo_id} repo_type={args.repo_type} "
        f"folder_path={root} profiles={','.join(selected)}"
    )
    for allow_pattern in allow:
        print(f"allow_pattern={allow_pattern}")

    HfApi(token=hf_token).upload_large_folder(
        repo_id=args.repo_id,
        folder_path=root,
        repo_type=args.repo_type,
        allow_patterns=allow,
        num_workers=args.num_workers,
        print_report=True,
    )
# Standard script entry guard: run the CLI only when executed directly.
if __name__ == "__main__":
    main()