DINO-Protomorph / scripts /upload_to_hf.py
shiowo's picture
Upload ProtoMorph-DINO scaffold and random head checkpoint
63089c1 verified
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import sys
from pathlib import Path
from typing import Iterable, List
# Make the repository root importable so the `src.` package import below
# resolves when this script is run from a checkout without installing it.
ROOT = Path(__file__).resolve().parents[1]
_ROOT_ENTRY = str(ROOT)
if _ROOT_ENTRY not in sys.path:
    # Prepend so the checkout takes precedence over other sys.path entries.
    sys.path.insert(0, _ROOT_ENTRY)
from src.protomorph.hf_utils import get_hf_repo_id, get_hf_token, normalize_repo_id
# Fallback repo id handed to get_hf_repo_id() when --repo-id is not given.
DEFAULT_REPO = "shiowo/DINO-Protomorph"

# Paths (relative to the upload source folder) that check_required() insists
# on before anything is uploaded.
REQUIRED_FILES = [
    # Project documentation.
    "README.md",
    # Checkpoint artifacts.
    "checkpoints/config.json",
    "checkpoints/labels.txt",
    "checkpoints/protomorph_head.safetensors",
    # Model code and inference entry point.
    "src/protomorph/model.py",
    "src/protomorph/config.py",
    "infer.py",
]

# Glob patterns passed as `ignore_patterns` to HfApi.upload_folder().
IGNORE_PATTERNS = [
    # Version control and virtual environments.
    ".git/*",
    ".venv/*",
    "venv/*",
    "env/*",
    # Python bytecode.
    "__pycache__/*",
    "**/__pycache__/*",
    "*.pyc",
    # Notebook checkpoints.
    ".ipynb_checkpoints/*",
    "**/.ipynb_checkpoints/*",
    # Caches, run outputs and experiment logs.
    ".cache/*",
    "hf_cache/*",
    "outputs/*",
    "wandb/*",
    # Local datasets.
    "data/*",
    "datasets/*",
    # Archives.
    "*.zip",
    "*.tar",
    "*.tar.gz",
    "*.7z",
]
def human_size(n: int) -> str:
    """Format a byte count as a short human-readable string.

    Values below 1024 are shown as a whole number of bytes (``"512 B"``).
    Larger values are divided by 1024 per step and shown with one decimal
    in KB, MB, GB or TB; TB is the largest unit, so anything bigger is
    still expressed in TB.

    Args:
        n: Size in bytes.

    Returns:
        The formatted size, e.g. ``"1.5 KB"``.
    """
    remaining = float(n)

    # Plain bytes are printed without a fractional part.
    if remaining < 1024:
        return f"{int(remaining)} B"

    for label in ("KB", "MB", "GB"):
        remaining /= 1024
        if remaining < 1024:
            return f"{remaining:.1f} {label}"

    # Largest supported unit: report in TB regardless of magnitude.
    remaining /= 1024
    return f"{remaining:.1f} TB"
def iter_upload_files(source: Path, ignore_dirs: Iterable[str]) -> List[Path]:
    """Collect the files under ``source`` that are candidates for upload.

    The result drives the file count, size total and dry-run listing, so it
    should agree with what the upload's ignore patterns let through.

    Args:
        source: Root folder to scan recursively.
        ignore_dirs: Directory names to skip. A file is dropped when any of
            its *parent directory* components, at any depth, carries one of
            these names.

    Returns:
        Sorted list of file paths relative to ``source``.
    """
    ignore_dir_names = set(ignore_dirs)
    # Same file types that IGNORE_PATTERNS excludes from the upload
    # ("*.pyc", "*.zip", "*.tar", "*.tar.gz", "*.7z"). Matching on the name
    # ending rather than Path.suffix is what lets ".tar.gz" be recognised.
    skipped_endings = (".pyc", ".zip", ".7z", ".tar", ".tar.gz")

    files: List[Path] = []
    for path in source.rglob("*"):
        if path.is_dir():
            continue
        rel = path.relative_to(source)
        # Compare directory components only: a regular file that merely
        # shares its name with an ignored directory (e.g. "src/env") is
        # still uploaded and therefore must be listed.
        if ignore_dir_names.intersection(rel.parts[:-1]):
            continue
        if path.name.endswith(skipped_endings):
            continue
        files.append(rel)
    return sorted(files)
def check_required(source: Path) -> None:
    """Verify that every path in ``REQUIRED_FILES`` exists under ``source``.

    Args:
        source: Folder that is about to be uploaded.

    Raises:
        FileNotFoundError: If one or more required paths are absent; the
            message lists every missing path.
    """
    absent = []
    for rel in REQUIRED_FILES:
        if (source / rel).exists():
            continue
        absent.append(rel)

    if not absent:
        return

    listing = "\n - ".join(absent)
    raise FileNotFoundError(f"Missing required files for HF upload:\n - {listing}")
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        Namespace with ``source``, ``repo_id``, ``token``, ``revision``,
        ``private``, ``no_create``, ``dry_run`` and ``commit_message``.
    """
    parser = argparse.ArgumentParser(
        description="Upload ProtoMorph-DINO files to a Hugging Face model repo."
    )

    # (flag, add_argument keyword arguments), registered in this order so the
    # generated --help output keeps its layout.
    option_specs = [
        ("--source", {"default": ".", "help": "Folder to upload. Default: current project root."}),
        (
            "--repo-id",
            {
                "default": None,
                "help": "HF repo id or URL. Default: env hf_repo/HF_REPO_ID, then shiowo/DINO-Protomorph.",
            },
        ),
        (
            "--token",
            {
                "default": None,
                "help": "HF token. Default: env hf_key/HF_TOKEN/etc. Do not paste this into logs.",
            },
        ),
        ("--revision", {"default": "main", "help": "Target branch/revision. Default: main."}),
        (
            "--private",
            {"action": "store_true", "help": "Create repo as private if it does not exist yet."},
        ),
        ("--no-create", {"action": "store_true", "help": "Do not create the repo if missing."}),
        ("--dry-run", {"action": "store_true", "help": "Print what would be uploaded, then exit."}),
        (
            "--commit-message",
            {
                "default": "Upload ProtoMorph-DINO scaffold and checkpoint",
                "help": "HF commit message.",
            },
        ),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
def main() -> None:
    """Command-line entry point: validate, preview, then upload the folder.

    Steps, in order: parse arguments, resolve the source folder, resolve the
    repo id and token, check the required files, print a summary, and then
    either stop (``--dry-run``) or create the repo (unless ``--no-create``)
    and upload the folder.

    Raises:
        NotADirectoryError: If the source path is missing or not a directory.
        FileNotFoundError: Propagated from ``check_required``.
        RuntimeError: If an upload is requested but no token was found.
        ImportError: If ``huggingface_hub`` is not installed.
    """
    args = parse_args()

    source = Path(args.source).resolve()
    if not (source.exists() and source.is_dir()):
        raise NotADirectoryError(f"Source folder does not exist: {source}")

    # An explicit --repo-id wins; otherwise defer to the environment helper.
    if args.repo_id:
        repo_id = normalize_repo_id(args.repo_id)
    else:
        repo_id = get_hf_repo_id(DEFAULT_REPO)
    token = args.token or get_hf_token()

    check_required(source)

    # Directory names left out of the local preview (count, size, listing).
    skipped_dirs = {
        ".git",
        ".venv",
        "venv",
        "env",
        "__pycache__",
        ".ipynb_checkpoints",
        ".cache",
        "hf_cache",
        "outputs",
        "wandb",
        "data",
        "datasets",
    }
    files = iter_upload_files(source, ignore_dirs=skipped_dirs)
    total_bytes = sum((source / rel).stat().st_size for rel in files)

    # Only report whether a token exists; its value is never printed.
    token_state = "found" if token else "missing"
    print(f"HF repo: {repo_id}")
    print(f"Source: {source}")
    print(f"Files: {len(files)} files, {human_size(total_bytes)}")
    print("Token: " + token_state)

    if args.dry_run:
        print("\nDry run file list:")
        for rel in files:
            print(f" {rel}")
        print("\nNo upload performed.")
        return

    if not token:
        raise RuntimeError(
            "No Hugging Face token found. In RunPod environment variables, set `hf_key=hf_xxx`, "
            "or set standard `HF_TOKEN=hf_xxx`."
        )

    # Imported here so --dry-run works without huggingface_hub installed.
    try:
        from huggingface_hub import HfApi
    except ImportError as e:
        raise ImportError("Install huggingface_hub first: pip install huggingface_hub") from e

    api = HfApi(token=token)
    if not args.no_create:
        api.create_repo(repo_id=repo_id, repo_type="model", private=args.private, exist_ok=True)

    upload_options = {
        "folder_path": str(source),
        "repo_id": repo_id,
        "repo_type": "model",
        "revision": args.revision,
        "commit_message": args.commit_message,
        "ignore_patterns": IGNORE_PATTERNS,
    }
    api.upload_folder(**upload_options)

    print(f"\nUpload complete: https://huggingface.co/{repo_id}")


if __name__ == "__main__":
    main()