#!/usr/bin/env python3 from __future__ import annotations import argparse import os import sys from pathlib import Path from typing import Iterable, List # Allow running from the repo root without installing the package. ROOT = Path(__file__).resolve().parents[1] if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) from src.protomorph.hf_utils import get_hf_repo_id, get_hf_token, normalize_repo_id DEFAULT_REPO = "shiowo/DINO-Protomorph" REQUIRED_FILES = [ "README.md", "checkpoints/config.json", "checkpoints/labels.txt", "checkpoints/protomorph_head.safetensors", "src/protomorph/model.py", "src/protomorph/config.py", "infer.py", ] IGNORE_PATTERNS = [ ".git/*", ".venv/*", "venv/*", "env/*", "__pycache__/*", "**/__pycache__/*", "*.pyc", ".ipynb_checkpoints/*", "**/.ipynb_checkpoints/*", ".cache/*", "hf_cache/*", "outputs/*", "wandb/*", "data/*", "datasets/*", "*.zip", "*.tar", "*.tar.gz", "*.7z", ] def human_size(n: int) -> str: units = ["B", "KB", "MB", "GB", "TB"] size = float(n) for unit in units: if size < 1024 or unit == units[-1]: return f"{size:.1f} {unit}" if unit != "B" else f"{int(size)} B" size /= 1024 return f"{n} B" def iter_upload_files(source: Path, ignore_dirs: Iterable[str]) -> List[Path]: ignore_dir_names = set(ignore_dirs) files: List[Path] = [] for path in source.rglob("*"): if path.is_dir(): continue rel = path.relative_to(source) parts = set(rel.parts) if parts & ignore_dir_names: continue if path.suffix in {".pyc", ".zip", ".7z"}: continue files.append(rel) return sorted(files) def check_required(source: Path) -> None: missing = [rel for rel in REQUIRED_FILES if not (source / rel).exists()] if missing: joined = "\n - ".join(missing) raise FileNotFoundError(f"Missing required files for HF upload:\n - {joined}") def parse_args() -> argparse.Namespace: p = argparse.ArgumentParser(description="Upload ProtoMorph-DINO files to a Hugging Face model repo.") p.add_argument("--source", default=".", help="Folder to upload. Default: current project root.") p.add_argument("--repo-id", default=None, help="HF repo id or URL. Default: env hf_repo/HF_REPO_ID, then shiowo/DINO-Protomorph.") p.add_argument("--token", default=None, help="HF token. Default: env hf_key/HF_TOKEN/etc. Do not paste this into logs.") p.add_argument("--revision", default="main", help="Target branch/revision. Default: main.") p.add_argument("--private", action="store_true", help="Create repo as private if it does not exist yet.") p.add_argument("--no-create", action="store_true", help="Do not create the repo if missing.") p.add_argument("--dry-run", action="store_true", help="Print what would be uploaded, then exit.") p.add_argument("--commit-message", default="Upload ProtoMorph-DINO scaffold and checkpoint", help="HF commit message.") return p.parse_args() def main() -> None: args = parse_args() source = Path(args.source).resolve() if not source.exists() or not source.is_dir(): raise NotADirectoryError(f"Source folder does not exist: {source}") repo_id = normalize_repo_id(args.repo_id) if args.repo_id else get_hf_repo_id(DEFAULT_REPO) token = args.token or get_hf_token() check_required(source) files = iter_upload_files(source, ignore_dirs={".git", ".venv", "venv", "env", "__pycache__", ".ipynb_checkpoints", ".cache", "hf_cache", "outputs", "wandb", "data", "datasets"}) total_bytes = sum((source / f).stat().st_size for f in files) print(f"HF repo: {repo_id}") print(f"Source: {source}") print(f"Files: {len(files)} files, {human_size(total_bytes)}") print("Token: " + ("found" if token else "missing")) if args.dry_run: print("\nDry run file list:") for rel in files: print(f" {rel}") print("\nNo upload performed.") return if not token: raise RuntimeError( "No Hugging Face token found. In RunPod environment variables, set `hf_key=hf_xxx`, " "or set standard `HF_TOKEN=hf_xxx`." ) try: from huggingface_hub import HfApi except ImportError as e: raise ImportError("Install huggingface_hub first: pip install huggingface_hub") from e api = HfApi(token=token) if not args.no_create: api.create_repo(repo_id=repo_id, repo_type="model", private=args.private, exist_ok=True) api.upload_folder( folder_path=str(source), repo_id=repo_id, repo_type="model", revision=args.revision, commit_message=args.commit_message, ignore_patterns=IGNORE_PATTERNS, ) print(f"\nUpload complete: https://huggingface.co/{repo_id}") if __name__ == "__main__": main()