| |
| from __future__ import annotations |
|
|
| import argparse |
| import os |
| import sys |
| from pathlib import Path |
| from typing import Iterable, List |
|
|
| |
# Make the project root importable when this script is run directly
# (e.g. `python scripts/upload_hf.py`) instead of as an installed module.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))


from src.protomorph.hf_utils import get_hf_repo_id, get_hf_token, normalize_repo_id


# Fallback repo id used when neither --repo-id nor the env vars consulted by
# get_hf_repo_id() are set.
DEFAULT_REPO = "shiowo/DINO-Protomorph"
# Files (relative to --source) that must exist before an upload is attempted.
REQUIRED_FILES = [
    "README.md",
    "checkpoints/config.json",
    "checkpoints/labels.txt",
    "checkpoints/protomorph_head.safetensors",
    "src/protomorph/model.py",
    "src/protomorph/config.py",
    "infer.py",
]
# Glob patterns handed to HfApi.upload_folder(ignore_patterns=...) so caches,
# virtualenvs, datasets, and archives never reach the Hub.
IGNORE_PATTERNS = [
    ".git/*",
    ".venv/*",
    "venv/*",
    "env/*",
    "__pycache__/*",
    "**/__pycache__/*",
    "*.pyc",
    ".ipynb_checkpoints/*",
    "**/.ipynb_checkpoints/*",
    ".cache/*",
    "hf_cache/*",
    "outputs/*",
    "wandb/*",
    "data/*",
    "datasets/*",
    "*.zip",
    "*.tar",
    "*.tar.gz",
    "*.7z",
]
|
|
|
|
def human_size(n: int) -> str:
    """Render a byte count as a short human-readable string.

    Values below 1024 are shown as whole bytes ("512 B"); larger values are
    scaled with one decimal place up to terabytes ("1.5 KB", "2.0 GB").
    """
    labels = ("B", "KB", "MB", "GB", "TB")
    value = float(n)
    idx = 0
    # Scale down until the value fits, capping at the largest label.
    while value >= 1024 and idx < len(labels) - 1:
        value /= 1024
        idx += 1
    if idx == 0:
        return f"{int(value)} B"
    return f"{value:.1f} {labels[idx]}"
|
|
|
|
def iter_upload_files(source: Path, ignore_dirs: Iterable[str]) -> List[Path]:
    """List the files under *source* that an upload would include.

    Used for the summary/dry-run preview, so it should mirror IGNORE_PATTERNS
    as closely as a local walk can.

    Args:
        source: Root folder to scan recursively.
        ignore_dirs: Directory names (matched at any depth) whose contents
            are excluded.

    Returns:
        Sorted list of file paths relative to *source*.
    """
    skip_names = set(ignore_dirs)
    # Bug fix: the old suffix check {".pyc", ".zip", ".7z"} missed ".tar" and
    # ".tar.gz", so the preview over-reported files that IGNORE_PATTERNS would
    # exclude server-side. endswith() on the full name also handles the
    # double-extension ".tar.gz", which Path.suffix (".gz") cannot.
    skip_endings = (".pyc", ".zip", ".7z", ".tar", ".tar.gz")
    files: List[Path] = []
    for path in source.rglob("*"):
        if path.is_dir():
            continue
        rel = path.relative_to(source)
        # Exclude anything inside an ignored directory at any depth.
        if skip_names & set(rel.parts):
            continue
        if path.name.endswith(skip_endings):
            continue
        files.append(rel)
    return sorted(files)
|
|
|
|
def check_required(source: Path) -> None:
    """Verify every path in REQUIRED_FILES exists under *source*.

    Raises:
        FileNotFoundError: listing each missing required file.
    """
    missing = []
    for rel in REQUIRED_FILES:
        if not (source / rel).exists():
            missing.append(rel)
    if not missing:
        return
    listing = "\n - ".join(missing)
    raise FileNotFoundError(f"Missing required files for HF upload:\n - {listing}")
|
|
|
|
def parse_args(argv: List[str] | None = None) -> argparse.Namespace:
    """Build and run the CLI argument parser.

    Args:
        argv: Argument list to parse. Defaults to None, which makes argparse
            fall back to sys.argv[1:] — so existing callers are unaffected,
            while tests can pass an explicit list.

    Returns:
        The parsed argparse.Namespace.
    """
    p = argparse.ArgumentParser(description="Upload ProtoMorph-DINO files to a Hugging Face model repo.")
    p.add_argument("--source", default=".", help="Folder to upload. Default: current project root.")
    p.add_argument("--repo-id", default=None, help="HF repo id or URL. Default: env hf_repo/HF_REPO_ID, then shiowo/DINO-Protomorph.")
    p.add_argument("--token", default=None, help="HF token. Default: env hf_key/HF_TOKEN/etc. Do not paste this into logs.")
    p.add_argument("--revision", default="main", help="Target branch/revision. Default: main.")
    p.add_argument("--private", action="store_true", help="Create repo as private if it does not exist yet.")
    p.add_argument("--no-create", action="store_true", help="Do not create the repo if missing.")
    p.add_argument("--dry-run", action="store_true", help="Print what would be uploaded, then exit.")
    p.add_argument("--commit-message", default="Upload ProtoMorph-DINO scaffold and checkpoint", help="HF commit message.")
    return p.parse_args(argv)
|
|
|
|
def main() -> None:
    """Entry point: validate the local scaffold, then upload it to the HF Hub.

    Order of operations: resolve/validate --source, resolve repo id and token
    (CLI flag first, then environment via hf_utils, then DEFAULT_REPO), check
    required files, print a summary, honor --dry-run, then create the repo if
    needed and upload the folder.

    Raises:
        NotADirectoryError: if --source is not an existing directory.
        FileNotFoundError: from check_required, if scaffold files are missing.
        RuntimeError: if no HF token could be found and this is not a dry run.
        ImportError: if huggingface_hub is not installed.
    """
    args = parse_args()
    source = Path(args.source).resolve()
    if not source.exists() or not source.is_dir():
        raise NotADirectoryError(f"Source folder does not exist: {source}")


    # CLI flag wins; otherwise hf_utils consults env vars, then DEFAULT_REPO.
    repo_id = normalize_repo_id(args.repo_id) if args.repo_id else get_hf_repo_id(DEFAULT_REPO)
    token = args.token or get_hf_token()


    # Fail fast — before any network call — if the scaffold is incomplete.
    check_required(source)


    # NOTE(review): this local listing approximates IGNORE_PATTERNS for the
    # summary/dry-run only; the real filtering happens in upload_folder via
    # ignore_patterns. The two lists can drift — keep them in sync.
    files = iter_upload_files(source, ignore_dirs={".git", ".venv", "venv", "env", "__pycache__", ".ipynb_checkpoints", ".cache", "hf_cache", "outputs", "wandb", "data", "datasets"})
    total_bytes = sum((source / f).stat().st_size for f in files)


    print(f"HF repo: {repo_id}")
    print(f"Source: {source}")
    print(f"Files: {len(files)} files, {human_size(total_bytes)}")
    # Deliberately prints only presence, never the token value itself.
    print("Token: " + ("found" if token else "missing"))


    if args.dry_run:
        print("\nDry run file list:")
        for rel in files:
            print(f" {rel}")
        print("\nNo upload performed.")
        return


    # Token is only required for a real upload, so this check comes after
    # the dry-run early exit.
    if not token:
        raise RuntimeError(
            "No Hugging Face token found. In RunPod environment variables, set `hf_key=hf_xxx`, "
            "or set standard `HF_TOKEN=hf_xxx`."
        )


    # Imported lazily so summaries/dry runs work without huggingface_hub.
    try:
        from huggingface_hub import HfApi
    except ImportError as e:
        raise ImportError("Install huggingface_hub first: pip install huggingface_hub") from e


    api = HfApi(token=token)
    if not args.no_create:
        # exist_ok=True keeps repeated runs idempotent.
        api.create_repo(repo_id=repo_id, repo_type="model", private=args.private, exist_ok=True)


    api.upload_folder(
        folder_path=str(source),
        repo_id=repo_id,
        repo_type="model",
        revision=args.revision,
        commit_message=args.commit_message,
        ignore_patterns=IGNORE_PATTERNS,
    )
    print(f"\nUpload complete: https://huggingface.co/{repo_id}")




if __name__ == "__main__":
    main()
|
|