File size: 4,987 Bytes
63089c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path
from typing import Iterable, List

# Allow running from the repo root without installing the package.
# ROOT is the parent of the directory that contains this script; putting it
# at the front of sys.path lets the `src.protomorph` import below resolve.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from src.protomorph.hf_utils import get_hf_repo_id, get_hf_token, normalize_repo_id

# Fallback repo id handed to get_hf_repo_id() when --repo-id is not given.
DEFAULT_REPO = "shiowo/DINO-Protomorph"
# Paths (relative to the upload source folder) that check_required() insists
# on finding before anything is listed or uploaded.
REQUIRED_FILES = [
    "README.md",
    "checkpoints/config.json",
    "checkpoints/labels.txt",
    "checkpoints/protomorph_head.safetensors",
    "src/protomorph/model.py",
    "src/protomorph/config.py",
    "infer.py",
]
# Glob patterns passed as `ignore_patterns` to HfApi.upload_folder() in main():
# VCS/virtualenv/cache directories, run outputs, datasets, bytecode and archives.
IGNORE_PATTERNS = [
    ".git/*",
    ".venv/*",
    "venv/*",
    "env/*",
    "__pycache__/*",
    "**/__pycache__/*",
    "*.pyc",
    ".ipynb_checkpoints/*",
    "**/.ipynb_checkpoints/*",
    ".cache/*",
    "hf_cache/*",
    "outputs/*",
    "wandb/*",
    "data/*",
    "datasets/*",
    "*.zip",
    "*.tar",
    "*.tar.gz",
    "*.7z",
]


def human_size(n: int) -> str:
    """Format a byte count as a short human-readable string.

    Values below 1024 are printed as whole bytes (``"512 B"``); larger values
    are scaled by powers of 1024 and printed with one decimal place using
    KB, MB, GB or TB. TB is the largest unit, so anything bigger is still
    expressed in TB.
    """
    value = float(n)
    if value < 1024:
        return f"{int(value)} B"
    for suffix in ("KB", "MB", "GB"):
        value /= 1024
        if value < 1024:
            return f"{value:.1f} {suffix}"
    # Nothing smaller fit: report in the largest supported unit.
    return f"{value / 1024:.1f} TB"


def iter_upload_files(source: Path, ignore_dirs: Iterable[str]) -> List[Path]:
    """List the files under *source* that are candidates for upload.

    Args:
        source: Root folder to walk recursively.
        ignore_dirs: Directory names to skip. A file is excluded when any of
            its parent directories (at any depth below *source*) has one of
            these names. The file's own name is not compared, so a regular
            file that happens to be called e.g. ``env`` is still listed.

    Returns:
        Paths relative to *source*, sorted.

    Bytecode and archive files (``.pyc``, ``.zip``, ``.tar``, ``.tar.gz``,
    ``.7z``) are always skipped. This mirrors the file globs in
    IGNORE_PATTERNS, so the preview and size total do not count files that
    the real upload leaves out.
    """
    ignore_dir_names = set(ignore_dirs)
    # str.endswith takes a tuple; this also handles the two-part ".tar.gz",
    # which Path.suffix (".gz" only) cannot express.
    skipped_endings = (".pyc", ".zip", ".tar", ".tar.gz", ".7z")
    files: List[Path] = []
    for path in source.rglob("*"):
        if path.is_dir():
            continue
        rel = path.relative_to(source)
        # Only directory components count; rel.parts[-1] is the file name.
        if ignore_dir_names.intersection(rel.parts[:-1]):
            continue
        if rel.name.endswith(skipped_endings):
            continue
        files.append(rel)
    return sorted(files)


def check_required(source: Path) -> None:
    """Verify that every entry of REQUIRED_FILES exists under *source*.

    Raises:
        FileNotFoundError: if one or more required paths are absent; the
            message lists each missing path on its own bullet line.
    """
    missing = []
    for rel in REQUIRED_FILES:
        if (source / rel).exists():
            continue
        missing.append(rel)

    if not missing:
        return

    joined = "\n  - ".join(missing)
    raise FileNotFoundError(f"Missing required files for HF upload:\n  - {joined}")


def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Returns:
        Namespace with ``source``, ``repo_id``, ``token``, ``revision``,
        ``private``, ``no_create``, ``dry_run`` and ``commit_message``.
    """
    parser = argparse.ArgumentParser(
        description="Upload ProtoMorph-DINO files to a Hugging Face model repo.",
    )

    # Options that take a value.
    parser.add_argument(
        "--source",
        default=".",
        help="Folder to upload. Default: current project root.",
    )
    parser.add_argument(
        "--repo-id",
        default=None,
        help="HF repo id or URL. Default: env hf_repo/HF_REPO_ID, then shiowo/DINO-Protomorph.",
    )
    parser.add_argument(
        "--token",
        default=None,
        help="HF token. Default: env hf_key/HF_TOKEN/etc. Do not paste this into logs.",
    )
    parser.add_argument(
        "--revision",
        default="main",
        help="Target branch/revision. Default: main.",
    )

    # Boolean switches.
    parser.add_argument(
        "--private",
        action="store_true",
        help="Create repo as private if it does not exist yet.",
    )
    parser.add_argument(
        "--no-create",
        action="store_true",
        help="Do not create the repo if missing.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print what would be uploaded, then exit.",
    )

    parser.add_argument(
        "--commit-message",
        default="Upload ProtoMorph-DINO scaffold and checkpoint",
        help="HF commit message.",
    )
    return parser.parse_args()


def main() -> None:
    """Command-line entry point: validate the source folder, then upload it.

    Steps, in order: parse arguments, resolve the source folder, pick the repo
    id and token, check the required files, print a summary, and either stop
    (``--dry-run``) or create the repo (unless ``--no-create``) and upload.

    Raises:
        NotADirectoryError: the source path is missing or not a directory.
        FileNotFoundError: propagated from check_required().
        RuntimeError: a real upload was requested but no token is available.
        ImportError: huggingface_hub is not installed.
    """
    args = parse_args()

    source = Path(args.source).resolve()
    if not (source.exists() and source.is_dir()):
        raise NotADirectoryError(f"Source folder does not exist: {source}")

    # An explicit --repo-id wins; otherwise defer to the helper's lookup with
    # DEFAULT_REPO as its argument.
    if args.repo_id:
        repo_id = normalize_repo_id(args.repo_id)
    else:
        repo_id = get_hf_repo_id(DEFAULT_REPO)
    token = args.token or get_hf_token()

    check_required(source)

    skipped_dirs = {
        ".git",
        ".venv",
        "venv",
        "env",
        "__pycache__",
        ".ipynb_checkpoints",
        ".cache",
        "hf_cache",
        "outputs",
        "wandb",
        "data",
        "datasets",
    }
    files = iter_upload_files(source, ignore_dirs=skipped_dirs)
    total_bytes = 0
    for rel in files:
        total_bytes += (source / rel).stat().st_size

    # Only report whether a token exists; never echo its value.
    token_state = "found" if token else "missing"
    print(f"HF repo: {repo_id}")
    print(f"Source:  {source}")
    print(f"Files:   {len(files)} files, {human_size(total_bytes)}")
    print("Token:   " + token_state)

    if args.dry_run:
        print("\nDry run file list:")
        for rel in files:
            print(f"  {rel}")
        print("\nNo upload performed.")
        return

    if not token:
        raise RuntimeError(
            "No Hugging Face token found. In RunPod environment variables, set `hf_key=hf_xxx`, "
            "or set standard `HF_TOKEN=hf_xxx`."
        )

    # Imported here so that --dry-run works without huggingface_hub installed.
    try:
        from huggingface_hub import HfApi
    except ImportError as e:
        raise ImportError("Install huggingface_hub first: pip install huggingface_hub") from e

    api = HfApi(token=token)
    if not args.no_create:
        api.create_repo(
            repo_id=repo_id,
            repo_type="model",
            private=args.private,
            exist_ok=True,
        )

    upload_kwargs = dict(
        folder_path=str(source),
        repo_id=repo_id,
        repo_type="model",
        revision=args.revision,
        commit_message=args.commit_message,
        ignore_patterns=IGNORE_PATTERNS,
    )
    api.upload_folder(**upload_kwargs)
    print(f"\nUpload complete: https://huggingface.co/{repo_id}")


# Run the uploader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()