#!/usr/bin/env python3
"""Incremental HF Space sync — upload only changed files.
Avoids the 50k+ file scan that breaks `hf upload . .` on large repos.
"""
import argparse
import subprocess
import sys
from pathlib import Path
# Hugging Face repository that this script syncs to.
REPO_ID = "crexs/phi-drift"
REPO_TYPE = "space"

# Path components that should never be uploaded to HF. A path is blocked as
# soon as any one of its components equals an entry in this set.
SKIP_PATHS = {
    # Python environments and caches
    "venv",
    ".venv",
    "__pycache__",
    ".pytest_cache",
    ".cache",
    # VCS / editor / tooling state
    ".git",
    ".idea",
    ".obsidian",
    ".agents",
    ".mouse_vanguard",
    # Result dumps and recovery material
    "ABLATION_RESULTS",
    "LIVE_ABLATION_RESULTS",
    "BLKKNIGHT_RECOVERY",
    # Local data and working directories
    "outreach",
    "chroma_db",
    "voices",
    "data",
    "logs",
    "scratch",
}

# Exact file names to block, wherever they appear in the tree.
SKIP_FILES = {
    "being.db",
    "svalbard_ledger.jsonl",
}

# File-name endings to block.
SKIP_SUFFIXES = {
    ".pyc",
}

# File-name beginnings to block (".env" also covers names like ".env.local").
SKIP_PREFIXES = {
    ".env",
}
def _should_skip(path: str) -> bool:
    """Report whether *path* is on the upload blocklist.

    A path is blocked when any of its components appears in ``SKIP_PATHS``,
    or when its final component is listed in ``SKIP_FILES``, ends with one of
    ``SKIP_SUFFIXES``, or starts with one of ``SKIP_PREFIXES``.
    """
    candidate = Path(path)

    # Component check first: one blocked directory anywhere in the path wins.
    if not SKIP_PATHS.isdisjoint(candidate.parts):
        return True

    filename = candidate.name
    return (
        filename in SKIP_FILES
        or filename.endswith(tuple(SKIP_SUFFIXES))
        or filename.startswith(tuple(SKIP_PREFIXES))
    )
def _run(cmd: list[str], check: bool = True) -> str:
    """Execute *cmd* and return its stdout with surrounding whitespace removed.

    Both stdout and stderr are captured as text. While ``check`` is true, a
    non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        cmd,
        check=check,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return completed.stdout.strip()
def get_changed_files(base: str = "origin/master") -> tuple[list[str], list[str]]:
    """Return (upload_files, delete_files) relative to base.

    Runs ``git diff --name-status -z <base>`` and sorts every reported path
    into one of the two lists:

    * renamed (``R<score>``): the new path is uploaded, the old one deleted
    * copied (``C<score>``): the new path is uploaded, the source is untouched
    * deleted (``D``): the path is deleted
    * anything else (``M``, ``A``, ``T``, unknown codes): the path is uploaded

    ``-z`` makes git terminate every field with NUL and print path names
    verbatim. Without it, git quotes/escapes paths that contain non-ASCII or
    special characters, and those quoted names would not match files on disk.

    Raises:
        subprocess.CalledProcessError: if the git command exits non-zero.
    """
    raw = _run(["git", "diff", "--name-status", "-z", base])

    upload_files: list[str] = []
    delete_files: list[str] = []

    # Records look like "M\0path\0" or, for renames/copies, "R100\0old\0new\0".
    fields = iter(raw.split("\0"))
    for code in fields:
        if not code:
            # Empty output, or the empty token after the final terminator.
            continue
        path = next(fields, None)
        if path is None:
            # Truncated record: a status code without a path. Nothing to sync.
            break

        kind = code[0]  # rename/copy codes carry a similarity score suffix
        if kind in ("R", "C"):
            new_path = next(fields, None)
            if new_path is None:
                break
            upload_files.append(new_path)
            if kind == "R":
                # Only a rename removes the old path; a copy keeps its source.
                delete_files.append(path)
        elif kind == "D":
            delete_files.append(path)
        else:
            # M, A, T, or an unknown status — treat as upload to be safe.
            upload_files.append(path)

    return upload_files, delete_files
def upload_file(path: str, dry_run: bool) -> None:
    """Upload one file to the HF repo via the ``hf upload`` CLI.

    Nothing is uploaded when the path is blocklisted, when it does not exist
    locally, or when ``dry_run`` is set; each case prints one line saying so.
    Otherwise the file is pushed to the same relative path in the repo, as a
    commit of its own.
    """
    source = Path(path)

    if _should_skip(path):
        print(f" SKIP (blocklist): {path}")
    elif not source.exists():
        print(f" SKIP (missing): {path}")
    elif dry_run:
        print(f" UPLOAD (dry-run): {path}")
    else:
        print(f" UPLOAD: {path}")
        _run(
            [
                "hf", "upload",
                REPO_ID,
                str(source),  # local file
                path,         # destination path inside the repo
                "--repo-type", REPO_TYPE,
                "--commit-message", f"sync: update {path}",
                "--quiet",
            ]
        )
def delete_file(path: str, dry_run: bool) -> None:
    """Remove one file from the HF repo with a dedicated delete commit.

    Blocklisted paths are never touched, and in dry-run mode the deletion is
    only announced. Each case prints one line describing what happened.
    """
    if _should_skip(path):
        print(f" SKIP DELETE (blocklist): {path}")
        return

    if dry_run:
        print(f" DELETE (dry-run): {path}")
        return

    print(f" DELETE: {path}")

    # Imported here rather than at module level, so skipped and dry-run paths
    # never need the huggingface_hub package.
    from huggingface_hub import CommitOperationDelete, HfApi

    client = HfApi()
    removal = CommitOperationDelete(path_in_repo=path)
    client.create_commit(
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        operations=[removal],
        commit_message=f"sync: delete {path}",
    )
def main() -> int:
    """Parse the command line and sync files to the HF Space.

    By default the files that differ from ``--base`` are uploaded or deleted.
    With ``--all`` every git-tracked file that is not blocklisted is uploaded
    and nothing is deleted. ``--dry-run`` only prints the planned actions.

    Returns:
        0 on success (including "nothing to do"), 1 as soon as collecting the
        file list, an upload, or a deletion fails.
    """
    parser = argparse.ArgumentParser(description="Sync changed files to HF Space")
    parser.add_argument(
        "--base",
        default="origin/master",
        help="Git ref to diff against (default: origin/master)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print what would be uploaded/deleted without doing it",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Upload all tracked files (full reset)",
    )
    args = parser.parse_args()

    # Collecting the file list shells out to git. Report a failure (unknown
    # base ref, not a repository, git missing) the same way as upload/delete
    # errors instead of letting a traceback escape.
    try:
        if args.all:
            tracked = _run(["git", "ls-files"]).splitlines()
            upload_files = [f for f in tracked if not _should_skip(f)]
            delete_files: list[str] = []
        else:
            upload_files, delete_files = get_changed_files(args.base)
    except subprocess.CalledProcessError as exc:
        print(f"ERROR running git: {exc.stderr}", file=sys.stderr)
        return 1
    except OSError as exc:
        print(f"ERROR running git: {exc}", file=sys.stderr)
        return 1

    if not upload_files and not delete_files:
        print("No changes to sync.")
        return 0

    # --all ignores --base, so do not claim the list is a diff against it.
    if args.all:
        print("All tracked files (full reset):")
    else:
        print(f"Changes against {args.base}:")
    print(f" Upload: {len(upload_files)} file(s)")
    print(f" Delete: {len(delete_files)} file(s)")
    if args.dry_run:
        print("\nDry-run mode — no changes will be made.")

    for path in upload_files:
        try:
            upload_file(path, args.dry_run)
        except subprocess.CalledProcessError as exc:
            print(f" ERROR uploading {path}: {exc.stderr}", file=sys.stderr)
            return 1
        except OSError as exc:
            # e.g. the `hf` executable is not installed or not on PATH
            print(f" ERROR uploading {path}: {exc}", file=sys.stderr)
            return 1

    for path in delete_files:
        try:
            delete_file(path, args.dry_run)
        except Exception as exc:
            # Top-level boundary: any failure from the hub client ends the run.
            print(f" ERROR deleting {path}: {exc}", file=sys.stderr)
            return 1

    print("\nSync complete.")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|