Spaces:
Sleeping
Sleeping
fix: preserve file permissions across HF dataset sync
Browse files
HF datasets strip Unix permissions, causing binaries (cursor-server,
etc.) to lose execute bits after restore.
- Save exec permission manifest before upload (find -perm /111)
- Restore +x from manifest after download
- Exclude .cursor-server/.vscode-server from sync (huge, reinstallable,
caused persistent 504 timeouts on home/ uploads)
- Clean up old IDE server data from /data/
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- entrypoint.py +41 -4
entrypoint.py
CHANGED
|
@@ -35,7 +35,10 @@ PIP_FILE = os.path.join(PERSIST_PATH, "pip-packages.txt")
|
|
| 35 |
SYNC_DIRS = ["/home", "/root", "/usr/local", "/opt", "/var/lib", "/var/log", "/etc"]
|
| 36 |
|
| 37 |
# Exclude from rsync (relative paths - rsync matches from transfer root)
|
| 38 |
-
RSYNC_EXCLUDES = [
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# Extra excludes only for /etc/ restore (container-managed files)
|
| 41 |
ETC_RESTORE_EXCLUDES = [
|
|
@@ -49,7 +52,9 @@ UPLOAD_IGNORE = [
|
|
| 49 |
"__pycache__", "*.pyc", ".git", ".git*",
|
| 50 |
"*.sock", "*.lock", ".huggingface", ".cache",
|
| 51 |
"huggingrun.env", # contains HF_TOKEN
|
|
|
|
| 52 |
]
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
# -- Logging -----------------------------------------------------------
|
|
@@ -190,6 +195,21 @@ def restore():
|
|
| 190 |
else:
|
| 191 |
log(f" some pip packages failed")
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
# Fix SSH key permissions (HF dataset doesn't preserve permissions)
|
| 194 |
sh("chmod 600 /etc/ssh/ssh_host_*_key 2>/dev/null")
|
| 195 |
sh("chmod 644 /etc/ssh/ssh_host_*_key.pub 2>/dev/null")
|
|
@@ -250,10 +270,26 @@ def save_and_upload():
|
|
| 250 |
cmd = f"rsync -rlptD --delete {excludes} '{d}/' '{dst}/'"
|
| 251 |
sh(cmd)
|
| 252 |
|
| 253 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
for dirpath, dirnames, _ in os.walk(PERSIST_PATH):
|
| 255 |
for dn in list(dirnames):
|
| 256 |
-
if dn
|
| 257 |
shutil.rmtree(os.path.join(dirpath, dn), ignore_errors=True)
|
| 258 |
dirnames.remove(dn)
|
| 259 |
|
|
@@ -264,7 +300,8 @@ def save_and_upload():
|
|
| 264 |
fail = 0
|
| 265 |
|
| 266 |
# Upload package list files + log
|
| 267 |
-
for pf, name in [(PKG_FILE, "user-packages.list"), (PIP_FILE, "pip-packages.txt"),
|
|
|
|
| 268 |
if os.path.exists(pf):
|
| 269 |
try:
|
| 270 |
api.upload_file(
|
|
|
|
| 35 |
SYNC_DIRS = ["/home", "/root", "/usr/local", "/opt", "/var/lib", "/var/log", "/etc"]
|
| 36 |
|
| 37 |
# Exclude from rsync (relative paths - rsync matches from transfer root)
|
| 38 |
+
RSYNC_EXCLUDES = [
|
| 39 |
+
"*.sock", "*.pid", "*.lock", "huggingrun.log",
|
| 40 |
+
".cursor-server", ".vscode-server", # IDE servers: huge, reinstallable
|
| 41 |
+
]
|
| 42 |
|
| 43 |
# Extra excludes only for /etc/ restore (container-managed files)
|
| 44 |
ETC_RESTORE_EXCLUDES = [
|
|
|
|
| 52 |
"__pycache__", "*.pyc", ".git", ".git*",
|
| 53 |
"*.sock", "*.lock", ".huggingface", ".cache",
|
| 54 |
"huggingrun.env", # contains HF_TOKEN
|
| 55 |
+
".cursor-server", ".vscode-server",
|
| 56 |
]
|
| 57 |
+
EXEC_MANIFEST = os.path.join(PERSIST_PATH, "exec-manifest.txt")
|
| 58 |
|
| 59 |
|
| 60 |
# -- Logging -----------------------------------------------------------
|
|
|
|
| 195 |
else:
|
| 196 |
log(f" some pip packages failed")
|
| 197 |
|
| 198 |
+
# Restore execute permissions from manifest (HF dataset strips them)
|
| 199 |
+
manifest = os.path.join(PERSIST_PATH, "exec-manifest.txt")
|
| 200 |
+
if os.path.exists(manifest):
|
| 201 |
+
count = 0
|
| 202 |
+
with open(manifest) as f:
|
| 203 |
+
for line in f:
|
| 204 |
+
path = line.strip()
|
| 205 |
+
if path and os.path.exists(path):
|
| 206 |
+
try:
|
| 207 |
+
os.chmod(path, os.stat(path).st_mode | 0o111)
|
| 208 |
+
count += 1
|
| 209 |
+
except Exception:
|
| 210 |
+
pass
|
| 211 |
+
log(f" restored +x on {count} files")
|
| 212 |
+
|
| 213 |
# Fix SSH key permissions (HF dataset doesn't preserve permissions)
|
| 214 |
sh("chmod 600 /etc/ssh/ssh_host_*_key 2>/dev/null")
|
| 215 |
sh("chmod 644 /etc/ssh/ssh_host_*_key.pub 2>/dev/null")
|
|
|
|
| 270 |
cmd = f"rsync -rlptD --delete {excludes} '{d}/' '{dst}/'"
|
| 271 |
sh(cmd)
|
| 272 |
|
| 273 |
+
# Save executable file manifest (HF dataset strips permissions)
|
| 274 |
+
exec_files = []
|
| 275 |
+
for d in SYNC_DIRS:
|
| 276 |
+
dst = os.path.join(PERSIST_PATH, d.lstrip("/"))
|
| 277 |
+
if os.path.isdir(dst):
|
| 278 |
+
rc, out = sh(f"find '{dst}' -type f -perm /111 2>/dev/null")
|
| 279 |
+
if rc == 0 and out:
|
| 280 |
+
for p in out.strip().split('\n'):
|
| 281 |
+
p = p.strip()
|
| 282 |
+
if p.startswith(PERSIST_PATH):
|
| 283 |
+
exec_files.append('/' + p[len(PERSIST_PATH):].lstrip('/'))
|
| 284 |
+
if exec_files:
|
| 285 |
+
with open(EXEC_MANIFEST, "w") as f:
|
| 286 |
+
f.write('\n'.join(exec_files) + '\n')
|
| 287 |
+
log(f" saved {len(exec_files)} executable paths")
|
| 288 |
+
|
| 289 |
+
# Clean dirs that should not be uploaded
|
| 290 |
for dirpath, dirnames, _ in os.walk(PERSIST_PATH):
|
| 291 |
for dn in list(dirnames):
|
| 292 |
+
if dn in (".cache", ".cursor-server", ".vscode-server"):
|
| 293 |
shutil.rmtree(os.path.join(dirpath, dn), ignore_errors=True)
|
| 294 |
dirnames.remove(dn)
|
| 295 |
|
|
|
|
| 300 |
fail = 0
|
| 301 |
|
| 302 |
# Upload package list files + log
|
| 303 |
+
for pf, name in [(PKG_FILE, "user-packages.list"), (PIP_FILE, "pip-packages.txt"),
|
| 304 |
+
(EXEC_MANIFEST, "exec-manifest.txt"), (LOGFILE, "huggingrun.log")]:
|
| 305 |
if os.path.exists(pf):
|
| 306 |
try:
|
| 307 |
api.upload_file(
|