tao-shen Claude Opus 4.6 committed on
Commit
0cdf7b0
·
1 Parent(s): 03209c8

feat: sync /etc with proper excludes + pip package persistence

Browse files

- Restore /etc/ sync with relative-path excludes for container-managed
files (hostname, hosts, resolv.conf, alternatives, ld.so.cache)
- Add pip freeze/install cycle for pip package persistence
- Upload pip-packages.txt alongside user-packages.list

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. entrypoint.py +45 -15
entrypoint.py CHANGED
@@ -3,7 +3,7 @@
3
  HuggingRun v2 — Ubuntu Server on HuggingFace Spaces.
4
 
5
  Persistence via HF Dataset (direct file sync, no archives):
6
- - Sync scope: /home, /root, /usr/local, /opt, /var/lib, /etc/ssh
7
  - System packages: saved as package name list, restored via apt install
8
  - Symlinks/permissions safe: no system binary dirs synced
9
  """
@@ -29,14 +29,21 @@ TTYD_PORT = os.environ.get("TTYD_PORT", "7681")
29
  LOGFILE = "/var/log/huggingrun.log"
30
  BASE_PKG_FILE = "/etc/base-packages.list"
31
  PKG_FILE = os.path.join(PERSIST_PATH, "user-packages.list")
 
32
 
33
  # Directories to sync (user data only, no system binaries)
34
- # NOTE: /etc/ssh only (for host keys), NOT all of /etc/ (breaks sshd/PAM)
35
- SYNC_DIRS = ["/home", "/root", "/usr/local", "/opt", "/var/lib", "/etc/ssh"]
36
 
37
- # Exclude from rsync
38
  RSYNC_EXCLUDES = ["*.sock", "*.pid", "*.lock"]
39
 
 
 
 
 
 
 
 
40
  # Exclude from HF upload
41
  UPLOAD_IGNORE = [
42
  "__pycache__", "*.pyc", ".git", ".git*",
@@ -136,11 +143,15 @@ def restore():
136
 
137
  # Rsync each directory back (no --delete, don't remove existing files)
138
  log("── RESTORE: rsync /data/ → /")
139
- excludes = " ".join(f"--exclude='{e}'" for e in RSYNC_EXCLUDES)
 
140
  for d in SYNC_DIRS:
141
  src = os.path.join(PERSIST_PATH, d.lstrip("/"))
142
  if not os.path.isdir(src):
143
  continue
 
 
 
144
  cmd = f"rsync -rlptD {excludes} '{src}/' '{d}/'"
145
  rc, _ = sh(cmd, log_err=True)
146
  if rc == 0:
@@ -167,6 +178,18 @@ def restore():
167
  else:
168
  log("── RESTORE: no package list")
169
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  # Fix SSH key permissions (HF dataset doesn't preserve permissions)
171
  sh("chmod 600 /etc/ssh/ssh_host_*_key 2>/dev/null")
172
  sh("chmod 644 /etc/ssh/ssh_host_*_key.pub 2>/dev/null")
@@ -211,6 +234,12 @@ def save_and_upload():
211
  with open(PKG_FILE, "w") as f:
212
  f.write(out + "\n")
213
 
 
 
 
 
 
 
214
  # Rsync each sync dir → /data/
215
  excludes = " ".join(f"--exclude='{e}'" for e in RSYNC_EXCLUDES)
216
  for d in SYNC_DIRS:
@@ -234,16 +263,17 @@ def save_and_upload():
234
  ok = 0
235
  fail = 0
236
 
237
- # Upload package list file
238
- if os.path.exists(PKG_FILE):
239
- try:
240
- api.upload_file(
241
- path_or_fileobj=PKG_FILE, path_in_repo="user-packages.list",
242
- repo_id=HF_DATASET_REPO, repo_type="dataset",
243
- commit_message=f"sync {ts}: packages",
244
- )
245
- except Exception:
246
- pass
 
247
 
248
  # Upload each sync dir
249
  for d in SYNC_DIRS:
 
3
  HuggingRun v2 — Ubuntu Server on HuggingFace Spaces.
4
 
5
  Persistence via HF Dataset (direct file sync, no archives):
6
+ - Sync scope: /home, /root, /usr/local, /opt, /var/lib, /etc
7
  - System packages: saved as package name list, restored via apt install
8
  - Symlinks/permissions safe: no system binary dirs synced
9
  """
 
29
  LOGFILE = "/var/log/huggingrun.log"
30
  BASE_PKG_FILE = "/etc/base-packages.list"
31
  PKG_FILE = os.path.join(PERSIST_PATH, "user-packages.list")
32
+ PIP_FILE = os.path.join(PERSIST_PATH, "pip-packages.txt")
33
 
34
  # Directories to sync (user data only, no system binaries)
35
+ SYNC_DIRS = ["/home", "/root", "/usr/local", "/opt", "/var/lib", "/etc"]
 
36
 
37
+ # Exclude from rsync (relative paths — rsync matches from transfer root)
38
  RSYNC_EXCLUDES = ["*.sock", "*.pid", "*.lock"]
39
 
40
+ # Extra excludes only for /etc/ restore (container-managed files)
41
+ ETC_RESTORE_EXCLUDES = [
42
+ "hostname", "hosts", "resolv.conf", "mtab", "fstab",
43
+ "alternatives", # symlinks managed by dpkg
44
+ "ld.so.cache", # rebuilt by ldconfig
45
+ ]
46
+
47
  # Exclude from HF upload
48
  UPLOAD_IGNORE = [
49
  "__pycache__", "*.pyc", ".git", ".git*",
 
143
 
144
  # Rsync each directory back (no --delete, don't remove existing files)
145
  log("── RESTORE: rsync /data/ → /")
146
+ base_excludes = " ".join(f"--exclude='{e}'" for e in RSYNC_EXCLUDES)
147
+ etc_excludes = " ".join(f"--exclude='{e}'" for e in ETC_RESTORE_EXCLUDES)
148
  for d in SYNC_DIRS:
149
  src = os.path.join(PERSIST_PATH, d.lstrip("/"))
150
  if not os.path.isdir(src):
151
  continue
152
+ excludes = base_excludes
153
+ if d == "/etc":
154
+ excludes = f"{base_excludes} {etc_excludes}"
155
  cmd = f"rsync -rlptD {excludes} '{src}/' '{d}/'"
156
  rc, _ = sh(cmd, log_err=True)
157
  if rc == 0:
 
178
  else:
179
  log("── RESTORE: no package list")
180
 
181
+ # Reinstall pip packages
182
+ if os.path.exists(PIP_FILE):
183
+ with open(PIP_FILE) as f:
184
+ pip_pkgs = [l.strip() for l in f if l.strip() and not l.startswith('#')]
185
+ if pip_pkgs:
186
+ log(f"── RESTORE: pip install {len(pip_pkgs)} packages")
187
+ rc, _ = sh(f"pip install --break-system-packages -q -r '{PIP_FILE}'")
188
+ if rc == 0:
189
+ log(f" pip packages restored")
190
+ else:
191
+ log(f" some pip packages failed")
192
+
193
  # Fix SSH key permissions (HF dataset doesn't preserve permissions)
194
  sh("chmod 600 /etc/ssh/ssh_host_*_key 2>/dev/null")
195
  sh("chmod 644 /etc/ssh/ssh_host_*_key.pub 2>/dev/null")
 
234
  with open(PKG_FILE, "w") as f:
235
  f.write(out + "\n")
236
 
237
+ # Save pip package list (user-installed only, exclude base)
238
+ rc, out = sh("pip freeze --exclude-editable 2>/dev/null")
239
+ if rc == 0 and out:
240
+ with open(PIP_FILE, "w") as f:
241
+ f.write(out + "\n")
242
+
243
  # Rsync each sync dir → /data/
244
  excludes = " ".join(f"--exclude='{e}'" for e in RSYNC_EXCLUDES)
245
  for d in SYNC_DIRS:
 
263
  ok = 0
264
  fail = 0
265
 
266
+ # Upload package list files
267
+ for pf, name in [(PKG_FILE, "user-packages.list"), (PIP_FILE, "pip-packages.txt")]:
268
+ if os.path.exists(pf):
269
+ try:
270
+ api.upload_file(
271
+ path_or_fileobj=pf, path_in_repo=name,
272
+ repo_id=HF_DATASET_REPO, repo_type="dataset",
273
+ commit_message=f"sync {ts}: {name}",
274
+ )
275
+ except Exception:
276
+ pass
277
 
278
  # Upload each sync dir
279
  for d in SYNC_DIRS: