tao-shen Claude Opus 4.6 committed on
Commit
acafb8d
·
1 Parent(s): d09e48b

feat: full filesystem persistence (not just /home + /root)

Browse files

Persist all user-modifiable dirs: /home, /root, /etc, /opt, /var,
/usr/local — with tar -p preserving all permissions. Exclude
Docker-managed files (hostname, hosts, resolv.conf), transient
runtime (var/run, var/lock, *.sock, *.pid), and caches.

Dataset = disk. Container state is fully mirrored.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. entrypoint.py +49 -14
entrypoint.py CHANGED
@@ -4,12 +4,10 @@ HuggingRun v2 — Single entrypoint for Ubuntu Server on HuggingFace Spaces.
4
 
5
  Replaces: entrypoint.sh, start-server.sh, git_sync_daemon.py, log_streamer.py
6
 
7
- Lifecycle:
8
- 1. Resolve config (HF_TOKEN, dataset repo)
9
- 2. Download state.tar.zst from HF Dataset → extract /home, /root
10
- 3. Reinstall user-added packages
11
- 4. Start services (sshd, ttyd, ws-bridge, log streamer, sync thread)
12
- 5. exec nginx (becomes PID 1, opens port 7860)
13
  """
14
 
15
  import http.server
@@ -34,6 +32,38 @@ STATE_FILE = os.path.join(PERSIST_PATH, "state.tar.zst")
34
  PKG_FILE = os.path.join(PERSIST_PATH, "user-packages.list")
35
  BASE_PKG_FILE = "/etc/base-packages.list"
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def log(msg):
39
  ts = time.strftime("%H:%M:%S", time.gmtime())
@@ -133,12 +163,18 @@ def restore_state():
133
  log(f" no state to restore: {e}")
134
  return
135
 
136
- log("── RESTORE: extracting state.tar.zst")
137
  t0 = time.time()
138
  rc, out = run(f"tar --zstd -xpf {STATE_FILE} -C /")
139
  elapsed = time.time() - t0
140
  if rc == 0:
 
 
141
  log(f" extracted ({elapsed:.1f}s)")
 
 
 
 
142
  else:
143
  log(f" extraction failed ({elapsed:.1f}s)")
144
 
@@ -201,24 +237,23 @@ def save_and_upload():
201
  except Exception:
202
  pass
203
 
204
- # Create tar.zst of /home and /root
205
  t0 = time.time()
206
- dirs_to_persist = []
207
- for d in ["home", "root"]:
208
- if os.path.isdir(f"/{d}"):
209
- dirs_to_persist.append(d)
210
  if not dirs_to_persist:
211
  log(" nothing to persist")
212
  return
213
 
214
- rc, out = run(f"tar --zstd -cpf {STATE_FILE} -C / {' '.join(dirs_to_persist)}")
 
 
215
  elapsed = time.time() - t0
216
  if rc != 0:
217
  log(f" tar failed ({elapsed:.1f}s)")
218
  return
219
 
220
  size_mb = os.path.getsize(STATE_FILE) / 1024 / 1024
221
- log(f" tar: {size_mb:.1f}MB ({elapsed:.1f}s)")
222
 
223
  # Upload both files
224
  api = HfApi(token=HF_TOKEN)
 
4
 
5
  Replaces: entrypoint.sh, start-server.sh, git_sync_daemon.py, log_streamer.py
6
 
7
+ Persistence: full filesystem sync via tar.zst ↔ HF Dataset.
8
+ Dataset = disk. Container state is fully mirrored to the dataset.
9
+ On startup: download state.tar.zst → extract to /
10
+ On sync: tar selected dirs → upload state.tar.zst
 
 
11
  """
12
 
13
  import http.server
 
32
  PKG_FILE = os.path.join(PERSIST_PATH, "user-packages.list")
33
  BASE_PKG_FILE = "/etc/base-packages.list"
34
 
35
+ # Full filesystem persistence — these dirs are synced to/from the dataset.
36
+ # tar -p preserves all permissions, so no corruption.
37
+ PERSIST_DIRS = [
38
+ "home",
39
+ "root",
40
+ "etc",
41
+ "opt",
42
+ "var",
43
+ "usr/local",
44
+ ]
45
+
46
+ # Exclude from tar: Docker-managed, virtual, transient, or regenerable files
47
+ TAR_EXCLUDES = [
48
+ # Docker-managed (overwritten each container start)
49
+ "etc/hostname",
50
+ "etc/hosts",
51
+ "etc/resolv.conf",
52
+ "etc/mtab",
53
+ # Transient runtime
54
+ "var/run",
55
+ "var/lock",
56
+ "var/cache/apt",
57
+ "var/lib/apt/lists",
58
+ "var/log", # logs are ephemeral
59
+ # Sockets, pids, caches
60
+ "*.sock",
61
+ "*.pid",
62
+ "__pycache__",
63
+ "*.pyc",
64
+ ".cache",
65
+ ]
66
+
67
 
68
  def log(msg):
69
  ts = time.strftime("%H:%M:%S", time.gmtime())
 
163
  log(f" no state to restore: {e}")
164
  return
165
 
166
+ log("── RESTORE: extracting state.tar.zst → /")
167
  t0 = time.time()
168
  rc, out = run(f"tar --zstd -xpf {STATE_FILE} -C /")
169
  elapsed = time.time() - t0
170
  if rc == 0:
171
+ # Show what was restored
172
+ rc2, listing = run(f"tar --zstd -tf {STATE_FILE} | head -1 | cut -d/ -f1 | sort -u")
173
  log(f" extracted ({elapsed:.1f}s)")
174
+ # Count files
175
+ rc3, count = run(f"tar --zstd -tf {STATE_FILE} | wc -l")
176
+ if rc3 == 0:
177
+ log(f" {count.strip()} files restored")
178
  else:
179
  log(f" extraction failed ({elapsed:.1f}s)")
180
 
 
237
  except Exception:
238
  pass
239
 
240
+ # Create tar.zst of all persist dirs
241
  t0 = time.time()
242
+ dirs_to_persist = [d for d in PERSIST_DIRS if os.path.isdir(f"/{d}")]
 
 
 
243
  if not dirs_to_persist:
244
  log(" nothing to persist")
245
  return
246
 
247
+ excludes = " ".join(f"--exclude='{e}'" for e in TAR_EXCLUDES)
248
+ cmd = f"tar --zstd {excludes} -cpf {STATE_FILE} -C / {' '.join(dirs_to_persist)}"
249
+ rc, out = run(cmd)
250
  elapsed = time.time() - t0
251
  if rc != 0:
252
  log(f" tar failed ({elapsed:.1f}s)")
253
  return
254
 
255
  size_mb = os.path.getsize(STATE_FILE) / 1024 / 1024
256
+ log(f" tar: {size_mb:.1f}MB ({elapsed:.1f}s) [{', '.join(dirs_to_persist)}]")
257
 
258
  # Upload both files
259
  api = HfApi(token=HF_TOKEN)