tao-shen Claude Opus 4.6 committed on
Commit
501b164
·
1 Parent(s): 20de61f

feat: direct file sync — dataset root = filesystem root

Browse files

No more tar.zst. Dataset mirrors the container's / directly:
- SAVE: rsync / → /data/ → upload_folder to dataset
- RESTORE: snapshot_download → rsync /data/ → /

Dataset browsable: bin/, etc/, home/, usr/, etc. match the OS.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. entrypoint.py +86 -91
entrypoint.py CHANGED
@@ -2,12 +2,10 @@
2
  """
3
  HuggingRun v2 — Single entrypoint for Ubuntu Server on HuggingFace Spaces.
4
 
5
- Replaces: entrypoint.sh, start-server.sh, git_sync_daemon.py, log_streamer.py
6
-
7
- Persistence: full filesystem sync via tar.zst ↔ HF Dataset.
8
- Dataset = disk. Container state is fully mirrored to the dataset.
9
- On startup: download state.tar.zst → extract to /
10
- On sync: tar selected dirs → upload state.tar.zst
11
  """
12
 
13
  import http.server
@@ -28,32 +26,31 @@ SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
28
  SSH_PORT = os.environ.get("SSH_PORT", "2222")
29
  TTYD_PORT = os.environ.get("TTYD_PORT", "7681")
30
  LOGFILE = "/var/log/huggingrun.log"
31
- STATE_FILE = os.path.join(PERSIST_PATH, "state.tar.zst")
32
  PKG_FILE = os.path.join(PERSIST_PATH, "user-packages.list")
33
  BASE_PKG_FILE = "/etc/base-packages.list"
34
 
35
- # Full-disk persistence: tar everything from / except virtual/transient dirs.
36
- # Excludes only things that CANNOT or SHOULD NOT be persisted.
37
- TAR_EXCLUDES = [
38
- # Virtual filesystems (kernel-provided, not real files)
39
- "proc",
40
- "sys",
41
- "dev",
42
- # Our own persist path (avoid recursive tar of state.tar.zst)
43
- "data",
44
- # Temporary
45
- "tmp",
46
- "run",
47
  # Docker-managed (overwritten each container start)
48
- "etc/hostname",
49
- "etc/hosts",
50
- "etc/resolv.conf",
51
- "etc/mtab",
52
- # Transient runtime
53
- "var/lock",
54
- # Sockets, pids
55
- "*.sock",
56
- "*.pid",
 
 
 
 
57
  ]
58
 
59
 
@@ -136,52 +133,49 @@ def restore_state():
136
  os.makedirs(PERSIST_PATH, exist_ok=True)
137
  return
138
 
139
- log("── RESTORE: downloading state")
140
  os.makedirs(PERSIST_PATH, exist_ok=True)
141
  t0 = time.time()
142
  try:
143
- from huggingface_hub import hf_hub_download
144
- path = hf_hub_download(
145
  repo_id=HF_DATASET_REPO,
146
- filename="state.tar.zst",
147
  repo_type="dataset",
148
  local_dir=PERSIST_PATH,
149
  token=HF_TOKEN,
150
  )
151
  elapsed = time.time() - t0
152
- size_mb = os.path.getsize(path) / 1024 / 1024
153
- log(f" downloaded {size_mb:.1f}MB ({elapsed:.1f}s)")
154
  except Exception as e:
155
- log(f" no state to restore: {e}")
156
  return
157
 
158
- log("── RESTORE: extracting state.tar.zst → /")
 
 
 
 
 
 
 
 
 
 
159
  t0 = time.time()
160
- rc, out = run(f"tar --zstd -xpf {STATE_FILE} -C /")
 
 
 
 
 
 
 
161
  elapsed = time.time() - t0
162
  if rc == 0:
163
- # Show what was restored
164
- rc2, listing = run(f"tar --zstd -tf {STATE_FILE} | head -1 | cut -d/ -f1 | sort -u")
165
- log(f" extracted ({elapsed:.1f}s)")
166
- # Count files
167
- rc3, count = run(f"tar --zstd -tf {STATE_FILE} | wc -l")
168
- if rc3 == 0:
169
- log(f" {count.strip()} files restored")
170
  else:
171
- log(f" extraction failed ({elapsed:.1f}s)")
172
-
173
- # Also download package list if it exists
174
- try:
175
- from huggingface_hub import hf_hub_download
176
- hf_hub_download(
177
- repo_id=HF_DATASET_REPO,
178
- filename="user-packages.list",
179
- repo_type="dataset",
180
- local_dir=PERSIST_PATH,
181
- token=HF_TOKEN,
182
- )
183
- except Exception:
184
- pass
185
 
186
 
187
  def restore_packages():
@@ -215,6 +209,7 @@ def ensure_passwords():
215
  def save_and_upload():
216
  if not HF_TOKEN or not HF_DATASET_REPO:
217
  return
 
218
  from huggingface_hub import HfApi
219
 
220
  log("══ SYNC: save + upload ══")
@@ -229,54 +224,54 @@ def save_and_upload():
229
  except Exception:
230
  pass
231
 
232
- # Create tar.zst of entire filesystem (full-disk persistence)
233
  t0 = time.time()
234
- excludes = " ".join(f"--exclude='{e}'" for e in TAR_EXCLUDES)
235
- cmd = f"tar --zstd {excludes} -cpf {STATE_FILE} -C / ."
 
 
 
 
236
  rc, out = run(cmd)
237
  elapsed = time.time() - t0
238
  if rc != 0:
239
- # tar returns 1 for "file changed as we read it" — that's OK
240
- if rc == 1:
241
- log(f" tar: warnings (files changed during archive) ({elapsed:.1f}s)")
242
- else:
243
- log(f" tar failed rc={rc} ({elapsed:.1f}s)")
244
- return
245
-
246
- size_mb = os.path.getsize(STATE_FILE) / 1024 / 1024
247
- log(f" tar: {size_mb:.1f}MB ({elapsed:.1f}s) [full disk]")
248
-
249
- # Upload both files
 
 
 
 
 
 
 
 
250
  api = HfApi(token=HF_TOKEN)
251
  ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
252
 
253
  t0 = time.time()
254
  try:
255
- api.upload_file(
256
- path_or_fileobj=STATE_FILE,
257
- path_in_repo="state.tar.zst",
258
  repo_id=HF_DATASET_REPO,
259
  repo_type="dataset",
 
260
  commit_message=f"sync {ts}",
 
261
  )
262
  elapsed = time.time() - t0
263
- log(f" uploaded state.tar.zst ({elapsed:.1f}s)")
264
  except Exception as e:
265
- log(f" upload state.tar.zst failed: {e}")
266
-
267
- # Upload package list (separate commit to avoid conflicts)
268
- if os.path.exists(PKG_FILE):
269
- try:
270
- api.upload_file(
271
- path_or_fileobj=PKG_FILE,
272
- path_in_repo="user-packages.list",
273
- repo_id=HF_DATASET_REPO,
274
- repo_type="dataset",
275
- commit_message=f"packages {ts}",
276
- )
277
- log(" uploaded user-packages.list")
278
- except Exception as e:
279
- log(f" upload packages failed: {e}")
280
 
281
  log("══ SYNC: done ══")
282
 
 
2
  """
3
  HuggingRun v2 — Single entrypoint for Ubuntu Server on HuggingFace Spaces.
4
 
5
+ Persistence: dataset root = filesystem root (direct file mirror).
6
+ Dataset directory structure is identical to the container's /.
7
+ On startup: snapshot_download → rsync /data/ → /
8
+ On sync: rsync / → /data/ → upload_folder
 
 
9
  """
10
 
11
  import http.server
 
26
  SSH_PORT = os.environ.get("SSH_PORT", "2222")
27
  TTYD_PORT = os.environ.get("TTYD_PORT", "7681")
28
  LOGFILE = "/var/log/huggingrun.log"
 
29
  PKG_FILE = os.path.join(PERSIST_PATH, "user-packages.list")
30
  BASE_PKG_FILE = "/etc/base-packages.list"
31
 
32
+ # rsync excludes for both save (/ → /data/) and restore (/data/ → /)
33
+ RSYNC_EXCLUDES = [
34
+ # Virtual / kernel (don't exist as real files)
35
+ "/proc", "/sys", "/dev",
36
+ # Our persist path (avoid recursion)
37
+ "/data",
38
+ # Temporary / runtime
39
+ "/tmp", "/run",
 
 
 
 
40
  # Docker-managed (overwritten each container start)
41
+ "/etc/hostname", "/etc/hosts", "/etc/resolv.conf", "/etc/mtab",
42
+ # Transient
43
+ "*.sock", "*.pid",
44
+ "/var/lock",
45
+ ]
46
+
47
+ # upload_folder ignore patterns (HF API rejects some paths)
48
+ UPLOAD_IGNORE = [
49
+ "__pycache__", "*.pyc",
50
+ ".git", ".git*",
51
+ "*.sock", "*.lock",
52
+ ".huggingface",
53
+ ".cache",
54
  ]
55
 
56
 
 
133
  os.makedirs(PERSIST_PATH, exist_ok=True)
134
  return
135
 
136
+ log("── RESTORE: downloading dataset → /data/")
137
  os.makedirs(PERSIST_PATH, exist_ok=True)
138
  t0 = time.time()
139
  try:
140
+ from huggingface_hub import snapshot_download
141
+ snapshot_download(
142
  repo_id=HF_DATASET_REPO,
 
143
  repo_type="dataset",
144
  local_dir=PERSIST_PATH,
145
  token=HF_TOKEN,
146
  )
147
  elapsed = time.time() - t0
148
+ log(f" downloaded ({elapsed:.1f}s)")
 
149
  except Exception as e:
150
+ log(f" download failed or empty: {e}")
151
  return
152
 
153
+ # Check if there's actual filesystem data (look for top-level dirs like bin/, etc/)
154
+ has_data = any(
155
+ os.path.isdir(os.path.join(PERSIST_PATH, d))
156
+ for d in ["bin", "etc", "home", "usr", "root"]
157
+ )
158
+ if not has_data:
159
+ log(" no filesystem data in dataset (fresh start)")
160
+ return
161
+
162
+ log("── RESTORE: rsync /data/ → /")
163
+ excludes = " ".join(f"--exclude='{e}'" for e in RSYNC_EXCLUDES)
164
  t0 = time.time()
165
+ # rsync with -rlptD: recursive, links, perms, times, devices
166
+ # --exclude .huggingface and .git (download metadata, not filesystem data)
167
+ cmd = (f"rsync -rlptD --delete "
168
+ f"{excludes} "
169
+ f"--exclude='.huggingface' --exclude='.git' --exclude='.gitattributes' "
170
+ f"--exclude='user-packages.list' "
171
+ f"{PERSIST_PATH}/ /")
172
+ rc, out = run(cmd)
173
  elapsed = time.time() - t0
174
  if rc == 0:
175
+ rc2, count = run(f"find {PERSIST_PATH} -type f | wc -l")
176
+ log(f" restored ({elapsed:.1f}s), {count.strip()} files")
 
 
 
 
 
177
  else:
178
+ log(f" restore failed ({elapsed:.1f}s)")
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
 
181
  def restore_packages():
 
209
  def save_and_upload():
210
  if not HF_TOKEN or not HF_DATASET_REPO:
211
  return
212
+ import shutil
213
  from huggingface_hub import HfApi
214
 
215
  log("══ SYNC: save + upload ══")
 
224
  except Exception:
225
  pass
226
 
227
+ # rsync entire filesystem / → /data/
228
  t0 = time.time()
229
+ excludes = " ".join(f"--exclude='{e}'" for e in RSYNC_EXCLUDES)
230
+ cmd = (f"rsync -rlptD --delete "
231
+ f"{excludes} "
232
+ f"--exclude='.huggingface' --exclude='.git' --exclude='.gitattributes' "
233
+ f"--exclude='user-packages.list' "
234
+ f"/ {PERSIST_PATH}/")
235
  rc, out = run(cmd)
236
  elapsed = time.time() - t0
237
  if rc != 0:
238
+ log(f" rsync failed ({elapsed:.1f}s)")
239
+ return
240
+ log(f" rsync / → /data/ ({elapsed:.1f}s)")
241
+
242
+ # Clean dirs that HF API rejects
243
+ for reject_dir in [".cache"]:
244
+ for dirpath, dirnames, filenames in os.walk(PERSIST_PATH):
245
+ for d in list(dirnames):
246
+ if d == reject_dir:
247
+ full = os.path.join(dirpath, d)
248
+ log(f" rm {full}")
249
+ shutil.rmtree(full, ignore_errors=True)
250
+ dirnames.remove(d)
251
+
252
+ # Count files to upload
253
+ rc2, count = run(f"find {PERSIST_PATH} -type f -not -path '*/.git/*' -not -path '*/.huggingface/*' | wc -l")
254
+ log(f" {count.strip()} files to upload")
255
+
256
+ # Upload folder
257
  api = HfApi(token=HF_TOKEN)
258
  ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
259
 
260
  t0 = time.time()
261
  try:
262
+ api.upload_folder(
263
+ folder_path=PERSIST_PATH,
 
264
  repo_id=HF_DATASET_REPO,
265
  repo_type="dataset",
266
+ path_in_repo="",
267
  commit_message=f"sync {ts}",
268
+ ignore_patterns=UPLOAD_IGNORE,
269
  )
270
  elapsed = time.time() - t0
271
+ log(f" uploaded ({elapsed:.1f}s)")
272
  except Exception as e:
273
+ elapsed = time.time() - t0
274
+ log(f" upload failed ({elapsed:.1f}s): {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
  log("══ SYNC: done ══")
277