Marlin Lee Claude Sonnet 4.6 committed on
Commit
bc1017d
·
1 Parent(s): 0b334cf

Persist feature names back to HF dataset on save

Browse files

Adds a debounced (2 s) background-thread upload so any manual label
entered in the explorer is pushed to the HF dataset repo after the
user stops typing. Uses the existing HF_TOKEN / HF_DATASET_REPO env
vars that are already configured as Space secrets.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. scripts/explorer_app.py +40 -0
scripts/explorer_app.py CHANGED
@@ -31,6 +31,7 @@ import os
31
  import io
32
  import json
33
  import base64
 
34
  from collections import OrderedDict
35
 
36
  import cv2
@@ -281,6 +282,45 @@ def _save_names():
281
  with open(_names_file, 'w') as _f:
282
  json.dump({str(k): v for k, v in sorted(feature_names.items())}, _f, indent=2)
283
  print(f"Saved {len(feature_names)} feature names to {_names_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
 
286
  def _display_name(feat: int) -> str:
 
31
  import io
32
  import json
33
  import base64
34
+ import threading
35
  from collections import OrderedDict
36
 
37
  import cv2
 
282
  with open(_names_file, 'w') as _f:
283
  json.dump({str(k): v for k, v in sorted(feature_names.items())}, _f, indent=2)
284
  print(f"Saved {len(feature_names)} feature names to {_names_file}")
285
+ _schedule_hf_push(_names_file)
286
+
287
+
288
# Delay between the last save and the upload, in milliseconds.
_HF_PUSH_DEBOUNCE_MS = 2000

# Single-slot holder for the active debounce timeout handle (None when idle).
_hf_push_pending = [None]

# Serializes uploads so that a slow push cannot complete after a newer one
# and leave an older copy of the file as the most recent commit.
_hf_push_lock = threading.Lock()


def _schedule_hf_push(names_file_path):
    """Debounce the HF dataset upload of the feature-names file.

    Every call cancels the previously scheduled push (if any) and schedules
    a new one ``_HF_PUSH_DEBOUNCE_MS`` milliseconds in the future on the
    current Bokeh document.  When the timeout fires, the upload runs in a
    daemon thread so the callback itself returns immediately.

    The function is a no-op unless both the ``HF_TOKEN`` and
    ``HF_DATASET_REPO`` environment variables are set to non-empty values.

    Args:
        names_file_path: Path of the local JSON file to upload.  Its
            basename is used as the destination path inside the dataset repo.

    Returns:
        None.  Upload failures are reported with a printed warning and are
        never raised to the caller.
    """
    hf_token = os.environ.get("HF_TOKEN")
    hf_repo = os.environ.get("HF_DATASET_REPO")
    if not (hf_token and hf_repo):
        return

    doc = curdoc()

    # Cancel any already-pending push for this session.
    pending = _hf_push_pending[0]
    if pending is not None:
        _hf_push_pending[0] = None
        try:
            doc.remove_timeout_callback(pending)
        except Exception as exc:
            # Best-effort: a failed cancel must not break saving, but it
            # should be visible rather than silently swallowed.
            print(f"  Warning: could not cancel pending HF push: {exc}")

    def _push_thread():
        file_name = os.path.basename(names_file_path)
        try:
            # Imported lazily so a missing huggingface_hub package only
            # produces the warning below instead of breaking the app.
            from huggingface_hub import upload_file

            # One upload at a time.  NOTE(review): this assumes upload_file
            # reads the file when it is called, so whichever push runs last
            # uploads the newest contents -- confirm against the HF docs.
            with _hf_push_lock:
                upload_file(
                    path_or_fileobj=names_file_path,
                    path_in_repo=file_name,
                    repo_id=hf_repo,
                    repo_type="dataset",
                    token=hf_token,
                    commit_message="Update feature names",
                )
            print(f"  Pushed {file_name} to HF dataset {hf_repo}")
        except Exception as e:
            print(f"  Warning: could not push feature names to HF: {e}")

    def _fire():
        # The timeout has run, so there is nothing left to cancel.
        _hf_push_pending[0] = None
        threading.Thread(target=_push_thread, daemon=True).start()

    _hf_push_pending[0] = doc.add_timeout_callback(_fire, _HF_PUSH_DEBOUNCE_MS)
324
 
325
 
326
  def _display_name(feat: int) -> str: