Spaces:

fast-stager
/

data-collection

Sleeping

App Files Files Community

Nightfury16 committed on Dec 15, 2025

Commit

ff75b10

1 Parent(s): 1b04ad0

Initial commit

Browse files

Files changed (1) hide show

app.py +27 -22

app.py CHANGED Viewed

@@ -11,14 +11,17 @@ from datetime import datetime
 from filelock import FileLock
 from huggingface_hub import HfApi, hf_hub_download
-DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "mit123/property-labels")
-HF_TOKEN = os.environ.get("HF_TOKEN")
-URL_FILE = "urls.txt"
-LABEL_FILE = "annotations.csv"
-VERIFY_FILE = "verifications.csv"
-SKIP_FILE = "skipped.csv"
-LOCK_FILE = "data.lock"
 MAX_IMAGES = 6
 THUMB_SIZE = (350, 350)
@@ -26,40 +29,41 @@ ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room",
 def sync_pull():
     if not HF_TOKEN:
-        print("⚠️ HF_TOKEN not set. Data will not be saved permanently.")
         return
-    api = HfApi(token=HF_TOKEN)
-    for filename in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
         try:
-            print(f"📥 Pulling {filename} from Hub...")
             hf_hub_download(
                 repo_id=DATASET_REPO_ID,
                 filename=filename,
                 repo_type="dataset",
-                local_dir=".",
                 token=HF_TOKEN
             )
         except Exception:
-            print(f"ℹ️ {filename} not found on Hub. Starting fresh.")
-def sync_push_background(filename):
     if not HF_TOKEN: return
     def _push():
         try:
             api = HfApi(token=HF_TOKEN)
             api.upload_file(
-                path_or_fileobj=filename,
-                path_in_repo=filename,
                 repo_id=DATASET_REPO_ID,
                 repo_type="dataset",
-                commit_message=f"Update {filename}"
             )
-            print(f"☁️ Uploaded {filename} to Hub")
         except Exception as e:
-            print(f"❌ Upload failed: {e}")
     thread = threading.Thread(target=_push)
     thread.start()
@@ -75,7 +79,7 @@ def init_files():
             pd.DataFrame(columns=cols).to_csv(f, index=False)
     if not os.path.exists(URL_FILE):
-        with open(URL_FILE, "w") as f: f.write("")
 init_files()
@@ -264,7 +268,8 @@ def save_data(mode, history, urls, *args):
         with open(fname, "a", newline="") as f:
             csv.writer(f).writerows(rows)
-    sync_push_background(fname)
     return render_workspace(mode, history)
@@ -274,7 +279,7 @@ def skip_group(idx, history, mode):
         with FileLock(LOCK_FILE):
             with open(SKIP_FILE, "a", newline="") as f:
                 csv.writer(f).writerow([datetime.now().isoformat(), "user", gid])
-        sync_push_background(SKIP_FILE)
     return render_workspace(mode, history, specific_index=idx + 1)

 from filelock import FileLock
 from huggingface_hub import HfApi, hf_hub_download
+DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "fast-stager/property-labels")
+HF_TOKEN = os.environ.get("HF_TOKEN")
+CACHE_DIR = "/tmp"
+URL_FILE = "urls.txt"
+LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
+VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
+SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
+LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")
 MAX_IMAGES = 6
 THUMB_SIZE = (350, 350)
 def sync_pull():
     if not HF_TOKEN:
+        print("⚠️ HF_TOKEN not set. Persistence disabled.")
         return
+    print(f"🔄 Syncing from {DATASET_REPO_ID}...")
+    for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
+        target_path = os.path.join(CACHE_DIR, filename)
         try:
             hf_hub_download(
                 repo_id=DATASET_REPO_ID,
                 filename=filename,
                 repo_type="dataset",
+                local_dir=CACHE_DIR,
                 token=HF_TOKEN
             )
+            print(f"✅ Loaded {filename}")
         except Exception:
+            print(f"ℹ️ {filename} not found on Hub. Creating empty.")
+def sync_push_background(local_path, remote_filename):
     if not HF_TOKEN: return
     def _push():
         try:
             api = HfApi(token=HF_TOKEN)
             api.upload_file(
+                path_or_fileobj=local_path,
+                path_in_repo=remote_filename,
                 repo_id=DATASET_REPO_ID,
                 repo_type="dataset",
+                commit_message=f"Update {remote_filename}"
             )
+            print(f"☁️ Synced {remote_filename}")
         except Exception as e:
+            print(f"❌ Sync failed: {e}")
     thread = threading.Thread(target=_push)
     thread.start()
             pd.DataFrame(columns=cols).to_csv(f, index=False)
     if not os.path.exists(URL_FILE):
+        print("⚠️ urls.txt not found in root directory! Please upload it to your Space.")
 init_files()
         with open(fname, "a", newline="") as f:
             csv.writer(f).writerows(rows)
+    remote_filename = os.path.basename(fname)
+    sync_push_background(fname, remote_filename)
     return render_workspace(mode, history)
         with FileLock(LOCK_FILE):
             with open(SKIP_FILE, "a", newline="") as f:
                 csv.writer(f).writerow([datetime.now().isoformat(), "user", gid])
+        sync_push_background(SKIP_FILE, "skipped.csv")
     return render_workspace(mode, history, specific_index=idx + 1)