Nightfury16 committed on
Commit
ff75b10
·
1 Parent(s): 1b04ad0

Initial commit

Browse files
Files changed (1) hide show
  1. app.py +27 -22
app.py CHANGED
@@ -11,14 +11,17 @@ from datetime import datetime
11
  from filelock import FileLock
12
  from huggingface_hub import HfApi, hf_hub_download
13
 
14
- DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "mit123/property-labels")
15
- HF_TOKEN = os.environ.get("HF_TOKEN")
16
 
17
- URL_FILE = "urls.txt"
18
- LABEL_FILE = "annotations.csv"
19
- VERIFY_FILE = "verifications.csv"
20
- SKIP_FILE = "skipped.csv"
21
- LOCK_FILE = "data.lock"
 
 
 
22
 
23
  MAX_IMAGES = 6
24
  THUMB_SIZE = (350, 350)
@@ -26,40 +29,41 @@ ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room",
26
 
27
  def sync_pull():
28
  if not HF_TOKEN:
29
- print("⚠️ HF_TOKEN not set. Data will not be saved permanently.")
30
  return
31
 
32
- api = HfApi(token=HF_TOKEN)
33
 
34
- for filename in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
 
35
  try:
36
- print(f"📥 Pulling {filename} from Hub...")
37
  hf_hub_download(
38
  repo_id=DATASET_REPO_ID,
39
  filename=filename,
40
  repo_type="dataset",
41
- local_dir=".",
42
  token=HF_TOKEN
43
  )
 
44
  except Exception:
45
- print(f"ℹ️ {filename} not found on Hub. Starting fresh.")
46
 
47
- def sync_push_background(filename):
48
  if not HF_TOKEN: return
49
 
50
  def _push():
51
  try:
52
  api = HfApi(token=HF_TOKEN)
53
  api.upload_file(
54
- path_or_fileobj=filename,
55
- path_in_repo=filename,
56
  repo_id=DATASET_REPO_ID,
57
  repo_type="dataset",
58
- commit_message=f"Update {filename}"
59
  )
60
- print(f"☁️ Uploaded {filename} to Hub")
61
  except Exception as e:
62
- print(f"❌ Upload failed: {e}")
63
 
64
  thread = threading.Thread(target=_push)
65
  thread.start()
@@ -75,7 +79,7 @@ def init_files():
75
  pd.DataFrame(columns=cols).to_csv(f, index=False)
76
 
77
  if not os.path.exists(URL_FILE):
78
- with open(URL_FILE, "w") as f: f.write("")
79
 
80
  init_files()
81
 
@@ -264,7 +268,8 @@ def save_data(mode, history, urls, *args):
264
  with open(fname, "a", newline="") as f:
265
  csv.writer(f).writerows(rows)
266
 
267
- sync_push_background(fname)
 
268
 
269
  return render_workspace(mode, history)
270
 
@@ -274,7 +279,7 @@ def skip_group(idx, history, mode):
274
  with FileLock(LOCK_FILE):
275
  with open(SKIP_FILE, "a", newline="") as f:
276
  csv.writer(f).writerow([datetime.now().isoformat(), "user", gid])
277
- sync_push_background(SKIP_FILE)
278
 
279
  return render_workspace(mode, history, specific_index=idx + 1)
280
 
 
11
  from filelock import FileLock
12
  from huggingface_hub import HfApi, hf_hub_download
13
 
14
# Hub dataset repository holding the CSVs; overridable through the environment.
DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "fast-stager/property-labels")
# Hub access token. The sync helpers return early when this is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Directory for the working copies of the CSVs and the lock file.
# Defaults to /tmp; set LABEL_CACHE_DIR to relocate it.
CACHE_DIR = os.environ.get("LABEL_CACHE_DIR", "/tmp")

# Resolved relative to the working directory, not CACHE_DIR.
URL_FILE = "urls.txt"

LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
# Lock file guarding reads/writes of the CSVs above.
LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")

# NOTE(review): presumably the per-group image cap and thumbnail (w, h) in
# pixels -- confirm against the code that consumes them.
MAX_IMAGES = 6
THUMB_SIZE = (350, 350)
 
29
 
30
def sync_pull():
    """Download the label CSVs from the Hub dataset repo into CACHE_DIR.

    Prints a warning and returns immediately when HF_TOKEN is not set.
    Each file is fetched independently: a failed download is reported
    (with its cause) and skipped, so the remaining files are still tried.
    """
    if not HF_TOKEN:
        print("⚠️ HF_TOKEN not set. Persistence disabled.")
        return

    print(f"🔄 Syncing from {DATASET_REPO_ID}...")

    for filename in ("annotations.csv", "verifications.csv", "skipped.csv"):
        try:
            hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=filename,
                repo_type="dataset",
                local_dir=CACHE_DIR,
                token=HF_TOKEN,
            )
        except Exception as exc:
            # Deliberately best-effort, but surface the cause: an auth or
            # network failure must not be indistinguishable from a file
            # that simply does not exist on the Hub yet.
            print(
                f"ℹ️ {filename} not found on Hub. Creating empty. "
                f"({type(exc).__name__}: {exc})"
            )
        else:
            print(f"✅ Loaded {filename}")
50
 
51
def sync_push_background(local_path, remote_filename):
    """Upload a local file to the Hub dataset repo on a background thread.

    Args:
        local_path: Path of the file on disk to upload.
        remote_filename: Destination path inside the dataset repository.

    Returns immediately; does nothing when HF_TOKEN is not set. Upload
    failures are printed, never raised to the caller.
    """
    if not HF_TOKEN:
        return

    def _push():
        try:
            # Snapshot the file under the same lock writers hold while
            # appending, so a half-written CSV is never uploaded. The lock
            # is released before the (slow) network call.
            with FileLock(LOCK_FILE):
                with open(local_path, "rb") as f:
                    payload = f.read()

            HfApi(token=HF_TOKEN).upload_file(
                path_or_fileobj=payload,
                path_in_repo=remote_filename,
                repo_id=DATASET_REPO_ID,
                repo_type="dataset",
                commit_message=f"Update {remote_filename}",
            )
            print(f"☁️ Synced {remote_filename}")
        except Exception as e:
            print(f"❌ Sync failed: {e}")

    threading.Thread(target=_push).start()
 
79
  pd.DataFrame(columns=cols).to_csv(f, index=False)
80
 
81
  if not os.path.exists(URL_FILE):
82
+ print("⚠️ urls.txt not found in root directory! Please upload it to your Space.")
83
 
84
  init_files()
85
 
 
268
  with open(fname, "a", newline="") as f:
269
  csv.writer(f).writerows(rows)
270
 
271
+ remote_filename = os.path.basename(fname)
272
+ sync_push_background(fname, remote_filename)
273
 
274
  return render_workspace(mode, history)
275
 
 
279
  with FileLock(LOCK_FILE):
280
  with open(SKIP_FILE, "a", newline="") as f:
281
  csv.writer(f).writerow([datetime.now().isoformat(), "user", gid])
282
+ sync_push_background(SKIP_FILE, "skipped.csv")
283
 
284
  return render_workspace(mode, history, specific_index=idx + 1)
285