Nightfury16 committed on
Commit
352bdd3
·
1 Parent(s): 6b53e51
Files changed (2) hide show
  1. .claude/settings.local.json +7 -0
  2. app.py +136 -26
.claude/settings.local.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(python3:*)"
5
+ ]
6
+ }
7
+ }
app.py CHANGED
@@ -9,7 +9,7 @@ import random
9
  from io import BytesIO
10
  from PIL import Image
11
  from concurrent.futures import ThreadPoolExecutor
12
- from datetime import datetime
13
  from filelock import FileLock
14
  from huggingface_hub import HfApi, hf_hub_download
15
 
@@ -24,6 +24,10 @@ LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
24
  VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
25
  SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
26
  LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")
 
 
 
 
27
 
28
  FIXED_IN_SESSION = set()
29
  MANUAL_EXCLUDE = {"075c8bb8a73c45d71788e711edd9e8d5l", "07a0544f217db88fe2b06fd5d38f02a6l", "6bf16112723de3318c44641958638a56l"}
@@ -59,6 +63,8 @@ def init_files():
59
  ["timestamp", "user", "group_id", "url", "is_correct", "corrected_label", "corrected_score"] if f == VERIFY_FILE else \
60
  ["timestamp", "user", "group_id"]
61
  pd.DataFrame(columns=cols).to_csv(f, index=False)
 
 
62
 
63
  init_files()
64
 
@@ -99,6 +105,63 @@ def get_flagged_groups():
99
  flagged = errors['group_id'].unique().tolist()
100
  return [g for g in flagged if g not in FIXED_IN_SESSION and g not in MANUAL_EXCLUDE]
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def get_stats_text():
103
  all_gids = get_ordered_groups()
104
  flagged = get_flagged_groups()
@@ -107,29 +170,49 @@ def get_stats_text():
107
  l_count = len(df_l['group_id'].unique()) if not df_l.empty else 0
108
  v_count = len(df_v['group_id'].unique()) if not df_v.empty else 0
109
  err_msg = f" | ⚠️ **Fix:** {len(flagged)}" if flagged else " | ✅ Clean"
110
- return f"**Total:** {len(all_gids)} | **Labeled:** {l_count} | **Verified:** {v_count}{err_msg}"
 
 
111
 
112
- def render_workspace(mode, history, specific_index=None, move_back=False):
113
  all_ordered = get_ordered_groups()
114
  flagged_pool = get_flagged_groups()
115
  current_gid = history[-1] if history else None
 
116
  target_gid = None
117
  if specific_index is not None:
118
- if 0 <= specific_index < len(all_ordered): target_gid = all_ordered[specific_index]
 
 
119
  elif move_back and len(history) > 1:
120
  history.pop(); target_gid = history[-1]
 
121
  else:
122
  if mode == "fix":
123
- candidates = [g for g in flagged_pool if g != current_gid]
124
- if not candidates and flagged_pool: candidates = flagged_pool
125
- if candidates: target_gid = candidates[0]
 
 
 
126
  else:
127
  df_l, df_v = get_clean_df(LABEL_FILE), get_clean_df(VERIFY_FILE)
128
  l_done = set(df_l['group_id'].unique()) if not df_l.empty else set()
129
  v_done = set(df_v['group_id'].unique()) if not df_v.empty else set()
130
- candidates = [g for g in all_ordered if (mode=="label" and g not in l_done) or (mode=="verify" and g in l_done and g not in v_done)]
131
- if candidates: target_gid = random.choice(candidates)
132
- if not target_gid: return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "Done!"}
 
 
 
 
 
 
 
 
 
 
 
133
  urls = [u for u in load_all_urls() if target_gid in u][:MAX_IMAGES]
134
  if not history or history[-1] != target_gid: history.append(target_gid)
135
  saved_vals = {}
@@ -141,7 +224,7 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
141
  with ThreadPoolExecutor(max_workers=MAX_IMAGES) as ex:
142
  def fetch(u):
143
  try:
144
- res = requests.get(u, timeout=3, headers={'User-Agent': 'Mozilla/5.0'})
145
  img = Image.open(BytesIO(res.content)); img.thumbnail(THUMB_SIZE); return img
146
  except: return None
147
  processed_images = list(ex.map(fetch, urls))
@@ -173,7 +256,7 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
173
  for obj in [c_sld, c_drp, c_chk, c_lbl]: updates[obj] = gr.update(visible=False)
174
  return updates
175
 
176
- def save_data(mode, history, urls, *args):
177
  if not history: return
178
  gid = history[-1]
179
  if mode == "fix": FIXED_IN_SESSION.add(gid)
@@ -181,12 +264,13 @@ def save_data(mode, history, urls, *args):
181
  for i, u in enumerate(urls):
182
  sc, lbl, chk = args[i*4], args[i*4+1], args[i*4+2]
183
  clean_lbl = str(lbl).strip().lower()
184
- if mode in ["label", "fix"]: rows.append([ts, "user", gid, u, int(sc), clean_lbl])
185
- else: rows.append([ts, "user", gid, u, chk, clean_lbl, int(sc)])
186
  with FileLock(LOCK_FILE):
187
  with open(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE, "a", newline="") as f: csv.writer(f).writerows(rows)
188
  sync_push_background(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE, os.path.basename(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE))
189
- return render_workspace(mode, history)
 
190
 
191
  def refresh_cat():
192
  all_gids = get_ordered_groups()
@@ -194,17 +278,37 @@ def refresh_cat():
194
  df_l, df_v = get_clean_df(LABEL_FILE), get_clean_df(VERIFY_FILE)
195
  l_set = set(df_l['group_id'].unique()) if not df_l.empty else set()
196
  v_set = set(df_v['group_id'].unique()) if not df_v.empty else set()
197
- data = [[i+1, "⚠️ Fix Needed" if gid in flagged else "✅ Verified" if gid in v_set else "🔵 Labeled" if gid in l_set else "⚪ Pending", gid] for i, gid in enumerate(all_gids)]
 
 
 
 
 
198
  return pd.DataFrame(data, columns=["#", "Status", "ID"])
199
 
 
 
 
 
 
 
 
 
 
200
  with gr.Blocks(theme=gr.themes.Soft(), title="Labeler Pro") as demo:
201
  state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
 
202
  with gr.Row():
203
  top_stats = gr.Markdown("Loading...")
204
  btn_home = gr.Button("🏠 Home", size="sm", scale=0)
205
  with gr.Tabs():
206
  with gr.Tab("Workspace"):
207
- with gr.Group() as screen_menu:
 
 
 
 
 
208
  gr.Markdown("# Property Labeler Pro")
209
  with gr.Row():
210
  b_start_l, b_start_v, b_start_f = gr.Button("Label", variant="primary"), gr.Button("Verify"), gr.Button("🛠 Fix Errors", variant="secondary")
@@ -227,15 +331,21 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Labeler Pro") as demo:
227
  df_cat = gr.Dataframe(interactive=False)
228
  b_ref_cat = gr.Button("Refresh Catalog")
229
  ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
230
- b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
231
- b_start_v.click(lambda: "verify", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
232
- b_start_f.click(lambda: "fix", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
233
- b_save.click(save_data, [state_mode, state_hist, state_urls] + input_objs, ALL_IO)
234
- b_back.click(lambda m, h: render_workspace(m, h, move_back=True), [state_mode, state_hist], ALL_IO)
235
- btn_home.click(lambda: {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), state_hist: []}, None, [screen_menu, screen_work, state_hist])
236
- b_go_l.click(lambda: "label", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
237
- b_go_v.click(lambda: "verify", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
238
- b_go_f.click(lambda: "fix", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
 
 
 
 
 
 
239
  b_ref_cat.click(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
240
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
241
  demo.queue().launch(server_name="0.0.0.0", server_port=7860)
 
9
  from io import BytesIO
10
  from PIL import Image
11
  from concurrent.futures import ThreadPoolExecutor
12
+ from datetime import datetime, timedelta
13
  from filelock import FileLock
14
  from huggingface_hub import HfApi, hf_hub_download
15
 
 
24
  VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
25
  SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
26
  LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")
27
+ LEASE_FILE = os.path.join(CACHE_DIR, "leases.csv")
28
+ LEASE_DURATION_SECONDS = 600
29
+
30
+ FETCH_SIZE = "w480_h360"
31
 
32
  FIXED_IN_SESSION = set()
33
  MANUAL_EXCLUDE = {"075c8bb8a73c45d71788e711edd9e8d5l", "07a0544f217db88fe2b06fd5d38f02a6l", "6bf16112723de3318c44641958638a56l"}
 
63
  ["timestamp", "user", "group_id", "url", "is_correct", "corrected_label", "corrected_score"] if f == VERIFY_FILE else \
64
  ["timestamp", "user", "group_id"]
65
  pd.DataFrame(columns=cols).to_csv(f, index=False)
66
+ if not os.path.exists(LEASE_FILE):
67
+ pd.DataFrame(columns=["user_id", "group_id", "mode", "leased_at", "expires_at"]).to_csv(LEASE_FILE, index=False)
68
 
69
  init_files()
70
 
 
105
  flagged = errors['group_id'].unique().tolist()
106
  return [g for g in flagged if g not in FIXED_IN_SESSION and g not in MANUAL_EXCLUDE]
107
 
108
def _read_leases():
    """Load the lease table stored at ``LEASE_FILE``.

    Returns:
        A DataFrame with the columns ``user_id``, ``group_id``, ``mode``,
        ``leased_at`` and ``expires_at``. ``expires_at`` is parsed with
        ``pd.to_datetime(errors='coerce')``, so unparseable values become
        ``NaT``. An empty frame with those columns is returned when the
        file is missing, zero bytes long, or cannot be read/parsed
        (including a file that lacks the ``expires_at`` column).
    """
    columns = ["user_id", "group_id", "mode", "leased_at", "expires_at"]
    try:
        # getsize raises OSError for a missing file, so a single try covers
        # the "absent", "empty" and "unreadable" cases without a separate
        # exists() check that could race with another writer.
        if os.path.getsize(LEASE_FILE) == 0:
            return pd.DataFrame(columns=columns)
        df = pd.read_csv(LEASE_FILE)
        df['expires_at'] = pd.to_datetime(df['expires_at'], errors='coerce')
    except Exception:
        # Best-effort read: a damaged lease file is treated as "no leases".
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return pd.DataFrame(columns=columns)
    return df
117
+
118
def acquire_lease(user_id, group_id, mode):
    """Try to reserve ``group_id`` for ``user_id``.

    Leases whose ``expires_at`` is not later than the current time are
    dropped first. If a different user still holds a live lease on the
    group, the pruned table is written back and ``False`` is returned.
    Otherwise every lease the user already holds is replaced by a single
    new one lasting ``LEASE_DURATION_SECONDS`` and ``True`` is returned.
    """
    leased_at = datetime.now()
    expires_at = leased_at + timedelta(seconds=LEASE_DURATION_SECONDS)
    with FileLock(LOCK_FILE):
        leases = _read_leases()
        live = leases[leases['expires_at'] > leased_at]
        held_by_other = (live['group_id'] == group_id) & (live['user_id'] != user_id)
        if held_by_other.any():
            granted = False
            updated = live
        else:
            granted = True
            # A user holds at most one lease: discard their previous rows.
            others = live[live['user_id'] != user_id]
            fresh = pd.DataFrame([{
                "user_id": user_id,
                "group_id": group_id,
                "mode": mode,
                "leased_at": leased_at.isoformat(),
                "expires_at": expires_at.isoformat(),
            }])
            updated = pd.concat([others, fresh], ignore_index=True)
        updated.to_csv(LEASE_FILE, index=False)
    return granted
134
+
135
def release_lease(user_id):
    """Remove every lease row owned by ``user_id`` and rewrite the lease file."""
    with FileLock(LOCK_FILE):
        leases = _read_leases()
        not_mine = leases['user_id'] != user_id
        leases[not_mine].to_csv(LEASE_FILE, index=False)
140
+
141
def get_leased_group_ids(exclude_user=None):
    """Return the set of group ids that currently have a live lease.

    Expired rows are pruned and the pruned table is written back to
    ``LEASE_FILE`` as a side effect (skipped when the table is empty).

    Args:
        exclude_user: when truthy, leases held by this user are left out
            of the returned set.
    """
    cutoff = datetime.now()
    with FileLock(LOCK_FILE):
        leases = _read_leases()
        if leases.empty:
            return set()
        live = leases[leases['expires_at'] > cutoff]
        live.to_csv(LEASE_FILE, index=False)
    visible = live[live['user_id'] != exclude_user] if exclude_user else live
    return set(visible['group_id'].unique())
151
+
152
def renew_lease(user_id):
    """Push the expiry of ``user_id``'s lease rows forward.

    Does nothing for a falsy ``user_id``. When the user has at least one
    row in the lease table, its ``expires_at`` is set to now plus
    ``LEASE_DURATION_SECONDS`` and the table is written back.
    """
    if not user_id:
        return
    extended_until = datetime.now() + timedelta(seconds=LEASE_DURATION_SECONDS)
    stamp = extended_until.isoformat()
    with FileLock(LOCK_FILE):
        leases = _read_leases()
        owned = leases['user_id'] == user_id
        if not owned.any():
            return
        leases.loc[owned, 'expires_at'] = stamp
        leases.to_csv(LEASE_FILE, index=False)
161
+
162
def thumb_url(url):
    """Return ``url`` with the ``w2048_h1536`` size token swapped for ``FETCH_SIZE``.

    URLs that do not contain the token are returned unchanged.
    """
    full_size_token = "w2048_h1536"
    return url.replace(full_size_token, FETCH_SIZE)
164
+
165
  def get_stats_text():
166
  all_gids = get_ordered_groups()
167
  flagged = get_flagged_groups()
 
170
  l_count = len(df_l['group_id'].unique()) if not df_l.empty else 0
171
  v_count = len(df_v['group_id'].unique()) if not df_v.empty else 0
172
  err_msg = f" | ⚠️ **Fix:** {len(flagged)}" if flagged else " | ✅ Clean"
173
+ active = len(get_leased_group_ids())
174
+ active_msg = f" | **Active:** {active}" if active > 0 else ""
175
+ return f"**Total:** {len(all_gids)} | **Labeled:** {l_count} | **Verified:** {v_count}{err_msg}{active_msg}"
176
 
177
+ def render_workspace(mode, history, user_id="user", specific_index=None, move_back=False):
178
  all_ordered = get_ordered_groups()
179
  flagged_pool = get_flagged_groups()
180
  current_gid = history[-1] if history else None
181
+ leased_by_others = get_leased_group_ids(exclude_user=user_id)
182
  target_gid = None
183
  if specific_index is not None:
184
+ if 0 <= specific_index < len(all_ordered):
185
+ target_gid = all_ordered[specific_index]
186
+ acquire_lease(user_id, target_gid, mode)
187
  elif move_back and len(history) > 1:
188
  history.pop(); target_gid = history[-1]
189
+ acquire_lease(user_id, target_gid, mode)
190
  else:
191
  if mode == "fix":
192
+ candidates = [g for g in flagged_pool if g != current_gid and g not in leased_by_others]
193
+ if not candidates:
194
+ candidates = [g for g in flagged_pool if g not in leased_by_others]
195
+ if candidates:
196
+ target_gid = candidates[0]
197
+ acquire_lease(user_id, target_gid, mode)
198
  else:
199
  df_l, df_v = get_clean_df(LABEL_FILE), get_clean_df(VERIFY_FILE)
200
  l_done = set(df_l['group_id'].unique()) if not df_l.empty else set()
201
  v_done = set(df_v['group_id'].unique()) if not df_v.empty else set()
202
+ candidates = [g for g in all_ordered if g not in leased_by_others and (
203
+ (mode=="label" and g not in l_done) or (mode=="verify" and g in l_done and g not in v_done))]
204
+ if candidates:
205
+ target_gid = random.choice(candidates)
206
+ if not acquire_lease(user_id, target_gid, mode):
207
+ for c in candidates:
208
+ if c != target_gid and acquire_lease(user_id, c, mode):
209
+ target_gid = c
210
+ break
211
+ else:
212
+ target_gid = None
213
+ if not target_gid:
214
+ msg = "All available properties are currently being worked on. Try again shortly." if leased_by_others else "Done! All properties processed."
215
+ return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: msg}
216
  urls = [u for u in load_all_urls() if target_gid in u][:MAX_IMAGES]
217
  if not history or history[-1] != target_gid: history.append(target_gid)
218
  saved_vals = {}
 
224
  with ThreadPoolExecutor(max_workers=MAX_IMAGES) as ex:
225
  def fetch(u):
226
  try:
227
+ res = requests.get(thumb_url(u), timeout=5, headers={'User-Agent': 'Mozilla/5.0'})
228
  img = Image.open(BytesIO(res.content)); img.thumbnail(THUMB_SIZE); return img
229
  except: return None
230
  processed_images = list(ex.map(fetch, urls))
 
256
  for obj in [c_sld, c_drp, c_chk, c_lbl]: updates[obj] = gr.update(visible=False)
257
  return updates
258
 
259
+ def save_data(mode, history, urls, user_id, *args):
260
  if not history: return
261
  gid = history[-1]
262
  if mode == "fix": FIXED_IN_SESSION.add(gid)
 
264
  for i, u in enumerate(urls):
265
  sc, lbl, chk = args[i*4], args[i*4+1], args[i*4+2]
266
  clean_lbl = str(lbl).strip().lower()
267
+ if mode in ["label", "fix"]: rows.append([ts, user_id, gid, u, int(sc), clean_lbl])
268
+ else: rows.append([ts, user_id, gid, u, chk, clean_lbl, int(sc)])
269
  with FileLock(LOCK_FILE):
270
  with open(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE, "a", newline="") as f: csv.writer(f).writerows(rows)
271
  sync_push_background(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE, os.path.basename(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE))
272
+ release_lease(user_id)
273
+ return render_workspace(mode, history, user_id)
274
 
275
  def refresh_cat():
276
  all_gids = get_ordered_groups()
 
278
  df_l, df_v = get_clean_df(LABEL_FILE), get_clean_df(VERIFY_FILE)
279
  l_set = set(df_l['group_id'].unique()) if not df_l.empty else set()
280
  v_set = set(df_v['group_id'].unique()) if not df_v.empty else set()
281
+ leased = get_leased_group_ids()
282
+ data = []
283
+ for i, gid in enumerate(all_gids):
284
+ status = "⚠️ Fix Needed" if gid in flagged else "✅ Verified" if gid in v_set else "🔵 Labeled" if gid in l_set else "⚪ Pending"
285
+ if gid in leased: status += " [IN USE]"
286
+ data.append([i+1, status, gid])
287
  return pd.DataFrame(data, columns=["#", "Status", "ID"])
288
 
289
def do_login(name):
    """Validate the entered name and switch from the login screen to the menu.

    The name is stringified, stripped and lower-cased. Returns a 4-tuple
    matching the outputs wired to the login button: login-screen update,
    menu-screen update, error-message update, and the normalized user id
    (an empty string when the name is blank).
    """
    user = str(name).strip().lower()
    if user:
        hide_login = gr.update(visible=False)
        show_menu = gr.update(visible=True)
        hide_error = gr.update(visible=False)
        return hide_login, show_menu, hide_error, user
    # Blank name: leave both screens untouched and surface the error text.
    show_error = gr.update(visible=True, value="Please enter your name.")
    return gr.update(), gr.update(), show_error, ""
294
+
295
def renew_lease_handler(user_id):
    """Event callback that extends the given user's lease; produces no output."""
    renew_lease(user_id)
    return None
297
+
298
  with gr.Blocks(theme=gr.themes.Soft(), title="Labeler Pro") as demo:
299
  state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
300
+ state_user = gr.State("")
301
  with gr.Row():
302
  top_stats = gr.Markdown("Loading...")
303
  btn_home = gr.Button("🏠 Home", size="sm", scale=0)
304
  with gr.Tabs():
305
  with gr.Tab("Workspace"):
306
+ with gr.Group() as screen_login:
307
+ gr.Markdown("# Property Labeler Pro\n### Enter your name to start")
308
+ user_input = gr.Textbox(label="Your Name / ID", placeholder="e.g., alice")
309
+ b_login = gr.Button("Start Labeling", variant="primary")
310
+ login_error = gr.Markdown("", visible=False)
311
+ with gr.Group(visible=False) as screen_menu:
312
  gr.Markdown("# Property Labeler Pro")
313
  with gr.Row():
314
  b_start_l, b_start_v, b_start_f = gr.Button("Label", variant="primary"), gr.Button("Verify"), gr.Button("🛠 Fix Errors", variant="secondary")
 
331
  df_cat = gr.Dataframe(interactive=False)
332
  b_ref_cat = gr.Button("Refresh Catalog")
333
  ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
334
+ b_login.click(do_login, [user_input], [screen_login, screen_menu, login_error, state_user])
335
+ b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist, state_user], ALL_IO)
336
+ b_start_v.click(lambda: "verify", None, state_mode).then(render_workspace, [state_mode, state_hist, state_user], ALL_IO)
337
+ b_start_f.click(lambda: "fix", None, state_mode).then(render_workspace, [state_mode, state_hist, state_user], ALL_IO)
338
+ b_save.click(save_data, [state_mode, state_hist, state_urls, state_user] + input_objs, ALL_IO)
339
+ b_back.click(lambda m, h, u: render_workspace(m, h, u, move_back=True), [state_mode, state_hist, state_user], ALL_IO)
340
+ def go_home(user_id):
341
+ release_lease(user_id)
342
+ return gr.update(visible=True), gr.update(visible=False), []
343
+ btn_home.click(go_home, [state_user], [screen_menu, screen_work, state_hist])
344
+ b_go_l.click(lambda: "label", None, state_mode).then(lambda n,m,h,u: render_workspace(m,h,u,int(n)-1), [num_in, state_mode, state_hist, state_user], ALL_IO)
345
+ b_go_v.click(lambda: "verify", None, state_mode).then(lambda n,m,h,u: render_workspace(m,h,u,int(n)-1), [num_in, state_mode, state_hist, state_user], ALL_IO)
346
+ b_go_f.click(lambda: "fix", None, state_mode).then(lambda n,m,h,u: render_workspace(m,h,u,int(n)-1), [num_in, state_mode, state_hist, state_user], ALL_IO)
347
+ for i in range(0, len(input_objs), 4):
348
+ input_objs[i].change(renew_lease_handler, [state_user], None)
349
  b_ref_cat.click(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
350
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
351
  demo.queue().launch(server_name="0.0.0.0", server_port=7860)