Nightfury16 committed on
Commit
3c1b983
·
1 Parent(s): ace880e

update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -88
app.py CHANGED
@@ -33,15 +33,8 @@ def sync_pull():
33
  token = HF_TOKEN if HF_TOKEN and len(HF_TOKEN) > 5 else None
34
  for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
35
  try:
36
- hf_hub_download(
37
- repo_id=DATASET_REPO_ID,
38
- filename=filename,
39
- repo_type="dataset",
40
- local_dir=CACHE_DIR,
41
- token=token
42
- )
43
- except:
44
- pass
45
 
46
  def sync_push_background(local_path, remote_filename):
47
  token = HF_TOKEN if HF_TOKEN and len(HF_TOKEN) > 5 else None
@@ -49,26 +42,17 @@ def sync_push_background(local_path, remote_filename):
49
  def _push():
50
  try:
51
  api = HfApi(token=token)
52
- api.upload_file(
53
- path_or_fileobj=local_path,
54
- path_in_repo=remote_filename,
55
- repo_id=DATASET_REPO_ID,
56
- repo_type="dataset"
57
- )
58
- except:
59
- pass
60
  threading.Thread(target=_push).start()
61
 
62
  def init_files():
63
  sync_pull()
64
  for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
65
  if not os.path.exists(f):
66
- if f == LABEL_FILE:
67
- cols = ["timestamp", "user", "group_id", "url", "score", "label"]
68
- elif f == VERIFY_FILE:
69
- cols = ["timestamp", "user", "group_id", "url", "is_correct", "corrected_label", "corrected_score"]
70
- else:
71
- cols = ["timestamp", "user", "group_id"]
72
  pd.DataFrame(columns=cols).to_csv(f, index=False)
73
 
74
  init_files()
@@ -80,15 +64,8 @@ def load_all_urls():
80
  with open(URL_FILE, 'r') as f:
81
  data = json.load(f)
82
  if "groups" in data:
83
- for group in data["groups"]:
84
- urls.extend(group.get("images", []))
85
- elif isinstance(data, dict):
86
- for rows in data.values():
87
- if isinstance(rows, list):
88
- for row in rows:
89
- if "unstaged_images" in row: urls.append(row["unstaged_images"])
90
- except:
91
- pass
92
  return urls
93
 
94
  def get_ordered_groups():
@@ -106,6 +83,8 @@ def get_flagged_groups():
106
  if not os.path.exists(LABEL_FILE): return []
107
  try:
108
  df = pd.read_csv(LABEL_FILE)
 
 
109
  errors = df[(df['score'] == 10) & (df['label'] != 'living_room')]
110
  return errors['group_id'].unique().tolist()
111
  except: return []
@@ -118,61 +97,51 @@ def get_saved_values(gid, mode):
118
  try:
119
  fname = LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE
120
  df = pd.read_csv(fname)
 
121
  rows = df[df['group_id'] == gid]
122
  for _, row in rows.iterrows():
123
  if mode in ["label", "fix"]:
124
  saved_data[row['url']] = {"score": row['score'], "label": row['label']}
125
  else:
126
- saved_data[row['url']] = {
127
- "is_correct": row['is_correct'],
128
- "label": row['corrected_label'],
129
- "score": row['corrected_score']
130
- }
131
  except: pass
132
  return saved_data
133
 
134
  def get_stats_text():
135
  all_gids = get_ordered_groups()
136
  flagged = get_flagged_groups()
137
- try: l = len(pd.read_csv(LABEL_FILE)['group_id'].unique())
 
 
138
  except: l = 0
139
- try: v = len(pd.read_csv(VERIFY_FILE)['group_id'].unique())
140
- except: v = 0
141
- err_msg = f" | ⚠️ **Fix:** {len(flagged)}" if flagged else ""
142
- return f"**Total:** {len(all_gids)} | **Labeled:** {l} | **Verified:** {v}{err_msg}"
143
 
144
  def render_workspace(mode, history, specific_index=None, move_back=False):
145
  all_ordered = get_ordered_groups()
146
  flagged_pool = get_flagged_groups()
147
 
148
- if mode == "fix" and specific_index is None:
149
- target_pool = flagged_pool
150
- if not target_pool:
151
- return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "No errors left."}
152
- else:
153
- target_pool = all_ordered
154
-
155
  target_gid = None
156
  if specific_index is not None:
157
  if 0 <= specific_index < len(all_ordered): target_gid = all_ordered[specific_index]
158
- else: return {log_box: "Out of range"}
159
  elif move_back and len(history) > 1:
160
  history.pop()
161
  target_gid = history[-1]
162
  else:
163
- try:
164
- l_done = set(pd.read_csv(LABEL_FILE)['group_id'].unique())
165
- v_done = set(pd.read_csv(VERIFY_FILE)['group_id'].unique())
 
 
166
  except: l_done, v_done = set(), set()
167
 
168
- candidates = []
169
- for g in target_pool:
170
- if mode == "label" and g not in l_done: candidates.append(g)
171
- elif mode == "verify" and g in l_done and g not in v_done: candidates.append(g)
172
- elif mode == "fix": candidates.append(g)
173
 
174
  if not candidates:
175
- return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "Finished."}
176
  target_gid = candidates[0]
177
 
178
  urls = get_group_urls(target_gid)
@@ -191,8 +160,7 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
191
  processed_images = list(executor.map(fetch, urls))
192
 
193
  updates = {
194
- screen_menu: gr.update(visible=False),
195
- screen_work: gr.update(visible=True),
196
  header_md: f"# {mode.upper()} Property #{target_idx + 1} ({target_gid})",
197
  state_urls: urls, state_hist: history, state_idx: target_idx,
198
  top_stats: get_stats_text(), log_box: f"Loaded {target_gid}"
@@ -206,22 +174,19 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
206
  updates[img_objs[i]] = gr.update(value=processed_images[i], visible=True)
207
  v_sc = saved_vals.get(u, {}).get('score', 5)
208
  v_lbl = saved_vals.get(u, {}).get('label', "living_room")
209
- v_chk = saved_vals.get(u, {}).get('is_correct', True)
210
-
211
  is_err = (v_sc == 10 and v_lbl != "living_room")
212
- err_txt = "<span style='color:red'>⚠️ Score 10 is Living Room only</span>" if is_err else ""
213
-
214
  if mode in ["label", "fix"]:
215
  updates[c_sld] = gr.update(visible=True, value=v_sc, interactive=True)
216
  updates[c_drp] = gr.update(visible=True, value=v_lbl, interactive=True)
217
  updates[c_chk] = gr.update(visible=False)
218
- updates[c_lbl] = gr.update(visible=True if is_err else False, value=err_txt)
219
  else:
220
  p_lbl, p_sc = r1_vals.get(u, {}).get('label', "?"), r1_vals.get(u, {}).get('score', "?")
221
  updates[c_sld] = gr.update(visible=True, value=v_sc if u in saved_vals else p_sc)
222
  updates[c_drp] = gr.update(visible=True, value=v_lbl)
223
- updates[c_chk] = gr.update(visible=True, value=v_chk)
224
- updates[c_lbl] = gr.update(visible=True, value=f"Label: {p_lbl} ({p_sc})")
225
  else:
226
  updates[img_objs[i]] = gr.update(visible=False)
227
  for obj in [c_sld, c_drp, c_chk, c_lbl]: updates[obj] = gr.update(visible=False)
@@ -236,6 +201,7 @@ def save_data(mode, history, urls, *args):
236
  sc, lbl, chk = args[i*4], args[i*4+1], args[i*4+2]
237
  if mode in ["label", "fix"]: rows.append([ts, "user", gid, u, sc, lbl])
238
  else: rows.append([ts, "user", gid, u, chk, lbl, sc])
 
239
  fname = LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE
240
  with FileLock(LOCK_FILE):
241
  with open(fname, "a", newline="") as f: csv.writer(f).writerows(rows)
@@ -246,8 +212,10 @@ def refresh_cat():
246
  all_gids = get_ordered_groups()
247
  flagged = set(get_flagged_groups())
248
  try:
249
- l_set = set(pd.read_csv(LABEL_FILE)['group_id'].unique())
250
- v_set = set(pd.read_csv(VERIFY_FILE)['group_id'].unique())
 
 
251
  except: l_set, v_set = set(), set()
252
  data = []
253
  for i, gid in enumerate(all_gids):
@@ -258,20 +226,17 @@ def refresh_cat():
258
  data.append([i+1, s, gid])
259
  return pd.DataFrame(data, columns=["#", "Status", "ID"])
260
 
261
- with gr.Blocks(theme=gr.themes.Soft(), title="Labeler") as demo:
262
  state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
263
  with gr.Row():
264
  top_stats = gr.Markdown("Loading...")
265
  btn_home = gr.Button("🏠 Home", size="sm", scale=0)
266
-
267
  with gr.Tabs():
268
  with gr.Tab("Workspace"):
269
  with gr.Group() as screen_menu:
270
  gr.Markdown("# Welcome")
271
  with gr.Row():
272
- b_start_l = gr.Button("Start Labeling", variant="primary")
273
- b_start_v = gr.Button("Start Verification")
274
- b_start_f = gr.Button("🛠 Fix Errors", variant="secondary")
275
  with gr.Group(visible=False) as screen_work:
276
  header_md = gr.Markdown()
277
  img_objs, input_objs = [], []
@@ -279,22 +244,15 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Labeler") as demo:
279
  for i in range(MAX_IMAGES):
280
  with gr.Column(min_width=200):
281
  img = gr.Image(interactive=False, height=240)
282
- sld = gr.Slider(1, 10, step=1, label="Score")
283
- drp = gr.Dropdown(ROOM_CLASSES, label="Class")
284
- chk = gr.Checkbox(label="Correct?", value=True)
285
- lbl = gr.Markdown()
286
  img_objs.append(img); input_objs.extend([sld, drp, chk, lbl])
287
  with gr.Row():
288
- b_back = gr.Button("⬅ Back")
289
- b_save = gr.Button("💾 Save & Next", variant="primary")
290
  log_box = gr.Textbox(label="Log", interactive=False)
291
-
292
  with gr.Tab("Catalog"):
293
  with gr.Row():
294
  num_in = gr.Number(value=1, label="Prop #", precision=0)
295
- b_go_l = gr.Button("Go (Label)")
296
- b_go_v = gr.Button("Go (Verify)")
297
- b_go_f = gr.Button("Go (Fix)")
298
  df_cat = gr.Dataframe(interactive=False)
299
  b_ref_cat = gr.Button("Refresh")
300
 
@@ -305,9 +263,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Labeler") as demo:
305
  b_save.click(save_data, [state_mode, state_hist, state_urls] + input_objs, ALL_IO)
306
  b_back.click(lambda m, h: render_workspace(m, h, move_back=True), [state_mode, state_hist], ALL_IO)
307
  btn_home.click(lambda: {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), state_hist: []}, None, [screen_menu, screen_work, state_hist])
308
- b_go_l.click(lambda: "label", None, state_mode).then(lambda n, m, h: render_workspace(m, h, specific_index=int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
309
- b_go_v.click(lambda: "verify", None, state_mode).then(lambda n, m, h: render_workspace(m, h, specific_index=int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
310
- b_go_f.click(lambda: "fix", None, state_mode).then(lambda n, m, h: render_workspace(m, h, specific_index=int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
311
  b_ref_cat.click(refresh_cat, None, df_cat)
312
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
313
 
 
33
  token = HF_TOKEN if HF_TOKEN and len(HF_TOKEN) > 5 else None
34
  for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
35
  try:
36
+ hf_hub_download(repo_id=DATASET_REPO_ID, filename=filename, repo_type="dataset", local_dir=CACHE_DIR, token=token)
37
+ except: pass
 
 
 
 
 
 
 
38
 
39
  def sync_push_background(local_path, remote_filename):
40
  token = HF_TOKEN if HF_TOKEN and len(HF_TOKEN) > 5 else None
 
42
  def _push():
43
  try:
44
  api = HfApi(token=token)
45
+ api.upload_file(path_or_fileobj=local_path, path_in_repo=remote_filename, repo_id=DATASET_REPO_ID, repo_type="dataset")
46
+ except: pass
 
 
 
 
 
 
47
  threading.Thread(target=_push).start()
48
 
49
  def init_files():
50
  sync_pull()
51
  for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
52
  if not os.path.exists(f):
53
+ cols = ["timestamp", "user", "group_id", "url", "score", "label"] if f == LABEL_FILE else \
54
+ ["timestamp", "user", "group_id", "url", "is_correct", "corrected_label", "corrected_score"] if f == VERIFY_FILE else \
55
+ ["timestamp", "user", "group_id"]
 
 
 
56
  pd.DataFrame(columns=cols).to_csv(f, index=False)
57
 
58
  init_files()
 
64
  with open(URL_FILE, 'r') as f:
65
  data = json.load(f)
66
  if "groups" in data:
67
+ for group in data["groups"]: urls.extend(group.get("images", []))
68
+ except: pass
 
 
 
 
 
 
 
69
  return urls
70
 
71
  def get_ordered_groups():
 
83
  if not os.path.exists(LABEL_FILE): return []
84
  try:
85
  df = pd.read_csv(LABEL_FILE)
86
+ if df.empty: return []
87
+ df = df.drop_duplicates(subset=['url'], keep='last')
88
  errors = df[(df['score'] == 10) & (df['label'] != 'living_room')]
89
  return errors['group_id'].unique().tolist()
90
  except: return []
 
97
  try:
98
  fname = LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE
99
  df = pd.read_csv(fname)
100
+ df = df.drop_duplicates(subset=['url'], keep='last')
101
  rows = df[df['group_id'] == gid]
102
  for _, row in rows.iterrows():
103
  if mode in ["label", "fix"]:
104
  saved_data[row['url']] = {"score": row['score'], "label": row['label']}
105
  else:
106
+ saved_data[row['url']] = {"is_correct": row['is_correct'], "label": row['corrected_label'], "score": row['corrected_score']}
 
 
 
 
107
  except: pass
108
  return saved_data
109
 
110
  def get_stats_text():
111
  all_gids = get_ordered_groups()
112
  flagged = get_flagged_groups()
113
+ try:
114
+ df_l = pd.read_csv(LABEL_FILE).drop_duplicates(subset=['url'], keep='last')
115
+ l = len(df_l['group_id'].unique())
116
  except: l = 0
117
+ err_msg = f" | ⚠️ **Fix Needed:** {len(flagged)}" if flagged else ""
118
+ return f"**Properties:** {len(all_gids)} | **Labeled:** {l}{err_msg}"
 
 
119
 
120
  def render_workspace(mode, history, specific_index=None, move_back=False):
121
  all_ordered = get_ordered_groups()
122
  flagged_pool = get_flagged_groups()
123
 
 
 
 
 
 
 
 
124
  target_gid = None
125
  if specific_index is not None:
126
  if 0 <= specific_index < len(all_ordered): target_gid = all_ordered[specific_index]
 
127
  elif move_back and len(history) > 1:
128
  history.pop()
129
  target_gid = history[-1]
130
  else:
131
+ try:
132
+ df_l = pd.read_csv(LABEL_FILE).drop_duplicates(subset=['url'], keep='last')
133
+ l_done = set(df_l['group_id'].unique())
134
+ df_v = pd.read_csv(VERIFY_FILE).drop_duplicates(subset=['url'], keep='last')
135
+ v_done = set(df_v['group_id'].unique())
136
  except: l_done, v_done = set(), set()
137
 
138
+ if mode == "fix":
139
+ candidates = flagged_pool
140
+ else:
141
+ candidates = [g for g in all_ordered if (mode=="label" and g not in l_done) or (mode=="verify" and g in l_done and g not in v_done)]
 
142
 
143
  if not candidates:
144
+ return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "Done!"}
145
  target_gid = candidates[0]
146
 
147
  urls = get_group_urls(target_gid)
 
160
  processed_images = list(executor.map(fetch, urls))
161
 
162
  updates = {
163
+ screen_menu: gr.update(visible=False), screen_work: gr.update(visible=True),
 
164
  header_md: f"# {mode.upper()} Property #{target_idx + 1} ({target_gid})",
165
  state_urls: urls, state_hist: history, state_idx: target_idx,
166
  top_stats: get_stats_text(), log_box: f"Loaded {target_gid}"
 
174
  updates[img_objs[i]] = gr.update(value=processed_images[i], visible=True)
175
  v_sc = saved_vals.get(u, {}).get('score', 5)
176
  v_lbl = saved_vals.get(u, {}).get('label', "living_room")
 
 
177
  is_err = (v_sc == 10 and v_lbl != "living_room")
178
+
 
179
  if mode in ["label", "fix"]:
180
  updates[c_sld] = gr.update(visible=True, value=v_sc, interactive=True)
181
  updates[c_drp] = gr.update(visible=True, value=v_lbl, interactive=True)
182
  updates[c_chk] = gr.update(visible=False)
183
+ updates[c_lbl] = gr.update(visible=True if is_err else False, value="<span style='color:red'>⚠️ Score 10=Living Room only</span>")
184
  else:
185
  p_lbl, p_sc = r1_vals.get(u, {}).get('label', "?"), r1_vals.get(u, {}).get('score', "?")
186
  updates[c_sld] = gr.update(visible=True, value=v_sc if u in saved_vals else p_sc)
187
  updates[c_drp] = gr.update(visible=True, value=v_lbl)
188
+ updates[c_chk] = gr.update(visible=True, value=saved_vals.get(u, {}).get('is_correct', True))
189
+ updates[c_lbl] = gr.update(visible=True, value=f"Prev: {p_lbl} ({p_sc})")
190
  else:
191
  updates[img_objs[i]] = gr.update(visible=False)
192
  for obj in [c_sld, c_drp, c_chk, c_lbl]: updates[obj] = gr.update(visible=False)
 
201
  sc, lbl, chk = args[i*4], args[i*4+1], args[i*4+2]
202
  if mode in ["label", "fix"]: rows.append([ts, "user", gid, u, sc, lbl])
203
  else: rows.append([ts, "user", gid, u, chk, lbl, sc])
204
+
205
  fname = LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE
206
  with FileLock(LOCK_FILE):
207
  with open(fname, "a", newline="") as f: csv.writer(f).writerows(rows)
 
212
  all_gids = get_ordered_groups()
213
  flagged = set(get_flagged_groups())
214
  try:
215
+ df_l = pd.read_csv(LABEL_FILE).drop_duplicates(subset=['url'], keep='last')
216
+ l_set = set(df_l['group_id'].unique())
217
+ df_v = pd.read_csv(VERIFY_FILE).drop_duplicates(subset=['url'], keep='last')
218
+ v_set = set(df_v['group_id'].unique())
219
  except: l_set, v_set = set(), set()
220
  data = []
221
  for i, gid in enumerate(all_gids):
 
226
  data.append([i+1, s, gid])
227
  return pd.DataFrame(data, columns=["#", "Status", "ID"])
228
 
229
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
230
  state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
231
  with gr.Row():
232
  top_stats = gr.Markdown("Loading...")
233
  btn_home = gr.Button("🏠 Home", size="sm", scale=0)
 
234
  with gr.Tabs():
235
  with gr.Tab("Workspace"):
236
  with gr.Group() as screen_menu:
237
  gr.Markdown("# Welcome")
238
  with gr.Row():
239
+ b_start_l, b_start_v, b_start_f = gr.Button("Label", variant="primary"), gr.Button("Verify"), gr.Button("🛠 Fix", variant="secondary")
 
 
240
  with gr.Group(visible=False) as screen_work:
241
  header_md = gr.Markdown()
242
  img_objs, input_objs = [], []
 
244
  for i in range(MAX_IMAGES):
245
  with gr.Column(min_width=200):
246
  img = gr.Image(interactive=False, height=240)
247
+ sld, drp, chk, lbl = gr.Slider(1, 10, step=1, label="Score"), gr.Dropdown(ROOM_CLASSES, label="Class"), gr.Checkbox(label="Correct?"), gr.Markdown()
 
 
 
248
  img_objs.append(img); input_objs.extend([sld, drp, chk, lbl])
249
  with gr.Row():
250
+ b_back, b_save = gr.Button("⬅ Back"), gr.Button("💾 Save & Next", variant="primary")
 
251
  log_box = gr.Textbox(label="Log", interactive=False)
 
252
  with gr.Tab("Catalog"):
253
  with gr.Row():
254
  num_in = gr.Number(value=1, label="Prop #", precision=0)
255
+ b_go_l, b_go_v, b_go_f = gr.Button("Go Label"), gr.Button("Go Verify"), gr.Button("Go Fix")
 
 
256
  df_cat = gr.Dataframe(interactive=False)
257
  b_ref_cat = gr.Button("Refresh")
258
 
 
263
  b_save.click(save_data, [state_mode, state_hist, state_urls] + input_objs, ALL_IO)
264
  b_back.click(lambda m, h: render_workspace(m, h, move_back=True), [state_mode, state_hist], ALL_IO)
265
  btn_home.click(lambda: {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), state_hist: []}, None, [screen_menu, screen_work, state_hist])
266
+ b_go_l.click(lambda: "label", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
267
+ b_go_v.click(lambda: "verify", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
268
+ b_go_f.click(lambda: "fix", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
269
  b_ref_cat.click(refresh_cat, None, df_cat)
270
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
271