Nightfury16 committed on
Commit
05521a3
·
1 Parent(s): e172b1c

update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -69
app.py CHANGED
@@ -34,7 +34,9 @@ def sync_pull():
34
  for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
35
  try:
36
  hf_hub_download(repo_id=DATASET_REPO_ID, filename=filename, repo_type="dataset", local_dir=CACHE_DIR, token=token)
37
- except: pass
 
 
38
 
39
  def sync_push_background(local_path, remote_filename):
40
  token = HF_TOKEN if HF_TOKEN and len(HF_TOKEN) > 5 else None
@@ -43,7 +45,9 @@ def sync_push_background(local_path, remote_filename):
43
  try:
44
  api = HfApi(token=token)
45
  api.upload_file(path_or_fileobj=local_path, path_in_repo=remote_filename, repo_id=DATASET_REPO_ID, repo_type="dataset")
46
- except: pass
 
 
47
  threading.Thread(target=_push).start()
48
 
49
  def init_files():
@@ -78,50 +82,40 @@ def get_ordered_groups():
78
  groups.append(gid); seen.add(gid)
79
  return groups
80
 
81
- def get_flagged_groups():
82
- if not os.path.exists(LABEL_FILE): return []
83
  try:
84
- df = pd.read_csv(LABEL_FILE)
85
- if df.empty: return []
86
- df['label'] = df['label'].astype(str).str.strip().str.lower()
87
- df['score'] = pd.to_numeric(df['score'], errors='coerce')
 
 
88
  df = df.drop_duplicates(subset=['url'], keep='last')
89
- # Logic: Flag if score is 10 AND it's NOT living_room
90
- errors = df[(df['score'] == 10) & (df['label'] != 'living_room')]
91
- return errors['group_id'].unique().tolist()
92
- except: return []
 
 
 
 
 
93
 
94
  def get_group_urls(target_gid):
95
  return [u for u in load_all_urls() if target_gid in u][:MAX_IMAGES]
96
 
97
- def get_saved_values(gid, mode):
98
- saved_data = {}
99
- try:
100
- fname = LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE
101
- df = pd.read_csv(fname)
102
- df = df.drop_duplicates(subset=['url'], keep='last')
103
- rows = df[df['group_id'] == gid]
104
- for _, row in rows.iterrows():
105
- lbl = str(row['label']).strip().lower() if mode in ["label", "fix"] else str(row['corrected_label']).strip().lower()
106
- if mode in ["label", "fix"]:
107
- saved_data[row['url']] = {"score": row['score'], "label": lbl}
108
- else:
109
- saved_data[row['url']] = {"is_correct": row['is_correct'], "label": lbl, "score": row['corrected_score']}
110
- except: pass
111
- return saved_data
112
-
113
  def get_stats_text():
114
  all_gids = get_ordered_groups()
115
  flagged = get_flagged_groups()
116
- try:
117
- df_l = pd.read_csv(LABEL_FILE).drop_duplicates(subset=['url'], keep='last')
118
- l = len(df_l['group_id'].unique())
119
- except: l = 0
120
- err_msg = f" | ⚠️ **To Fix:** {len(flagged)}" if flagged else ""
121
- return f"**Total Properties:** {len(all_gids)} | **Labeled:** {l}{err_msg}"
122
 
123
  def render_workspace(mode, history, specific_index=None, move_back=False):
124
  all_ordered = get_ordered_groups()
 
125
  current_gid = history[-1] if history else None
126
  target_gid = None
127
 
@@ -131,33 +125,37 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
131
  history.pop()
132
  target_gid = history[-1]
133
  else:
134
- flagged_pool = get_flagged_groups()
135
- try:
136
- df_l = pd.read_csv(LABEL_FILE).drop_duplicates(subset=['url'], keep='last')
137
- l_done = set(df_l['group_id'].unique())
138
- df_v = pd.read_csv(VERIFY_FILE).drop_duplicates(subset=['url'], keep='last')
139
- v_done = set(df_v['group_id'].unique())
140
- except: l_done, v_done = set(), set()
141
 
142
  if mode == "fix":
143
- # Deterministic selection for Fix mode to avoid jumping
144
  candidates = [g for g in flagged_pool if g != current_gid]
145
  if not candidates and flagged_pool: candidates = flagged_pool
146
  if candidates: target_gid = candidates[0]
147
  else:
148
- # Random selection for Label/Verify mode
149
  candidates = [g for g in all_ordered if (mode=="label" and g not in l_done) or (mode=="verify" and g in l_done and g not in v_done)]
150
  if candidates: target_gid = random.choice(candidates)
151
 
152
  if not target_gid:
153
- return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "Mode Complete."}
154
 
155
  urls = get_group_urls(target_gid)
156
  if not history or history[-1] != target_gid: history.append(target_gid)
157
- saved_vals = get_saved_values(target_gid, mode)
158
- r1_vals = get_saved_values(target_gid, "label") if mode == "verify" else {}
159
- target_idx = all_ordered.index(target_gid)
 
 
 
 
 
 
 
160
 
 
161
  with ThreadPoolExecutor(max_workers=MAX_IMAGES) as ex:
162
  def fetch(u):
163
  try:
@@ -169,9 +167,9 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
169
 
170
  updates = {
171
  screen_menu: gr.update(visible=False), screen_work: gr.update(visible=True),
172
- header_md: f"# {mode.upper()} Property #{target_idx + 1} ({target_gid})",
173
  state_urls: urls, state_hist: history, state_idx: target_idx,
174
- top_stats: get_stats_text(), log_box: f"Loaded {target_gid}"
175
  }
176
 
177
  for i in range(MAX_IMAGES):
@@ -180,21 +178,21 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
180
  if i < len(urls):
181
  u = urls[i]
182
  updates[img_objs[i]] = gr.update(value=processed_images[i], visible=True)
183
- v_sc = int(float(saved_vals.get(u, {}).get('score', 5)))
184
  v_lbl = str(saved_vals.get(u, {}).get('label', "living_room")).strip().lower()
 
185
  is_err = (v_sc == 10 and v_lbl != "living_room")
186
 
187
  if mode in ["label", "fix"]:
188
  updates[c_sld] = gr.update(visible=True, value=v_sc, interactive=True)
189
- updates[c_drp] = gr.update(visible=True, value=v_lbl, interactive=True)
190
  updates[c_chk] = gr.update(visible=False)
191
- updates[c_lbl] = gr.update(visible=True if is_err else False, value="<span style='color:red'>⚠️ ERROR: Score 10 must be Living Room</span>")
192
  else:
193
- p_lbl, p_sc = r1_vals.get(u, {}).get('label', "?"), r1_vals.get(u, {}).get('score', "?")
194
- updates[c_sld] = gr.update(visible=True, value=v_sc if u in saved_vals else p_sc)
195
  updates[c_drp] = gr.update(visible=True, value=v_lbl)
196
  updates[c_chk] = gr.update(visible=True, value=saved_vals.get(u, {}).get('is_correct', True))
197
- updates[c_lbl] = gr.update(visible=True, value=f"Prev: {p_lbl} ({p_sc})")
198
  else:
199
  updates[img_objs[i]] = gr.update(visible=False)
200
  for obj in [c_sld, c_drp, c_chk, c_lbl]: updates[obj] = gr.update(visible=False)
@@ -208,35 +206,42 @@ def save_data(mode, history, urls, *args):
208
  clean_lbl = str(lbl).strip().lower()
209
  if mode in ["label", "fix"]: rows.append([ts, "user", gid, u, int(sc), clean_lbl])
210
  else: rows.append([ts, "user", gid, u, chk, clean_lbl, int(sc)])
 
211
  fname = LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE
212
  with FileLock(LOCK_FILE):
213
  with open(fname, "a", newline="") as f: csv.writer(f).writerows(rows)
 
214
  sync_push_background(fname, os.path.basename(fname))
215
  return render_workspace(mode, history)
216
 
217
  def refresh_cat():
218
  all_gids = get_ordered_groups()
219
  flagged = set(get_flagged_groups())
220
- try:
221
- df_l = pd.read_csv(LABEL_FILE).drop_duplicates(subset=['url'], keep='last')
222
- l_set = set(df_l['group_id'].unique())
223
- df_v = pd.read_csv(VERIFY_FILE).drop_duplicates(subset=['url'], keep='last')
224
- v_set = set(df_v['group_id'].unique())
225
- except: l_set, v_set = set(), set()
226
- data = [[i+1, "⚠️ Fix Needed" if gid in flagged else "✅ Verified" if gid in v_set else "🔵 Labeled" if gid in l_set else "⚪ Pending", gid] for i, gid in enumerate(all_gids)]
 
 
227
  return pd.DataFrame(data, columns=["#", "Status", "ID"])
228
 
229
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
230
  state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
231
  with gr.Row():
232
- top_stats = gr.Markdown("Loading...")
233
  btn_home = gr.Button("🏠 Home", size="sm", scale=0)
 
234
  with gr.Tabs():
235
  with gr.Tab("Workspace"):
236
  with gr.Group() as screen_menu:
237
- gr.Markdown("# Property Labeler Pro")
238
  with gr.Row():
239
- b_start_l, b_start_v, b_start_f = gr.Button("Start Labeling", variant="primary"), gr.Button("Start Verification"), gr.Button("🛠 Fix Errors", variant="secondary")
 
 
240
  with gr.Group(visible=False) as screen_work:
241
  header_md = gr.Markdown()
242
  img_objs, input_objs = [], []
@@ -244,11 +249,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
244
  for i in range(MAX_IMAGES):
245
  with gr.Column(min_width=200):
246
  img = gr.Image(interactive=False, height=240)
247
- sld, drp, chk, lbl = gr.Slider(1, 10, step=1, label="Score"), gr.Dropdown(ROOM_CLASSES, label="Class"), gr.Checkbox(label="Correct?"), gr.Markdown()
 
 
 
248
  img_objs.append(img); input_objs.extend([sld, drp, chk, lbl])
249
  with gr.Row():
250
- b_back, b_save = gr.Button("⬅ Back"), gr.Button("💾 Save & Next", variant="primary")
251
- log_box = gr.Textbox(label="Log", interactive=False)
 
 
252
  with gr.Tab("Catalog"):
253
  with gr.Row():
254
  num_in = gr.Number(value=1, label="Prop #", precision=0)
@@ -257,15 +267,19 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
257
  b_ref_cat = gr.Button("Refresh Catalog")
258
 
259
  ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
 
260
  b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
261
  b_start_v.click(lambda: "verify", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
262
  b_start_f.click(lambda: "fix", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
 
263
  b_save.click(save_data, [state_mode, state_hist, state_urls] + input_objs, ALL_IO)
264
  b_back.click(lambda m, h: render_workspace(m, h, move_back=True), [state_mode, state_hist], ALL_IO)
265
  btn_home.click(lambda: {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), state_hist: []}, None, [screen_menu, screen_work, state_hist])
 
266
  b_go_l.click(lambda: "label", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
267
  b_go_v.click(lambda: "verify", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
268
  b_go_f.click(lambda: "fix", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
 
269
  b_ref_cat.click(refresh_cat, None, df_cat)
270
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
271
 
 
34
  for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
35
  try:
36
  hf_hub_download(repo_id=DATASET_REPO_ID, filename=filename, repo_type="dataset", local_dir=CACHE_DIR, token=token)
37
+ print(f"Successfully pulled {filename}")
38
+ except Exception as e:
39
+ print(f"Pull error {filename}: {e}")
40
 
41
  def sync_push_background(local_path, remote_filename):
42
  token = HF_TOKEN if HF_TOKEN and len(HF_TOKEN) > 5 else None
 
45
  try:
46
  api = HfApi(token=token)
47
  api.upload_file(path_or_fileobj=local_path, path_in_repo=remote_filename, repo_id=DATASET_REPO_ID, repo_type="dataset")
48
+ print(f"Successfully pushed {remote_filename}")
49
+ except Exception as e:
50
+ print(f"Push error {remote_filename}: {e}")
51
  threading.Thread(target=_push).start()
52
 
53
  def init_files():
 
82
  groups.append(gid); seen.add(gid)
83
  return groups
84
 
85
def get_clean_df(filepath):
    """Load a CSV file into a normalized, de-duplicated DataFrame.

    Normalization applied when the corresponding column is present:
      * ``label`` is cast to str, stripped and lower-cased.
      * ``score`` is coerced to a number; unparsable values become 0 and
        the column is cast to int.
    Rows are then de-duplicated on ``url``, keeping the last occurrence.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The cleaned DataFrame. An empty DataFrame is returned when the file
        does not exist, has no rows, or cannot be read/cleaned (for example
        when the ``url`` column is missing).
    """
    if not os.path.exists(filepath):
        return pd.DataFrame()
    try:
        df = pd.read_csv(filepath)
        if df.empty:
            return df
        if 'label' in df.columns:
            df['label'] = df['label'].astype(str).str.strip().str.lower()
        if 'score' in df.columns:
            df['score'] = pd.to_numeric(df['score'], errors='coerce').fillna(0).astype(int)
        return df.drop_duplicates(subset=['url'], keep='last')
    except Exception as e:
        # Best-effort read: report the failure instead of hiding it, and do
        # not intercept KeyboardInterrupt/SystemExit like a bare except would.
        print(f"Read error {filepath}: {e}")
        return pd.DataFrame()
98
+
99
def get_flagged_groups():
    """Return the group ids that contain at least one rule-violating label.

    A row violates the rule when its score is 10 and its label is not
    ``living_room``.

    Returns:
        A list of unique ``group_id`` values taken from the violating rows of
        LABEL_FILE; an empty list when the file yields no rows or lacks any
        of the ``score``, ``label`` or ``group_id`` columns.
    """
    df = get_clean_df(LABEL_FILE)
    # get_clean_df treats 'label' and 'score' as optional, so check for the
    # columns here instead of raising KeyError inside a UI callback.
    if df.empty or not {'score', 'label', 'group_id'}.issubset(df.columns):
        return []
    errors = df[(df['score'] == 10) & (df['label'] != 'living_room')]
    return errors['group_id'].unique().tolist()
104
 
105
  def get_group_urls(target_gid):
106
  return [u for u in load_all_urls() if target_gid in u][:MAX_IMAGES]
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  def get_stats_text():
109
  all_gids = get_ordered_groups()
110
  flagged = get_flagged_groups()
111
+ df_l = get_clean_df(LABEL_FILE)
112
+ l_count = len(df_l['group_id'].unique()) if not df_l.empty else 0
113
+ err_msg = f" | ⚠️ **Need Fix:** {len(flagged)}" if flagged else " | ✅ No Errors"
114
+ return f"**Total Properties:** {len(all_gids)} | **Labeled:** {l_count}{err_msg}"
 
 
115
 
116
  def render_workspace(mode, history, specific_index=None, move_back=False):
117
  all_ordered = get_ordered_groups()
118
+ flagged_pool = get_flagged_groups()
119
  current_gid = history[-1] if history else None
120
  target_gid = None
121
 
 
125
  history.pop()
126
  target_gid = history[-1]
127
  else:
128
+ df_l = get_clean_df(LABEL_FILE)
129
+ df_v = get_clean_df(VERIFY_FILE)
130
+ l_done = set(df_l['group_id'].unique()) if not df_l.empty else set()
131
+ v_done = set(df_v['group_id'].unique()) if not df_v.empty else set()
 
 
 
132
 
133
  if mode == "fix":
134
+ # Don't show the one we just saved
135
  candidates = [g for g in flagged_pool if g != current_gid]
136
  if not candidates and flagged_pool: candidates = flagged_pool
137
  if candidates: target_gid = candidates[0]
138
  else:
 
139
  candidates = [g for g in all_ordered if (mode=="label" and g not in l_done) or (mode=="verify" and g in l_done and g not in v_done)]
140
  if candidates: target_gid = random.choice(candidates)
141
 
142
  if not target_gid:
143
+ return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "🎉 Section Complete!"}
144
 
145
  urls = get_group_urls(target_gid)
146
  if not history or history[-1] != target_gid: history.append(target_gid)
147
+
148
+ saved_vals = {}
149
+ df_mode = get_clean_df(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE)
150
+ if not df_mode.empty:
151
+ rows = df_mode[df_mode['group_id'] == target_gid]
152
+ for _, r in rows.iterrows():
153
+ if mode in ["label", "fix"]:
154
+ saved_vals[r['url']] = {"score": r['score'], "label": r['label']}
155
+ else:
156
+ saved_vals[r['url']] = {"is_correct": r['is_correct'], "label": r['corrected_label'], "score": r['corrected_score']}
157
 
158
+ target_idx = all_ordered.index(target_gid)
159
  with ThreadPoolExecutor(max_workers=MAX_IMAGES) as ex:
160
  def fetch(u):
161
  try:
 
167
 
168
  updates = {
169
  screen_menu: gr.update(visible=False), screen_work: gr.update(visible=True),
170
+ header_md: f"# {mode.upper()} - Property #{target_idx + 1} ({target_gid})",
171
  state_urls: urls, state_hist: history, state_idx: target_idx,
172
+ top_stats: get_stats_text(), log_box: f"Loaded group: {target_gid}"
173
  }
174
 
175
  for i in range(MAX_IMAGES):
 
178
  if i < len(urls):
179
  u = urls[i]
180
  updates[img_objs[i]] = gr.update(value=processed_images[i], visible=True)
181
+ v_sc = int(saved_vals.get(u, {}).get('score', 5))
182
  v_lbl = str(saved_vals.get(u, {}).get('label', "living_room")).strip().lower()
183
+
184
  is_err = (v_sc == 10 and v_lbl != "living_room")
185
 
186
  if mode in ["label", "fix"]:
187
  updates[c_sld] = gr.update(visible=True, value=v_sc, interactive=True)
188
+ updates[c_drp] = gr.update(visible=True, value=v_lbl if v_lbl in ROOM_CLASSES else "living_room", interactive=True)
189
  updates[c_chk] = gr.update(visible=False)
190
+ updates[c_lbl] = gr.update(visible=True if is_err else False, value="<span style='color:red; font-weight:bold;'>⚠️ ERROR: Score 10 is ONLY for Living Room</span>")
191
  else:
192
+ updates[c_sld] = gr.update(visible=True, value=v_sc)
 
193
  updates[c_drp] = gr.update(visible=True, value=v_lbl)
194
  updates[c_chk] = gr.update(visible=True, value=saved_vals.get(u, {}).get('is_correct', True))
195
+ updates[c_lbl] = gr.update(visible=True, value=f"Previous Label: {v_lbl}")
196
  else:
197
  updates[img_objs[i]] = gr.update(visible=False)
198
  for obj in [c_sld, c_drp, c_chk, c_lbl]: updates[obj] = gr.update(visible=False)
 
206
  clean_lbl = str(lbl).strip().lower()
207
  if mode in ["label", "fix"]: rows.append([ts, "user", gid, u, int(sc), clean_lbl])
208
  else: rows.append([ts, "user", gid, u, chk, clean_lbl, int(sc)])
209
+
210
  fname = LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE
211
  with FileLock(LOCK_FILE):
212
  with open(fname, "a", newline="") as f: csv.writer(f).writerows(rows)
213
+
214
  sync_push_background(fname, os.path.basename(fname))
215
  return render_workspace(mode, history)
216
 
217
  def refresh_cat():
218
  all_gids = get_ordered_groups()
219
  flagged = set(get_flagged_groups())
220
+ df_l = get_clean_df(LABEL_FILE)
221
+ df_v = get_clean_df(VERIFY_FILE)
222
+ l_set = set(df_l['group_id'].unique()) if not df_l.empty else set()
223
+ v_set = set(df_v['group_id'].unique()) if not df_v.empty else set()
224
+
225
+ data = []
226
+ for i, gid in enumerate(all_gids):
227
+ status = "⚠️ Fix Needed" if gid in flagged else "✅ Verified" if gid in v_set else "🔵 Labeled" if gid in l_set else "⚪ Pending"
228
+ data.append([i+1, status, gid])
229
  return pd.DataFrame(data, columns=["#", "Status", "ID"])
230
 
231
+ with gr.Blocks(theme=gr.themes.Soft(), title="Property Labeler Pro") as demo:
232
  state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
233
  with gr.Row():
234
+ top_stats = gr.Markdown("Syncing...")
235
  btn_home = gr.Button("🏠 Home", size="sm", scale=0)
236
+
237
  with gr.Tabs():
238
  with gr.Tab("Workspace"):
239
  with gr.Group() as screen_menu:
240
+ gr.Markdown("# Welcome to Property Labeler")
241
  with gr.Row():
242
+ b_start_l = gr.Button("Label New", variant="primary")
243
+ b_start_v = gr.Button("Verify Labels")
244
+ b_start_f = gr.Button("🛠 Fix Errors", variant="secondary")
245
  with gr.Group(visible=False) as screen_work:
246
  header_md = gr.Markdown()
247
  img_objs, input_objs = [], []
 
249
  for i in range(MAX_IMAGES):
250
  with gr.Column(min_width=200):
251
  img = gr.Image(interactive=False, height=240)
252
+ sld = gr.Slider(1, 10, step=1, label="Score")
253
+ drp = gr.Dropdown(ROOM_CLASSES, label="Class")
254
+ chk = gr.Checkbox(label="Correct?")
255
+ lbl = gr.Markdown()
256
  img_objs.append(img); input_objs.extend([sld, drp, chk, lbl])
257
  with gr.Row():
258
+ b_back = gr.Button("⬅ Back")
259
+ b_save = gr.Button("💾 Save & Next", variant="primary")
260
+ log_box = gr.Textbox(label="Last Action", interactive=False)
261
+
262
  with gr.Tab("Catalog"):
263
  with gr.Row():
264
  num_in = gr.Number(value=1, label="Prop #", precision=0)
 
267
  b_ref_cat = gr.Button("Refresh Catalog")
268
 
269
  ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
270
+
271
  b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
272
  b_start_v.click(lambda: "verify", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
273
  b_start_f.click(lambda: "fix", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
274
+
275
  b_save.click(save_data, [state_mode, state_hist, state_urls] + input_objs, ALL_IO)
276
  b_back.click(lambda m, h: render_workspace(m, h, move_back=True), [state_mode, state_hist], ALL_IO)
277
  btn_home.click(lambda: {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), state_hist: []}, None, [screen_menu, screen_work, state_hist])
278
+
279
  b_go_l.click(lambda: "label", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
280
  b_go_v.click(lambda: "verify", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
281
  b_go_f.click(lambda: "fix", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
282
+
283
  b_ref_cat.click(refresh_cat, None, df_cat)
284
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
285