Spaces:
Sleeping
Sleeping
Commit
·
6b53e51
1
Parent(s):
eacf6ad
update app.py
Browse files
app.py
CHANGED
|
@@ -86,7 +86,9 @@ def get_clean_df(filepath):
|
|
| 86 |
df = pd.read_csv(filepath)
|
| 87 |
if df.empty: return df
|
| 88 |
if 'label' in df.columns: df['label'] = df['label'].astype(str).str.strip().str.lower()
|
|
|
|
| 89 |
if 'score' in df.columns: df['score'] = pd.to_numeric(df['score'], errors='coerce').fillna(0).astype(int)
|
|
|
|
| 90 |
return df.drop_duplicates(subset=['url'], keep='last')
|
| 91 |
except: return pd.DataFrame()
|
| 92 |
|
|
@@ -101,15 +103,17 @@ def get_stats_text():
|
|
| 101 |
all_gids = get_ordered_groups()
|
| 102 |
flagged = get_flagged_groups()
|
| 103 |
df_l = get_clean_df(LABEL_FILE)
|
|
|
|
| 104 |
l_count = len(df_l['group_id'].unique()) if not df_l.empty else 0
|
| 105 |
-
|
|
|
|
|
|
|
| 106 |
|
| 107 |
def render_workspace(mode, history, specific_index=None, move_back=False):
|
| 108 |
all_ordered = get_ordered_groups()
|
| 109 |
flagged_pool = get_flagged_groups()
|
| 110 |
current_gid = history[-1] if history else None
|
| 111 |
target_gid = None
|
| 112 |
-
|
| 113 |
if specific_index is not None:
|
| 114 |
if 0 <= specific_index < len(all_ordered): target_gid = all_ordered[specific_index]
|
| 115 |
elif move_back and len(history) > 1:
|
|
@@ -125,19 +129,15 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 125 |
v_done = set(df_v['group_id'].unique()) if not df_v.empty else set()
|
| 126 |
candidates = [g for g in all_ordered if (mode=="label" and g not in l_done) or (mode=="verify" and g in l_done and g not in v_done)]
|
| 127 |
if candidates: target_gid = random.choice(candidates)
|
| 128 |
-
|
| 129 |
-
if not target_gid: return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "🎉 Finished!"}
|
| 130 |
-
|
| 131 |
urls = [u for u in load_all_urls() if target_gid in u][:MAX_IMAGES]
|
| 132 |
if not history or history[-1] != target_gid: history.append(target_gid)
|
| 133 |
-
|
| 134 |
saved_vals = {}
|
| 135 |
df_mode = get_clean_df(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE)
|
| 136 |
if not df_mode.empty:
|
| 137 |
for _, r in df_mode[df_mode['group_id'] == target_gid].iterrows():
|
| 138 |
if mode in ["label", "fix"]: saved_vals[r['url']] = {"score": r['score'], "label": r['label']}
|
| 139 |
else: saved_vals[r['url']] = {"is_correct": r['is_correct'], "label": r['corrected_label'], "score": r['corrected_score']}
|
| 140 |
-
|
| 141 |
with ThreadPoolExecutor(max_workers=MAX_IMAGES) as ex:
|
| 142 |
def fetch(u):
|
| 143 |
try:
|
|
@@ -145,15 +145,13 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 145 |
img = Image.open(BytesIO(res.content)); img.thumbnail(THUMB_SIZE); return img
|
| 146 |
except: return None
|
| 147 |
processed_images = list(ex.map(fetch, urls))
|
| 148 |
-
|
| 149 |
target_idx = all_ordered.index(target_gid)
|
| 150 |
updates = {
|
| 151 |
screen_menu: gr.update(visible=False), screen_work: gr.update(visible=True),
|
| 152 |
-
header_md: f"# {mode.upper()} -
|
| 153 |
state_urls: urls, state_hist: history, state_idx: target_idx,
|
| 154 |
-
top_stats: get_stats_text(), log_box: f"
|
| 155 |
}
|
| 156 |
-
|
| 157 |
for i in range(MAX_IMAGES):
|
| 158 |
base = i * 4
|
| 159 |
c_sld, c_drp, c_chk, c_lbl = input_objs[base:base+4]
|
|
@@ -166,8 +164,7 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 166 |
if mode in ["label", "fix"]:
|
| 167 |
updates[c_sld] = gr.update(visible=True, value=v_sc, interactive=True)
|
| 168 |
updates[c_drp] = gr.update(visible=True, value=v_lbl if v_lbl in ROOM_CLASSES else "living_room", interactive=True)
|
| 169 |
-
updates[c_chk] = gr.update(visible=False)
|
| 170 |
-
updates[c_lbl] = gr.update(visible=True if is_err else False, value="<span style='color:red'>⚠️ Score 10 ONLY for Living Room</span>")
|
| 171 |
else:
|
| 172 |
updates[c_sld], updates[c_drp] = gr.update(visible=True, value=v_sc), gr.update(visible=True, value=v_lbl)
|
| 173 |
updates[c_chk], updates[c_lbl] = gr.update(visible=True, value=True), gr.update(visible=True, value=f"Prev: {v_lbl}")
|
|
@@ -179,17 +176,15 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 179 |
def save_data(mode, history, urls, *args):
|
| 180 |
if not history: return
|
| 181 |
gid = history[-1]
|
| 182 |
-
FIXED_IN_SESSION.add(gid)
|
| 183 |
ts = datetime.now().isoformat(); rows = []
|
| 184 |
for i, u in enumerate(urls):
|
| 185 |
sc, lbl, chk = args[i*4], args[i*4+1], args[i*4+2]
|
| 186 |
clean_lbl = str(lbl).strip().lower()
|
| 187 |
if mode in ["label", "fix"]: rows.append([ts, "user", gid, u, int(sc), clean_lbl])
|
| 188 |
else: rows.append([ts, "user", gid, u, chk, clean_lbl, int(sc)])
|
| 189 |
-
|
| 190 |
with FileLock(LOCK_FILE):
|
| 191 |
with open(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE, "a", newline="") as f: csv.writer(f).writerows(rows)
|
| 192 |
-
|
| 193 |
sync_push_background(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE, os.path.basename(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE))
|
| 194 |
return render_workspace(mode, history)
|
| 195 |
|
|
@@ -202,10 +197,10 @@ def refresh_cat():
|
|
| 202 |
data = [[i+1, "⚠️ Fix Needed" if gid in flagged else "✅ Verified" if gid in v_set else "🔵 Labeled" if gid in l_set else "⚪ Pending", gid] for i, gid in enumerate(all_gids)]
|
| 203 |
return pd.DataFrame(data, columns=["#", "Status", "ID"])
|
| 204 |
|
| 205 |
-
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 206 |
state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
|
| 207 |
with gr.Row():
|
| 208 |
-
top_stats = gr.Markdown("
|
| 209 |
btn_home = gr.Button("🏠 Home", size="sm", scale=0)
|
| 210 |
with gr.Tabs():
|
| 211 |
with gr.Tab("Workspace"):
|
|
@@ -224,14 +219,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 224 |
img_objs.append(img); input_objs.extend([sld, drp, chk, lbl])
|
| 225 |
with gr.Row():
|
| 226 |
b_back, b_save = gr.Button("⬅ Back"), gr.Button("💾 Save & Next", variant="primary")
|
| 227 |
-
log_box = gr.Textbox(label="
|
| 228 |
with gr.Tab("Catalog"):
|
| 229 |
with gr.Row():
|
| 230 |
num_in = gr.Number(value=1, label="Prop #", precision=0)
|
| 231 |
b_go_l, b_go_v, b_go_f = gr.Button("Go Label"), gr.Button("Go Verify"), gr.Button("Go Fix")
|
| 232 |
df_cat = gr.Dataframe(interactive=False)
|
| 233 |
b_ref_cat = gr.Button("Refresh Catalog")
|
| 234 |
-
|
| 235 |
ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
|
| 236 |
b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
|
| 237 |
b_start_v.click(lambda: "verify", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
|
|
@@ -244,5 +238,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 244 |
b_go_f.click(lambda: "fix", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
|
| 245 |
b_ref_cat.click(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
|
| 246 |
demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
|
| 247 |
-
|
| 248 |
demo.queue().launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 86 |
df = pd.read_csv(filepath)
|
| 87 |
if df.empty: return df
|
| 88 |
if 'label' in df.columns: df['label'] = df['label'].astype(str).str.strip().str.lower()
|
| 89 |
+
if 'corrected_label' in df.columns: df['corrected_label'] = df['corrected_label'].astype(str).str.strip().str.lower()
|
| 90 |
if 'score' in df.columns: df['score'] = pd.to_numeric(df['score'], errors='coerce').fillna(0).astype(int)
|
| 91 |
+
if 'corrected_score' in df.columns: df['corrected_score'] = pd.to_numeric(df['corrected_score'], errors='coerce').fillna(0).astype(int)
|
| 92 |
return df.drop_duplicates(subset=['url'], keep='last')
|
| 93 |
except: return pd.DataFrame()
|
| 94 |
|
|
|
|
| 103 |
all_gids = get_ordered_groups()
|
| 104 |
flagged = get_flagged_groups()
|
| 105 |
df_l = get_clean_df(LABEL_FILE)
|
| 106 |
+
df_v = get_clean_df(VERIFY_FILE)
|
| 107 |
l_count = len(df_l['group_id'].unique()) if not df_l.empty else 0
|
| 108 |
+
v_count = len(df_v['group_id'].unique()) if not df_v.empty else 0
|
| 109 |
+
err_msg = f" | ⚠️ **Fix:** {len(flagged)}" if flagged else " | ✅ Clean"
|
| 110 |
+
return f"**Total:** {len(all_gids)} | **Labeled:** {l_count} | **Verified:** {v_count}{err_msg}"
|
| 111 |
|
| 112 |
def render_workspace(mode, history, specific_index=None, move_back=False):
|
| 113 |
all_ordered = get_ordered_groups()
|
| 114 |
flagged_pool = get_flagged_groups()
|
| 115 |
current_gid = history[-1] if history else None
|
| 116 |
target_gid = None
|
|
|
|
| 117 |
if specific_index is not None:
|
| 118 |
if 0 <= specific_index < len(all_ordered): target_gid = all_ordered[specific_index]
|
| 119 |
elif move_back and len(history) > 1:
|
|
|
|
| 129 |
v_done = set(df_v['group_id'].unique()) if not df_v.empty else set()
|
| 130 |
candidates = [g for g in all_ordered if (mode=="label" and g not in l_done) or (mode=="verify" and g in l_done and g not in v_done)]
|
| 131 |
if candidates: target_gid = random.choice(candidates)
|
| 132 |
+
if not target_gid: return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "Done!"}
|
|
|
|
|
|
|
| 133 |
urls = [u for u in load_all_urls() if target_gid in u][:MAX_IMAGES]
|
| 134 |
if not history or history[-1] != target_gid: history.append(target_gid)
|
|
|
|
| 135 |
saved_vals = {}
|
| 136 |
df_mode = get_clean_df(LABEL_FILE if mode in ["label", "fix"] else VERIFY_FILE)
|
| 137 |
if not df_mode.empty:
|
| 138 |
for _, r in df_mode[df_mode['group_id'] == target_gid].iterrows():
|
| 139 |
if mode in ["label", "fix"]: saved_vals[r['url']] = {"score": r['score'], "label": r['label']}
|
| 140 |
else: saved_vals[r['url']] = {"is_correct": r['is_correct'], "label": r['corrected_label'], "score": r['corrected_score']}
|
|
|
|
| 141 |
with ThreadPoolExecutor(max_workers=MAX_IMAGES) as ex:
|
| 142 |
def fetch(u):
|
| 143 |
try:
|
|
|
|
| 145 |
img = Image.open(BytesIO(res.content)); img.thumbnail(THUMB_SIZE); return img
|
| 146 |
except: return None
|
| 147 |
processed_images = list(ex.map(fetch, urls))
|
|
|
|
| 148 |
target_idx = all_ordered.index(target_gid)
|
| 149 |
updates = {
|
| 150 |
screen_menu: gr.update(visible=False), screen_work: gr.update(visible=True),
|
| 151 |
+
header_md: f"# {mode.upper()} - Prop #{target_idx + 1} ({target_gid})",
|
| 152 |
state_urls: urls, state_hist: history, state_idx: target_idx,
|
| 153 |
+
top_stats: get_stats_text(), log_box: f"Viewing: {target_gid}"
|
| 154 |
}
|
|
|
|
| 155 |
for i in range(MAX_IMAGES):
|
| 156 |
base = i * 4
|
| 157 |
c_sld, c_drp, c_chk, c_lbl = input_objs[base:base+4]
|
|
|
|
| 164 |
if mode in ["label", "fix"]:
|
| 165 |
updates[c_sld] = gr.update(visible=True, value=v_sc, interactive=True)
|
| 166 |
updates[c_drp] = gr.update(visible=True, value=v_lbl if v_lbl in ROOM_CLASSES else "living_room", interactive=True)
|
| 167 |
+
updates[c_chk], updates[c_lbl] = gr.update(visible=False), gr.update(visible=True if is_err else False, value="<span style='color:red'>⚠️ Score 10 Only for Living Room</span>")
|
|
|
|
| 168 |
else:
|
| 169 |
updates[c_sld], updates[c_drp] = gr.update(visible=True, value=v_sc), gr.update(visible=True, value=v_lbl)
|
| 170 |
updates[c_chk], updates[c_lbl] = gr.update(visible=True, value=True), gr.update(visible=True, value=f"Prev: {v_lbl}")
|
|
|
|
| 176 |
def save_data(mode, history, urls, *args):
    """Append the current group's answers to the CSV for ``mode`` and re-render.

    Args:
        mode: Workflow name. "label" and "fix" write to LABEL_FILE; any other
            value writes to VERIFY_FILE.
        history: List of visited group ids; the last entry is the group being
            saved. When it is empty the function returns None immediately.
        urls: Image URLs of the current group, one output row per URL.
        *args: Flat widget values, four slots per URL. Slots 0-2 are read as
            score, label and check flag; slot 3 is never read.
            NOTE(review): presumably mirrors the (slider, dropdown, checkbox,
            label) order of ``input_objs`` - confirm against the click wiring.

    Returns:
        Whatever ``render_workspace(mode, history)`` returns, or None when
        ``history`` is empty.
    """
    if not history:
        return

    gid = history[-1]
    # Only a save made in "fix" mode marks the group as fixed for this session.
    if mode == "fix":
        FIXED_IN_SESSION.add(gid)

    writes_labels = mode in ["label", "fix"]
    target_file = LABEL_FILE if writes_labels else VERIFY_FILE
    ts = datetime.now().isoformat()

    rows = []
    for i, u in enumerate(urls):
        base = i * 4
        sc = args[base]
        lbl = args[base + 1]
        chk = args[base + 2]
        clean_lbl = str(lbl).strip().lower()
        if writes_labels:
            # Label rows: timestamp, user, group, url, score, label.
            row = [ts, "user", gid, u, int(sc), clean_lbl]
        else:
            # Verify rows put the check flag first and the score last.
            row = [ts, "user", gid, u, chk, clean_lbl, int(sc)]
        rows.append(row)

    # The lock is held only for the append itself.
    with FileLock(LOCK_FILE):
        with open(target_file, "a", newline="") as f:
            csv.writer(f).writerows(rows)

    sync_push_background(target_file, os.path.basename(target_file))
    return render_workspace(mode, history)
|
| 190 |
|
|
|
|
| 197 |
data = [[i+1, "⚠️ Fix Needed" if gid in flagged else "✅ Verified" if gid in v_set else "🔵 Labeled" if gid in l_set else "⚪ Pending", gid] for i, gid in enumerate(all_gids)]
|
| 198 |
return pd.DataFrame(data, columns=["#", "Status", "ID"])
|
| 199 |
|
| 200 |
+
with gr.Blocks(theme=gr.themes.Soft(), title="Labeler Pro") as demo:
|
| 201 |
state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
|
| 202 |
with gr.Row():
|
| 203 |
+
top_stats = gr.Markdown("Loading...")
|
| 204 |
btn_home = gr.Button("🏠 Home", size="sm", scale=0)
|
| 205 |
with gr.Tabs():
|
| 206 |
with gr.Tab("Workspace"):
|
|
|
|
| 219 |
img_objs.append(img); input_objs.extend([sld, drp, chk, lbl])
|
| 220 |
with gr.Row():
|
| 221 |
b_back, b_save = gr.Button("⬅ Back"), gr.Button("💾 Save & Next", variant="primary")
|
| 222 |
+
log_box = gr.Textbox(label="Status", interactive=False)
|
| 223 |
with gr.Tab("Catalog"):
|
| 224 |
with gr.Row():
|
| 225 |
num_in = gr.Number(value=1, label="Prop #", precision=0)
|
| 226 |
b_go_l, b_go_v, b_go_f = gr.Button("Go Label"), gr.Button("Go Verify"), gr.Button("Go Fix")
|
| 227 |
df_cat = gr.Dataframe(interactive=False)
|
| 228 |
b_ref_cat = gr.Button("Refresh Catalog")
|
|
|
|
| 229 |
ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
|
| 230 |
b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
|
| 231 |
b_start_v.click(lambda: "verify", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
|
|
|
|
| 238 |
b_go_f.click(lambda: "fix", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
|
| 239 |
b_ref_cat.click(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
|
| 240 |
demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
|
|
|
|
| 241 |
demo.queue().launch(server_name="0.0.0.0", server_port=7860)
|