File size: 13,178 Bytes
081c164
eecf956
 
 
 
620e140
1b04ad0
a854ff8
eecf956
 
 
 
 
1b04ad0
eecf956
ff75b10
 
8debba6
da36cb1
5838c7c
da36cb1
8a439bd
ff75b10
 
 
 
8debba6
eacf6ad
 
 
 
009f7e5
 
 
1b04ad0
1b6d9ec
ff75b10
1b04ad0
eacf6ad
 
 
 
1b6d9ec
ff75b10
009f7e5
 
1b04ad0
 
009f7e5
3c1b983
eacf6ad
081c164
1b04ad0
eecf956
1b04ad0
eecf956
 
3c1b983
 
 
eecf956
 
 
 
620e140
eacf6ad
620e140
 
8a439bd
eacf6ad
 
620e140
eecf956
 
 
009f7e5
 
 
 
c9e566c
eecf956
 
05521a3
 
009f7e5
05521a3
 
eacf6ad
6b53e51
eacf6ad
6b53e51
eacf6ad
 
05521a3
 
 
 
 
eacf6ad
 
eecf956
ace880e
 
 
05521a3
6b53e51
05521a3
6b53e51
 
 
ace880e
eecf956
ace880e
05521a3
c9e566c
eecf956
 
ace880e
eecf956
eacf6ad
eecf956
3c1b983
c9e566c
e172b1c
 
3c1b983
eacf6ad
 
 
3c1b983
e172b1c
6b53e51
eacf6ad
009f7e5
05521a3
 
 
eacf6ad
 
 
c9e566c
ace880e
009f7e5
ace880e
eacf6ad
009f7e5
c9e566c
eacf6ad
eecf956
3c1b983
6b53e51
ace880e
6b53e51
eecf956
 
 
009f7e5
eecf956
 
009f7e5
05521a3
e172b1c
ace880e
009f7e5
3f4e464
05521a3
6b53e51
ace880e
eacf6ad
 
eecf956
009f7e5
 
eecf956
 
 
 
eacf6ad
6b53e51
eacf6ad
eecf956
009f7e5
e172b1c
 
 
eecf956
eacf6ad
 
a854ff8
eecf956
ace880e
 
 
eacf6ad
05521a3
 
eacf6ad
ace880e
 
6b53e51
009f7e5
 
6b53e51
ace880e
 
 
 
eacf6ad
ace880e
eacf6ad
ace880e
 
 
 
 
 
 
eacf6ad
ace880e
 
eacf6ad
6b53e51
ace880e
 
 
3c1b983
ace880e
e172b1c
eecf956
 
 
009f7e5
eecf956
 
 
3c1b983
 
 
eacf6ad
ace880e
8a439bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import csv
import json
import logging
import os
import random
import threading
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from io import BytesIO

import gradio as gr
import pandas as pd
import requests
from filelock import FileLock
from huggingface_hub import HfApi, hf_hub_download
from PIL import Image

# Hugging Face dataset repo used by the sync helpers; overridable via the environment.
DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "fast-stager/property-labels") 
# Access token read from the environment; None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN") 

# Local directory holding the working copies of the CSV files (created at import time).
CACHE_DIR = "/tmp/data"
os.makedirs(CACHE_DIR, exist_ok=True)

# JSON file listing the image URLs; read relative to the current working directory.
URL_FILE = "new_urls.json" 
# Working CSVs for labels, verifications and skipped groups, plus the lock file
# used to serialise appends to them.
LABEL_FILE = os.path.join(CACHE_DIR, "annotations.csv")
VERIFY_FILE = os.path.join(CACHE_DIR, "verifications.csv")
SKIP_FILE = os.path.join(CACHE_DIR, "skipped.csv")
LOCK_FILE = os.path.join(CACHE_DIR, "data.lock")

# Group ids saved in "fix" mode since this process started; they are no longer
# reported as flagged.
FIXED_IN_SESSION = set()
# Group ids that are never reported as flagged.
MANUAL_EXCLUDE = {"075c8bb8a73c45d71788e711edd9e8d5l", "07a0544f217db88fe2b06fd5d38f02a6l", "6bf16112723de3318c44641958638a56l"}

# Choices offered by the per-image "Class" dropdown.
ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room", "outdoor", "other"]
# Number of image slots built in the UI and maximum URLs shown per group.
MAX_IMAGES = 6
# Bounding box passed to PIL's thumbnail() for downloaded images.
THUMB_SIZE = (350, 350)

def sync_pull():
    """Refresh the local CSV copies from the Hugging Face dataset repo.

    For each of annotations.csv, verifications.csv and skipped.csv the local
    copy in CACHE_DIR is removed and the file is downloaded again. The pull is
    best-effort: a failure for one file is logged and the remaining files are
    still attempted, so the function never raises for download errors.

    The token is only used when HF_TOKEN is set and longer than 5 characters;
    otherwise the download is attempted anonymously.
    """
    token = HF_TOKEN if HF_TOKEN and len(HF_TOKEN) > 5 else None
    log = logging.getLogger(__name__)
    for filename in ["annotations.csv", "verifications.csv", "skipped.csv"]:
        local_path = os.path.join(CACHE_DIR, filename)
        try:
            # NOTE(review): the local copy is dropped before the download, so a
            # failed pull leaves no file behind — confirm this is intended.
            if os.path.exists(local_path):
                os.remove(local_path)
            hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=filename,
                repo_type="dataset",
                local_dir=CACHE_DIR,
                token=token,
                force_download=True,
            )
        except Exception as exc:
            # Best-effort boundary: keep going, but leave a trace instead of
            # swallowing the error silently.
            log.warning("Could not pull %s from %s: %s", filename, DATASET_REPO_ID, exc)

def sync_push_background(local_path, remote_filename):
    """Upload a local file to the dataset repo on a background thread.

    Args:
        local_path: Path of the file to upload.
        remote_filename: Destination path inside the dataset repo.

    Does nothing when no usable HF_TOKEN is configured (unset or 5 characters
    or shorter). The upload runs in its own thread, so the caller returns
    immediately; an upload failure is logged rather than raised.
    """
    token = HF_TOKEN if HF_TOKEN and len(HF_TOKEN) > 5 else None
    if not token:
        return

    def _push():
        try:
            HfApi(token=token).upload_file(
                path_or_fileobj=local_path,
                path_in_repo=remote_filename,
                repo_id=DATASET_REPO_ID,
                repo_type="dataset",
            )
        except Exception:
            # Nobody joins this thread, so logging is the only way a failed
            # upload becomes visible.
            logging.getLogger(__name__).exception(
                "Upload of %s to %s failed", local_path, DATASET_REPO_ID
            )

    threading.Thread(target=_push).start()

def init_files():
    """Pull the remote CSVs, then create any that are still missing.

    A missing file is written as a header-only CSV using the column layout
    that belongs to that file.
    """
    sync_pull()
    # Column layout per file, in the order the files are checked.
    headers = {
        LABEL_FILE: ["timestamp", "user", "group_id", "url", "score", "label"],
        VERIFY_FILE: ["timestamp", "user", "group_id", "url", "is_correct", "corrected_label", "corrected_score"],
        SKIP_FILE: ["timestamp", "user", "group_id"],
    }
    for path, columns in headers.items():
        if os.path.exists(path):
            continue
        pd.DataFrame(columns=columns).to_csv(path, index=False)

# Runs at import time: pulls the remote CSVs and creates any that are missing
# before the functions and UI below are defined.
init_files()

def load_all_urls():
    """Return every image URL listed in URL_FILE as one flat list.

    The file is expected to be JSON of the form
    ``{"groups": [{"images": [...]}, ...]}``; the "images" lists are
    concatenated in file order.

    Returns:
        The flattened list, or an empty list when the file is missing,
        unreadable, not valid JSON, or not shaped as described above.
    """
    if not os.path.exists(URL_FILE):
        return []
    try:
        # Read as UTF-8 explicitly so decoding does not depend on the locale.
        with open(URL_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
        return [img for g in data.get("groups", []) for img in g.get("images", [])]
    except (OSError, ValueError, AttributeError, TypeError) as exc:
        # ValueError covers JSON and decoding errors; AttributeError/TypeError
        # cover a document whose structure is not dict -> list of dicts -> list.
        logging.getLogger(__name__).warning("Could not read %s: %s", URL_FILE, exc)
        return []

def get_ordered_groups():
    """Return the distinct group ids in first-seen order.

    A URL's group id is its last path segment taken from the part of the URL
    before the first "-m". Entries that are not strings are grouped under
    "unknown".
    """
    ordered = {}
    for u in load_all_urls():
        try:
            gid = u.split("-m")[0].split("/")[-1]
        except (AttributeError, TypeError):
            # Only a non-string entry can fail here; str.split never raises.
            gid = "unknown"
        # Dicts keep insertion order, giving an order-preserving de-duplication.
        ordered.setdefault(gid, None)
    return list(ordered)

def get_clean_df(filepath):
    """Load a CSV and normalise its label and score columns.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A DataFrame in which
        * "label" / "corrected_label" (when present) are stripped, lower-cased strings,
        * "score" / "corrected_score" (when present) are ints, with unparsable
          values replaced by 0,
        * only the last row per "url" is kept when a "url" column exists.
        An empty DataFrame is returned when the file is missing or cannot be
        read or parsed.
    """
    if not os.path.exists(filepath):
        return pd.DataFrame()
    try:
        df = pd.read_csv(filepath)
        if df.empty:
            return df
        for col in ("label", "corrected_label"):
            if col in df.columns:
                df[col] = df[col].astype(str).str.strip().str.lower()
        for col in ("score", "corrected_score"):
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int)
        # Files without a "url" column have nothing to de-duplicate on;
        # returning them as-is avoids a KeyError that used to be swallowed and
        # made such files look empty.
        if "url" not in df.columns:
            return df
        return df.drop_duplicates(subset=["url"], keep="last")
    except (OSError, ValueError, TypeError):
        # pandas' EmptyDataError and ParserError are ValueError subclasses.
        return pd.DataFrame()

def get_flagged_groups():
    """List group ids that have a score of 10 on a non-living-room label.

    Groups already saved in fix mode during this process (FIXED_IN_SESSION)
    and groups in MANUAL_EXCLUDE are left out. Returns an empty list when no
    labels have been stored.
    """
    labels = get_clean_df(LABEL_FILE)
    if labels.empty:
        return []
    top_score = labels['score'] == 10
    not_living_room = labels['label'] != 'living_room'
    offending_ids = labels.loc[top_score & not_living_room, 'group_id'].unique().tolist()
    hidden = FIXED_IN_SESSION | MANUAL_EXCLUDE
    return [gid for gid in offending_ids if gid not in hidden]

def get_stats_text():
    """Build the Markdown summary line shown at the top of the page.

    The line reports the total number of groups, how many distinct groups
    have labels and verifications, and either the number of flagged groups or
    a "Clean" marker.
    """
    total = len(get_ordered_groups())
    flagged = get_flagged_groups()

    def _distinct_groups(path):
        # Number of distinct group ids stored in the CSV at *path*.
        frame = get_clean_df(path)
        return 0 if frame.empty else len(frame['group_id'].unique())

    labeled = _distinct_groups(LABEL_FILE)
    verified = _distinct_groups(VERIFY_FILE)
    if flagged:
        err_msg = f" | ⚠️ **Fix:** {len(flagged)}"
    else:
        err_msg = " | ✅ Clean"
    return f"**Total:** {total} | **Labeled:** {labeled} | **Verified:** {verified}{err_msg}"

def _group_id_from_url(url):
    """Return the group id embedded in *url*, or "unknown" for non-string entries."""
    try:
        return url.split("-m")[0].split("/")[-1]
    except (AttributeError, TypeError):
        return "unknown"


def _fetch_thumbnail(url):
    """Download *url* and return it as a PIL thumbnail, or None if it cannot be loaded."""
    try:
        res = requests.get(url, timeout=3, headers={'User-Agent': 'Mozilla/5.0'})
        img = Image.open(BytesIO(res.content))
        img.thumbnail(THUMB_SIZE)
        return img
    except Exception as exc:
        # Best-effort: a broken image leaves its slot empty instead of failing
        # the whole render.
        logging.getLogger(__name__).warning("Could not load image %s: %s", url, exc)
        return None


def _pick_target_group(mode, history, all_ordered, specific_index, move_back):
    """Choose the group to display next, or None when there is nothing to show."""
    if specific_index is not None:
        if 0 <= specific_index < len(all_ordered):
            return all_ordered[specific_index]
        return None
    if move_back and len(history) > 1:
        history.pop()
        return history[-1]
    current_gid = history[-1] if history else None
    if mode == "fix":
        known = set(all_ordered)
        # Flagged ids come from the label CSV and may refer to groups that are
        # not in the URL file; those cannot be displayed, so skip them.
        flagged_pool = [g for g in get_flagged_groups() if g in known]
        # Prefer a group other than the current one, but fall back to it when
        # it is the only flagged group left.
        candidates = [g for g in flagged_pool if g != current_gid] or flagged_pool
        return candidates[0] if candidates else None
    df_l, df_v = get_clean_df(LABEL_FILE), get_clean_df(VERIFY_FILE)
    l_done = set(df_l['group_id'].unique()) if not df_l.empty else set()
    v_done = set(df_v['group_id'].unique()) if not df_v.empty else set()
    if mode == "label":
        candidates = [g for g in all_ordered if g not in l_done]
    elif mode == "verify":
        candidates = [g for g in all_ordered if g in l_done and g not in v_done]
    else:
        candidates = []
    return random.choice(candidates) if candidates else None


def _saved_values_for(mode, gid):
    """Collect the stored per-URL values used to pre-fill the widgets for *gid*."""
    saved = {}
    labels = get_clean_df(LABEL_FILE)
    if not labels.empty:
        for _, r in labels[labels['group_id'] == gid].iterrows():
            saved[r['url']] = {"score": r['score'], "label": r['label']}
    if mode in ["label", "fix"]:
        return saved
    # Verify mode starts from the labeller's values (what "Prev:" refers to)
    # and lets an already stored verification override them.
    checks = get_clean_df(VERIFY_FILE)
    if not checks.empty:
        for _, r in checks[checks['group_id'] == gid].iterrows():
            saved[r['url']] = {"is_correct": r['is_correct'], "label": r['corrected_label'], "score": r['corrected_score']}
    return saved


def render_workspace(mode, history, specific_index=None, move_back=False):
    """Pick a group and build the Gradio updates that display it.

    Args:
        mode: "label", "verify" or "fix".
        history: List of group ids visited so far, newest last. It is mutated:
            popped when stepping back, appended to when a new group is shown.
        specific_index: Zero-based position in the ordered group list to jump
            to; takes precedence over the other selection rules.
        move_back: When true and history has more than one entry, return to
            the previous group.

    Returns:
        A dict mapping Gradio components to their new values. When no group
        can be shown, the dict only switches back to the menu screen and sets
        the status box to "Done!".
    """
    all_ordered = get_ordered_groups()
    target_gid = _pick_target_group(mode, history, all_ordered, specific_index, move_back)
    # A target missing from the URL list has no images and no position, so it
    # is treated like "nothing left" instead of raising in index() below.
    if not target_gid or target_gid not in all_ordered:
        return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "Done!"}

    # Match on the derived group id rather than on a substring of the URL so
    # that a URL merely containing the id elsewhere is not picked up.
    urls = [u for u in load_all_urls() if _group_id_from_url(u) == target_gid][:MAX_IMAGES]
    if not history or history[-1] != target_gid:
        history.append(target_gid)
    saved_vals = _saved_values_for(mode, target_gid)

    # Download the thumbnails in parallel; map() keeps them in URL order.
    with ThreadPoolExecutor(max_workers=MAX_IMAGES) as ex:
        processed_images = list(ex.map(_fetch_thumbnail, urls))

    target_idx = all_ordered.index(target_gid)
    updates = {
        screen_menu: gr.update(visible=False), screen_work: gr.update(visible=True),
        header_md: f"# {mode.upper()} - Prop #{target_idx + 1} ({target_gid})",
        state_urls: urls, state_hist: history, state_idx: target_idx,
        top_stats: get_stats_text(), log_box: f"Viewing: {target_gid}"
    }
    for i in range(MAX_IMAGES):
        # input_objs stores four widgets per image slot: slider, dropdown,
        # checkbox, markdown.
        c_sld, c_drp, c_chk, c_lbl = input_objs[i * 4:i * 4 + 4]
        if i >= len(urls):
            # Unused slot: hide the image and all four widgets.
            updates[img_objs[i]] = gr.update(visible=False)
            for obj in (c_sld, c_drp, c_chk, c_lbl):
                updates[obj] = gr.update(visible=False)
            continue
        stored = saved_vals.get(urls[i], {})
        updates[img_objs[i]] = gr.update(value=processed_images[i], visible=True)
        v_sc = int(stored.get('score', 5))
        v_lbl = str(stored.get('label', "living_room")).strip().lower()
        # The dropdown only offers ROOM_CLASSES, so fall back to a valid choice.
        drop_val = v_lbl if v_lbl in ROOM_CLASSES else "living_room"
        if mode in ["label", "fix"]:
            is_err = v_sc == 10 and v_lbl != "living_room"
            updates[c_sld] = gr.update(visible=True, value=v_sc, interactive=True)
            updates[c_drp] = gr.update(visible=True, value=drop_val, interactive=True)
            updates[c_chk] = gr.update(visible=False)
            updates[c_lbl] = gr.update(visible=is_err, value="<span style='color:red'>⚠️ Score 10 Only for Living Room</span>")
        else:
            updates[c_sld] = gr.update(visible=True, value=v_sc)
            updates[c_drp] = gr.update(visible=True, value=drop_val)
            updates[c_chk] = gr.update(visible=True, value=True)
            updates[c_lbl] = gr.update(visible=True, value=f"Prev: {v_lbl}")
    return updates

def save_data(mode, history, urls, *args):
    """Append the current widget values to the matching CSV and show the next group.

    Args:
        mode: "label", "verify" or "fix". "label" and "fix" write to the label
            file; any other mode writes to the verification file.
        history: Visited group ids; the last entry is the group being saved.
        urls: URLs currently displayed, one per used image slot.
        *args: Widget values, four per image slot in the order
            (score, label, checkbox, markdown); only the first three are used.

    Returns:
        The update dict produced by render_workspace for the next group, or a
        dict that only sets the status box when there is no group to save.
    """
    if not history:
        # Return a partial update dict (the same shape render_workspace uses
        # for its early exit) rather than None for this many-output handler.
        return {log_box: "Nothing to save."}
    gid = history[-1]
    is_labeling = mode in ["label", "fix"]
    target_file = LABEL_FILE if is_labeling else VERIFY_FILE
    ts = datetime.now().isoformat()
    rows = []
    for i, u in enumerate(urls):
        sc, lbl, chk = args[i * 4], args[i * 4 + 1], args[i * 4 + 2]
        clean_lbl = str(lbl).strip().lower()
        if is_labeling:
            rows.append([ts, "user", gid, u, int(sc), clean_lbl])
        else:
            rows.append([ts, "user", gid, u, chk, clean_lbl, int(sc)])
    # The file lock serialises appends coming from concurrent sessions.
    with FileLock(LOCK_FILE):
        with open(target_file, "a", newline="", encoding="utf-8") as f:
            csv.writer(f).writerows(rows)
    # Only mark the group as fixed once its rows are actually on disk.
    if mode == "fix":
        FIXED_IN_SESSION.add(gid)
    sync_push_background(target_file, os.path.basename(target_file))
    return render_workspace(mode, history)

def refresh_cat():
    """Build the catalog table: one row per group with its position, status and id.

    Status precedence is flagged, then verified, then labeled, then pending.
    """
    all_gids = get_ordered_groups()
    flagged = set(get_flagged_groups())
    labels, checks = get_clean_df(LABEL_FILE), get_clean_df(VERIFY_FILE)
    labeled_ids = set() if labels.empty else set(labels['group_id'].unique())
    verified_ids = set() if checks.empty else set(checks['group_id'].unique())

    def _status(gid):
        # First matching rule wins.
        if gid in flagged:
            return "⚠️ Fix Needed"
        if gid in verified_ids:
            return "✅ Verified"
        if gid in labeled_ids:
            return "🔵 Labeled"
        return "⚪ Pending"

    rows = []
    for position, gid in enumerate(all_gids, start=1):
        rows.append([position, _status(gid), gid])
    return pd.DataFrame(rows, columns=["#", "Status", "ID"])

# UI definition. Component variables created here (screen_menu, screen_work,
# header_md, img_objs, input_objs, ...) are module-level names that
# render_workspace and save_data refer to when building their update dicts.
with gr.Blocks(theme=gr.themes.Soft(), title="Labeler Pro") as demo:
    # State values: current mode, visited group ids, URLs on screen, and the
    # index of the displayed group.
    state_mode, state_hist, state_urls, state_idx = gr.State("label"), gr.State([]), gr.State([]), gr.State(0)
    with gr.Row():
        top_stats = gr.Markdown("Loading...")
        btn_home = gr.Button("🏠 Home", size="sm", scale=0)
    with gr.Tabs():
        with gr.Tab("Workspace"):
            # Start menu; swapped with screen_work by toggling visibility.
            with gr.Group() as screen_menu:
                gr.Markdown("# Property Labeler Pro")
                with gr.Row():
                    b_start_l, b_start_v, b_start_f = gr.Button("Label", variant="primary"), gr.Button("Verify"), gr.Button("🛠 Fix Errors", variant="secondary")
            # Working screen, hidden until a group is rendered.
            with gr.Group(visible=False) as screen_work:
                header_md = gr.Markdown()
                img_objs, input_objs = [], []
                with gr.Row():
                    # One column per image slot. input_objs is a flat list with
                    # four widgets per slot (slider, dropdown, checkbox,
                    # markdown); other code indexes it in strides of 4.
                    for i in range(MAX_IMAGES):
                        with gr.Column(min_width=200):
                            img = gr.Image(interactive=False, height=240)
                            sld, drp, chk, lbl = gr.Slider(1, 10, step=1, label="Score"), gr.Dropdown(ROOM_CLASSES, label="Class"), gr.Checkbox(label="Correct?"), gr.Markdown()
                            img_objs.append(img); input_objs.extend([sld, drp, chk, lbl])
                with gr.Row():
                    b_back, b_save = gr.Button("⬅ Back"), gr.Button("💾 Save & Next", variant="primary")
                log_box = gr.Textbox(label="Status", interactive=False)
        with gr.Tab("Catalog"):
            with gr.Row():
                # 1-based property number, matching the "#" column of the catalog.
                num_in = gr.Number(value=1, label="Prop #", precision=0)
                b_go_l, b_go_v, b_go_f = gr.Button("Go Label"), gr.Button("Go Verify"), gr.Button("Go Fix")
            df_cat = gr.Dataframe(interactive=False)
            b_ref_cat = gr.Button("Refresh Catalog")
    # Every component that render_workspace / save_data may update.
    ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
    # Start buttons: store the chosen mode first, then render a group in that mode.
    b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
    b_start_v.click(lambda: "verify", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
    b_start_f.click(lambda: "fix", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
    # Save passes the mode, history and URLs followed by all widget values (*args of save_data).
    b_save.click(save_data, [state_mode, state_hist, state_urls] + input_objs, ALL_IO)
    b_back.click(lambda m, h: render_workspace(m, h, move_back=True), [state_mode, state_hist], ALL_IO)
    # Home: show the menu, hide the workspace and clear the visit history.
    btn_home.click(lambda: {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), state_hist: []}, None, [screen_menu, screen_work, state_hist])
    # Catalog jump buttons: set the mode, then open the group at the entered
    # number (converted from 1-based to a 0-based index).
    b_go_l.click(lambda: "label", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
    b_go_v.click(lambda: "verify", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
    b_go_f.click(lambda: "fix", None, state_mode).then(lambda n,m,h: render_workspace(m,h,int(n)-1), [num_in, state_mode, state_hist], ALL_IO)
    # Refresh the catalog table and then the stats line, on demand and on page load.
    b_ref_cat.click(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
    demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
# Enable the queue and start the server on all interfaces, port 7860.
demo.queue().launch(server_name="0.0.0.0", server_port=7860)