Spaces:
Runtime error
Runtime error
Commit
·
4db7551
1
Parent(s):
bc05f02
Initial commit
Browse files
app.py
CHANGED
|
@@ -9,18 +9,21 @@ from concurrent.futures import ThreadPoolExecutor
|
|
| 9 |
from datetime import datetime
|
| 10 |
from filelock import FileLock
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
URL_FILE = "urls.txt"
|
| 14 |
-
LABEL_FILE = "annotations.csv"
|
| 15 |
-
VERIFY_FILE = "verifications.csv"
|
| 16 |
-
SKIP_FILE = "skipped.csv"
|
| 17 |
-
LOCK_FILE = "data.lock"
|
| 18 |
|
| 19 |
MAX_IMAGES = 6
|
| 20 |
-
THUMB_SIZE = (350, 350)
|
| 21 |
ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room", "outdoor", "other"]
|
| 22 |
|
| 23 |
-
# --- INIT ---
|
| 24 |
def init_files():
|
| 25 |
for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
|
| 26 |
if not os.path.exists(f):
|
|
@@ -33,29 +36,20 @@ def init_files():
|
|
| 33 |
|
| 34 |
init_files()
|
| 35 |
|
| 36 |
-
# --- FAST IMAGE ENGINE ---
|
| 37 |
-
|
| 38 |
def get_image_optimized(url):
|
| 39 |
-
"""
|
| 40 |
-
Downloads high-res image, resizes to thumbnail, returns PIL Object.
|
| 41 |
-
If fails, returns the URL string (fallback).
|
| 42 |
-
"""
|
| 43 |
if not url: return None
|
| 44 |
try:
|
| 45 |
-
# Pretend to be Chrome to avoid blocking
|
| 46 |
headers = {
|
| 47 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
| 48 |
}
|
| 49 |
-
# 3 second timeout prevents freezing
|
| 50 |
response = requests.get(url, headers=headers, timeout=3)
|
| 51 |
if response.status_code == 200:
|
| 52 |
img = Image.open(BytesIO(response.content))
|
| 53 |
-
img.thumbnail(THUMB_SIZE, Image.Resampling.LANCZOS)
|
| 54 |
return img
|
| 55 |
except:
|
| 56 |
pass
|
| 57 |
|
| 58 |
-
# Fallback: If server blocks Python, return URL so browser tries
|
| 59 |
return url
|
| 60 |
|
| 61 |
def get_ordered_groups():
|
|
@@ -103,13 +97,10 @@ def get_stats_text():
|
|
| 103 |
except: l = 0
|
| 104 |
return f"**Total Properties:** {len(all_gids)} | **Labeled:** {l}"
|
| 105 |
|
| 106 |
-
# --- APP LOGIC ---
|
| 107 |
-
|
| 108 |
def render_workspace(mode, history, specific_index=None, move_back=False):
|
| 109 |
all_groups = get_ordered_groups()
|
| 110 |
total_groups = len(all_groups)
|
| 111 |
|
| 112 |
-
# Load Status (Cached reading could improve this further, but pandas is fast enough for <10k rows)
|
| 113 |
try: l_done = set(pd.read_csv(LABEL_FILE)['group_id'].unique())
|
| 114 |
except: l_done = set()
|
| 115 |
try: v_done = set(pd.read_csv(VERIFY_FILE)['group_id'].unique())
|
|
@@ -120,7 +111,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 120 |
target_gid = None
|
| 121 |
target_idx = -1
|
| 122 |
|
| 123 |
-
# 1. NAVIGATION
|
| 124 |
if specific_index is not None:
|
| 125 |
if 0 <= specific_index < total_groups:
|
| 126 |
target_gid = all_groups[specific_index]
|
|
@@ -135,7 +125,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 135 |
except: target_idx = 0
|
| 136 |
|
| 137 |
else:
|
| 138 |
-
# Auto-Next
|
| 139 |
found = False
|
| 140 |
for i, gid in enumerate(all_groups):
|
| 141 |
if gid in s_done: continue
|
|
@@ -152,7 +141,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 152 |
if not found:
|
| 153 |
return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "No more tasks found."}
|
| 154 |
|
| 155 |
-
# 2. PREPARE DATA
|
| 156 |
urls = get_group_urls(target_gid)
|
| 157 |
|
| 158 |
if not history or history[-1] != target_gid:
|
|
@@ -161,8 +149,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 161 |
saved_vals = get_saved_values(target_gid, mode)
|
| 162 |
r1_vals = get_saved_values(target_gid, "label") if mode == "verify" else {}
|
| 163 |
|
| 164 |
-
# 3. PARALLEL IMAGE PROCESSING (THE SPEED FIX)
|
| 165 |
-
# We download and resize all 6 images at the same time
|
| 166 |
processed_images = [None] * MAX_IMAGES
|
| 167 |
with ThreadPoolExecutor(max_workers=MAX_IMAGES) as executor:
|
| 168 |
futures = {executor.submit(get_image_optimized, u): i for i, u in enumerate(urls)}
|
|
@@ -175,14 +161,13 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 175 |
screen_menu: gr.update(visible=False),
|
| 176 |
screen_work: gr.update(visible=True),
|
| 177 |
header_md: header,
|
| 178 |
-
state_urls: urls,
|
| 179 |
state_hist: history,
|
| 180 |
state_idx: target_idx,
|
| 181 |
top_stats: get_stats_text(),
|
| 182 |
log_box: f"Loaded group {target_gid}"
|
| 183 |
}
|
| 184 |
|
| 185 |
-
# 4. UPDATE GRID
|
| 186 |
for i in range(MAX_IMAGES):
|
| 187 |
img_c = img_objs[i]
|
| 188 |
base = i * 4
|
|
@@ -190,7 +175,7 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
|
|
| 190 |
|
| 191 |
if i < len(urls):
|
| 192 |
u = urls[i]
|
| 193 |
-
img_data = processed_images[i]
|
| 194 |
|
| 195 |
updates[img_c] = gr.update(value=img_data, visible=True)
|
| 196 |
|
|
@@ -262,8 +247,6 @@ def refresh_cat():
|
|
| 262 |
data.append([i+1, s, gid])
|
| 263 |
return pd.DataFrame(data, columns=["#", "Status", "ID"])
|
| 264 |
|
| 265 |
-
# --- UI ---
|
| 266 |
-
|
| 267 |
with gr.Blocks(title="Fast Labeler") as demo:
|
| 268 |
|
| 269 |
state_mode = gr.State("label")
|
|
@@ -314,7 +297,6 @@ with gr.Blocks(title="Fast Labeler") as demo:
|
|
| 314 |
df_cat = gr.Dataframe(interactive=False)
|
| 315 |
b_ref_cat = gr.Button("Refresh")
|
| 316 |
|
| 317 |
-
# WIRING
|
| 318 |
ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
|
| 319 |
|
| 320 |
b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
|
|
@@ -332,4 +314,4 @@ with gr.Blocks(title="Fast Labeler") as demo:
|
|
| 332 |
b_ref_cat.click(refresh_cat, None, df_cat)
|
| 333 |
demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
|
| 334 |
|
| 335 |
-
demo.queue().launch(theme=gr.themes.Soft())
|
|
|
|
| 9 |
from datetime import datetime
|
| 10 |
from filelock import FileLock
|
| 11 |
|
| 12 |
+
# Keep the generated CSV/lock files under /data when that path exists,
# otherwise next to the app in the current working directory.
DATA_DIR = "/data" if os.path.exists("/data") else "."

# Source list of image URLs; a relative path that is NOT placed under DATA_DIR.
URL_FILE = "urls.txt"

# Output files, all rooted at DATA_DIR.
LABEL_FILE = os.path.join(DATA_DIR, "annotations.csv")
VERIFY_FILE = os.path.join(DATA_DIR, "verifications.csv")
SKIP_FILE = os.path.join(DATA_DIR, "skipped.csv")
# NOTE(review): presumably the path handed to FileLock — confirm at call sites.
LOCK_FILE = os.path.join(DATA_DIR, "data.lock")

# Number of image slots rendered per group.
MAX_IMAGES = 6
# Bounding box (width, height) used when shrinking downloaded images.
THUMB_SIZE = (350, 350)
# Room labels offered by the app.
ROOM_CLASSES = [
    "living_room",
    "bedroom",
    "kitchen",
    "bathroom",
    "dining_room",
    "outdoor",
    "other",
]
|
| 26 |
|
|
|
|
| 27 |
def init_files():
|
| 28 |
for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
|
| 29 |
if not os.path.exists(f):
|
|
|
|
| 36 |
|
| 37 |
init_files()
|
| 38 |
|
|
|
|
|
|
|
| 39 |
def get_image_optimized(url):
    """Download an image and shrink it to a thumbnail.

    Args:
        url: Address of the image to fetch. A falsy value (``None`` or
            ``""``) means there is no image.

    Returns:
        ``None`` when ``url`` is falsy; a PIL image resized in place to fit
        within ``THUMB_SIZE`` when the server answers HTTP 200 and the
        payload decodes; otherwise the original ``url`` string, so the
        caller still has something to display.
    """
    import logging

    if not url:
        return None

    # Browser-like User-Agent; some image hosts reject default client agents.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        # Short timeout so one slow host cannot stall the whole request.
        response = requests.get(url, headers=headers, timeout=3)
        if response.status_code == 200:
            img = Image.open(BytesIO(response.content))
            img.thumbnail(THUMB_SIZE, Image.Resampling.LANCZOS)
            return img
    except Exception as exc:
        # Deliberately best-effort: any download/decode failure falls through
        # to the URL fallback. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        logging.getLogger(__name__).debug(
            "Thumbnail fetch failed for %s: %s", url, exc
        )

    # Non-200 response or failure above: hand back the raw URL instead.
    return url
|
| 54 |
|
| 55 |
def get_ordered_groups():
|
|
|
|
| 97 |
except: l = 0
|
| 98 |
return f"**Total Properties:** {len(all_gids)} | **Labeled:** {l}"
|
| 99 |
|
|
|
|
|
|
|
| 100 |
def render_workspace(mode, history, specific_index=None, move_back=False):
|
| 101 |
all_groups = get_ordered_groups()
|
| 102 |
total_groups = len(all_groups)
|
| 103 |
|
|
|
|
| 104 |
try: l_done = set(pd.read_csv(LABEL_FILE)['group_id'].unique())
|
| 105 |
except: l_done = set()
|
| 106 |
try: v_done = set(pd.read_csv(VERIFY_FILE)['group_id'].unique())
|
|
|
|
| 111 |
target_gid = None
|
| 112 |
target_idx = -1
|
| 113 |
|
|
|
|
| 114 |
if specific_index is not None:
|
| 115 |
if 0 <= specific_index < total_groups:
|
| 116 |
target_gid = all_groups[specific_index]
|
|
|
|
| 125 |
except: target_idx = 0
|
| 126 |
|
| 127 |
else:
|
|
|
|
| 128 |
found = False
|
| 129 |
for i, gid in enumerate(all_groups):
|
| 130 |
if gid in s_done: continue
|
|
|
|
| 141 |
if not found:
|
| 142 |
return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "No more tasks found."}
|
| 143 |
|
|
|
|
| 144 |
urls = get_group_urls(target_gid)
|
| 145 |
|
| 146 |
if not history or history[-1] != target_gid:
|
|
|
|
| 149 |
saved_vals = get_saved_values(target_gid, mode)
|
| 150 |
r1_vals = get_saved_values(target_gid, "label") if mode == "verify" else {}
|
| 151 |
|
|
|
|
|
|
|
| 152 |
processed_images = [None] * MAX_IMAGES
|
| 153 |
with ThreadPoolExecutor(max_workers=MAX_IMAGES) as executor:
|
| 154 |
futures = {executor.submit(get_image_optimized, u): i for i, u in enumerate(urls)}
|
|
|
|
| 161 |
screen_menu: gr.update(visible=False),
|
| 162 |
screen_work: gr.update(visible=True),
|
| 163 |
header_md: header,
|
| 164 |
+
state_urls: urls,
|
| 165 |
state_hist: history,
|
| 166 |
state_idx: target_idx,
|
| 167 |
top_stats: get_stats_text(),
|
| 168 |
log_box: f"Loaded group {target_gid}"
|
| 169 |
}
|
| 170 |
|
|
|
|
| 171 |
for i in range(MAX_IMAGES):
|
| 172 |
img_c = img_objs[i]
|
| 173 |
base = i * 4
|
|
|
|
| 175 |
|
| 176 |
if i < len(urls):
|
| 177 |
u = urls[i]
|
| 178 |
+
img_data = processed_images[i]
|
| 179 |
|
| 180 |
updates[img_c] = gr.update(value=img_data, visible=True)
|
| 181 |
|
|
|
|
| 247 |
data.append([i+1, s, gid])
|
| 248 |
return pd.DataFrame(data, columns=["#", "Status", "ID"])
|
| 249 |
|
|
|
|
|
|
|
| 250 |
with gr.Blocks(title="Fast Labeler") as demo:
|
| 251 |
|
| 252 |
state_mode = gr.State("label")
|
|
|
|
| 297 |
df_cat = gr.Dataframe(interactive=False)
|
| 298 |
b_ref_cat = gr.Button("Refresh")
|
| 299 |
|
|
|
|
| 300 |
ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
|
| 301 |
|
| 302 |
b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
|
|
|
|
| 314 |
b_ref_cat.click(refresh_cat, None, df_cat)
|
| 315 |
demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
|
| 316 |
|
| 317 |
+
demo.queue().launch(theme=gr.themes.Soft())
|