Nightfury16 committed on
Commit
4db7551
·
1 Parent(s): bc05f02

Initial commit

Browse files
Files changed (1) hide show
  1. app.py +14 -32
app.py CHANGED
@@ -9,18 +9,21 @@ from concurrent.futures import ThreadPoolExecutor
9
  from datetime import datetime
10
  from filelock import FileLock
11
 
12
- # --- CONFIGURATION ---
 
 
 
 
13
  URL_FILE = "urls.txt"
14
- LABEL_FILE = "annotations.csv"
15
- VERIFY_FILE = "verifications.csv"
16
- SKIP_FILE = "skipped.csv"
17
- LOCK_FILE = "data.lock"
18
 
19
  MAX_IMAGES = 6
20
- THUMB_SIZE = (350, 350) # Reducing size drastically speeds up rendering
21
  ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room", "outdoor", "other"]
22
 
23
- # --- INIT ---
24
  def init_files():
25
  for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
26
  if not os.path.exists(f):
@@ -33,29 +36,20 @@ def init_files():
33
 
34
  init_files()
35
 
36
- # --- FAST IMAGE ENGINE ---
37
-
38
  def get_image_optimized(url):
39
- """
40
- Downloads high-res image, resizes to thumbnail, returns PIL Object.
41
- If fails, returns the URL string (fallback).
42
- """
43
  if not url: return None
44
  try:
45
- # Pretend to be Chrome to avoid blocking
46
  headers = {
47
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
48
  }
49
- # 3 second timeout prevents freezing
50
  response = requests.get(url, headers=headers, timeout=3)
51
  if response.status_code == 200:
52
  img = Image.open(BytesIO(response.content))
53
- img.thumbnail(THUMB_SIZE, Image.Resampling.LANCZOS) # Resize
54
  return img
55
  except:
56
  pass
57
 
58
- # Fallback: If server blocks Python, return URL so browser tries
59
  return url
60
 
61
  def get_ordered_groups():
@@ -103,13 +97,10 @@ def get_stats_text():
103
  except: l = 0
104
  return f"**Total Properties:** {len(all_gids)} | **Labeled:** {l}"
105
 
106
- # --- APP LOGIC ---
107
-
108
  def render_workspace(mode, history, specific_index=None, move_back=False):
109
  all_groups = get_ordered_groups()
110
  total_groups = len(all_groups)
111
 
112
- # Load Status (Cached reading could improve this further, but pandas is fast enough for <10k rows)
113
  try: l_done = set(pd.read_csv(LABEL_FILE)['group_id'].unique())
114
  except: l_done = set()
115
  try: v_done = set(pd.read_csv(VERIFY_FILE)['group_id'].unique())
@@ -120,7 +111,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
120
  target_gid = None
121
  target_idx = -1
122
 
123
- # 1. NAVIGATION
124
  if specific_index is not None:
125
  if 0 <= specific_index < total_groups:
126
  target_gid = all_groups[specific_index]
@@ -135,7 +125,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
135
  except: target_idx = 0
136
 
137
  else:
138
- # Auto-Next
139
  found = False
140
  for i, gid in enumerate(all_groups):
141
  if gid in s_done: continue
@@ -152,7 +141,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
152
  if not found:
153
  return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "No more tasks found."}
154
 
155
- # 2. PREPARE DATA
156
  urls = get_group_urls(target_gid)
157
 
158
  if not history or history[-1] != target_gid:
@@ -161,8 +149,6 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
161
  saved_vals = get_saved_values(target_gid, mode)
162
  r1_vals = get_saved_values(target_gid, "label") if mode == "verify" else {}
163
 
164
- # 3. PARALLEL IMAGE PROCESSING (THE SPEED FIX)
165
- # We download and resize all 6 images at the same time
166
  processed_images = [None] * MAX_IMAGES
167
  with ThreadPoolExecutor(max_workers=MAX_IMAGES) as executor:
168
  futures = {executor.submit(get_image_optimized, u): i for i, u in enumerate(urls)}
@@ -175,14 +161,13 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
175
  screen_menu: gr.update(visible=False),
176
  screen_work: gr.update(visible=True),
177
  header_md: header,
178
- state_urls: urls, # Keep raw URLs for saving
179
  state_hist: history,
180
  state_idx: target_idx,
181
  top_stats: get_stats_text(),
182
  log_box: f"Loaded group {target_gid}"
183
  }
184
 
185
- # 4. UPDATE GRID
186
  for i in range(MAX_IMAGES):
187
  img_c = img_objs[i]
188
  base = i * 4
@@ -190,7 +175,7 @@ def render_workspace(mode, history, specific_index=None, move_back=False):
190
 
191
  if i < len(urls):
192
  u = urls[i]
193
- img_data = processed_images[i] # This is now a small PIL Image or a URL string
194
 
195
  updates[img_c] = gr.update(value=img_data, visible=True)
196
 
@@ -262,8 +247,6 @@ def refresh_cat():
262
  data.append([i+1, s, gid])
263
  return pd.DataFrame(data, columns=["#", "Status", "ID"])
264
 
265
- # --- UI ---
266
-
267
  with gr.Blocks(title="Fast Labeler") as demo:
268
 
269
  state_mode = gr.State("label")
@@ -314,7 +297,6 @@ with gr.Blocks(title="Fast Labeler") as demo:
314
  df_cat = gr.Dataframe(interactive=False)
315
  b_ref_cat = gr.Button("Refresh")
316
 
317
- # WIRING
318
  ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
319
 
320
  b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
@@ -332,4 +314,4 @@ with gr.Blocks(title="Fast Labeler") as demo:
332
  b_ref_cat.click(refresh_cat, None, df_cat)
333
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
334
 
335
- demo.queue().launch(theme=gr.themes.Soft())
 
9
  from datetime import datetime
10
  from filelock import FileLock
11
 
12
# --- CONFIGURATION ---
# Persist annotation output under /data when that directory exists (e.g. a
# mounted persistent volume on Spaces); otherwise fall back to the CWD.
DATA_DIR = "/data" if os.path.exists("/data") else "."

URL_FILE = "urls.txt"
LABEL_FILE = os.path.join(DATA_DIR, "annotations.csv")
VERIFY_FILE = os.path.join(DATA_DIR, "verifications.csv")
SKIP_FILE = os.path.join(DATA_DIR, "skipped.csv")
LOCK_FILE = os.path.join(DATA_DIR, "data.lock")

MAX_IMAGES = 6
THUMB_SIZE = (350, 350)
ROOM_CLASSES = ["living_room", "bedroom", "kitchen", "bathroom", "dining_room", "outdoor", "other"]
26
 
 
27
  def init_files():
28
  for f in [LABEL_FILE, VERIFY_FILE, SKIP_FILE]:
29
  if not os.path.exists(f):
 
36
 
37
  init_files()
38
 
 
 
39
def get_image_optimized(url):
    """Download *url* and return a thumbnail-sized PIL Image.

    Returns None for an empty/missing URL. On any download or decode
    failure (or a non-200 response) the raw URL string is returned as a
    fallback so the client browser can try to load the image itself.
    """
    if not url:
        return None
    try:
        # Browser-like User-Agent: some image hosts block the default
        # python-requests client.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Short timeout so a slow host cannot stall the UI render.
        response = requests.get(url, headers=headers, timeout=3)
        if response.status_code == 200:
            img = Image.open(BytesIO(response.content))
            # Downscale in place; small thumbnails render much faster.
            img.thumbnail(THUMB_SIZE, Image.Resampling.LANCZOS)
            return img
    except Exception:
        # Best-effort fetch: fall through to the URL fallback below.
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate.
        pass

    # Fallback: return the URL so the browser attempts the load directly.
    return url
54
 
55
  def get_ordered_groups():
 
97
  except: l = 0
98
  return f"**Total Properties:** {len(all_gids)} | **Labeled:** {l}"
99
 
 
 
100
  def render_workspace(mode, history, specific_index=None, move_back=False):
101
  all_groups = get_ordered_groups()
102
  total_groups = len(all_groups)
103
 
 
104
  try: l_done = set(pd.read_csv(LABEL_FILE)['group_id'].unique())
105
  except: l_done = set()
106
  try: v_done = set(pd.read_csv(VERIFY_FILE)['group_id'].unique())
 
111
  target_gid = None
112
  target_idx = -1
113
 
 
114
  if specific_index is not None:
115
  if 0 <= specific_index < total_groups:
116
  target_gid = all_groups[specific_index]
 
125
  except: target_idx = 0
126
 
127
  else:
 
128
  found = False
129
  for i, gid in enumerate(all_groups):
130
  if gid in s_done: continue
 
141
  if not found:
142
  return {screen_menu: gr.update(visible=True), screen_work: gr.update(visible=False), log_box: "No more tasks found."}
143
 
 
144
  urls = get_group_urls(target_gid)
145
 
146
  if not history or history[-1] != target_gid:
 
149
  saved_vals = get_saved_values(target_gid, mode)
150
  r1_vals = get_saved_values(target_gid, "label") if mode == "verify" else {}
151
 
 
 
152
  processed_images = [None] * MAX_IMAGES
153
  with ThreadPoolExecutor(max_workers=MAX_IMAGES) as executor:
154
  futures = {executor.submit(get_image_optimized, u): i for i, u in enumerate(urls)}
 
161
  screen_menu: gr.update(visible=False),
162
  screen_work: gr.update(visible=True),
163
  header_md: header,
164
+ state_urls: urls,
165
  state_hist: history,
166
  state_idx: target_idx,
167
  top_stats: get_stats_text(),
168
  log_box: f"Loaded group {target_gid}"
169
  }
170
 
 
171
  for i in range(MAX_IMAGES):
172
  img_c = img_objs[i]
173
  base = i * 4
 
175
 
176
  if i < len(urls):
177
  u = urls[i]
178
+ img_data = processed_images[i]
179
 
180
  updates[img_c] = gr.update(value=img_data, visible=True)
181
 
 
247
  data.append([i+1, s, gid])
248
  return pd.DataFrame(data, columns=["#", "Status", "ID"])
249
 
 
 
250
  with gr.Blocks(title="Fast Labeler") as demo:
251
 
252
  state_mode = gr.State("label")
 
297
  df_cat = gr.Dataframe(interactive=False)
298
  b_ref_cat = gr.Button("Refresh")
299
 
 
300
  ALL_IO = [screen_menu, screen_work, header_md, state_urls, state_hist, state_idx, top_stats, log_box] + img_objs + input_objs
301
 
302
  b_start_l.click(lambda: "label", None, state_mode).then(render_workspace, [state_mode, state_hist], ALL_IO)
 
314
  b_ref_cat.click(refresh_cat, None, df_cat)
315
  demo.load(refresh_cat, None, df_cat).then(get_stats_text, None, top_stats)
316
 
317
+ demo.queue().launch(theme=gr.themes.Soft())