zsc committed on
Commit
17c1f2e
·
1 Parent(s): 89f5dff
Files changed (1) hide show
  1. app.py +46 -114
app.py CHANGED
@@ -53,15 +53,16 @@ clip_model.eval()
53
  tokenizer = open_clip.get_tokenizer('ViT-B-32')
54
  print("CLIP initialized.")
55
 
56
- # --- GPU Functions ---
57
 
58
  @spaces.GPU
59
- def extract_feature_gpu(image: Image.Image) -> np.ndarray:
60
- image_tensor = clip_preprocess(image).unsqueeze(0).to(DEVICE)
 
61
  with torch.no_grad():
62
- features = clip_model.encode_image(image_tensor)
63
  features /= features.norm(dim=-1, keepdim=True)
64
- return features.cpu().numpy()[0]
65
 
66
  @spaces.GPU
67
  def encode_text_gpu(text: str) -> torch.Tensor:
@@ -98,7 +99,7 @@ def train_step_gpu(model_state, labeled_data, embed_cache):
98
 
99
  @spaces.GPU
100
  def predict_batch_gpu(model_state, text_embed, embeddings_list):
101
- # If we have a model, use it
102
  if model_state is not None:
103
  model = MLPHead(512, 2).to(DEVICE)
104
  model.load_state_dict(model_state)
@@ -111,7 +112,7 @@ def predict_batch_gpu(model_state, text_embed, embeddings_list):
111
  _, preds = torch.max(probs, 1)
112
  return probs.cpu().numpy(), preds.cpu().numpy()
113
 
114
- # Fallback: Zero-shot text
115
  if text_embed is not None:
116
  X = torch.tensor(np.array(embeddings_list), dtype=torch.float32).to(DEVICE)
117
  text_feat = text_embed.to(DEVICE)
@@ -125,7 +126,7 @@ def predict_batch_gpu(model_state, text_embed, embeddings_list):
125
  preds = (probs_pos > 0.5).long()
126
  return probs.cpu().numpy(), preds.cpu().numpy()
127
 
128
- # Fallback: Random
129
  n = len(embeddings_list)
130
  return np.ones((n, 2)) / 2, np.zeros(n)
131
 
@@ -169,18 +170,14 @@ class SessionState:
169
  unlabeled: List[int] = field(default_factory=list)
170
  embed_cache: Dict[int, np.ndarray] = field(default_factory=dict)
171
  model_state: Optional[Dict] = None
172
-
173
  text_query: Optional[str] = None
174
  text_embedding: Optional[torch.Tensor] = None
175
-
176
  current_batch_ids: List[int] = field(default_factory=list)
177
  current_batch_mode: str = "neutral"
178
-
179
  history: List[Snapshot] = field(default_factory=list)
180
 
181
  def __init__(self):
182
  self.labeled = {}
183
- # Limit pool for demo speed in Gradio
184
  self.unlabeled = list(range(2000))
185
  random.shuffle(self.unlabeled)
186
  self.embed_cache = {}
@@ -211,56 +208,52 @@ class SessionState:
211
  def init_app():
212
  return SessionState()
213
 
214
- def get_embedding_safe(idx, session):
215
- if idx not in session.embed_cache:
216
- img = data_source.get_image(idx)
217
- session.embed_cache[idx] = extract_feature_gpu(img)
218
- return session.embed_cache[idx]
 
 
 
 
 
219
 
220
  def on_set_query(session, query):
221
  if not query:
222
  session.text_query = None
223
  session.text_embedding = None
224
  return session, "Query cleared."
225
-
226
  session.text_query = query
227
  session.text_embedding = encode_text_gpu(query)
228
  return session, f"Query set: '{query}'. Use 'Verify Positives' now."
229
 
230
  def load_next_batch(session: SessionState, strategy: str):
231
- # Sample pool for prediction
232
  pool_size = min(len(session.unlabeled), 500)
233
  pool = session.unlabeled[:pool_size]
234
-
235
  if not pool:
236
  return session, [], "No more data"
237
 
238
- pool_embeds = [get_embedding_safe(i, session) for i in pool]
 
 
 
239
  probs, _ = predict_batch_gpu(session.model_state, session.text_embedding, pool_embeds)
240
 
241
- if strategy == "Random":
242
- # Fallback to model if available, else true random
243
- if session.model_state or session.text_embedding:
244
- # If model exists, random strategy usually implies diversity or just random sampling
245
- # Let's stick to simple random for diversity
246
- selected_indices = random.sample(range(len(pool)), min(BATCH_SIZE, len(pool)))
247
- else:
248
- selected_indices = random.sample(range(len(pool)), min(BATCH_SIZE, len(pool)))
249
  session.current_batch_mode = "neutral"
250
  title = "Random Batch: Select Positive items"
251
-
252
  elif strategy == "Verify Positives":
253
  sort_idx = np.argsort(probs[:, 1])[::-1]
254
  selected_indices = sort_idx[:BATCH_SIZE]
255
  session.current_batch_mode = "verify_pos"
256
  title = "Verify Positives: Select items that are NOT Positive"
257
-
258
  elif strategy == "Verify Negatives":
259
  sort_idx = np.argsort(probs[:, 1])
260
  selected_indices = sort_idx[:BATCH_SIZE]
261
  session.current_batch_mode = "verify_neg"
262
  title = "Verify Negatives: Select items that are NOT Negative"
263
-
264
  elif strategy == "Borderline":
265
  scores = np.abs(probs[:, 1] - 0.5)
266
  sort_idx = np.argsort(scores)
@@ -273,11 +266,9 @@ def load_next_batch(session: SessionState, strategy: str):
273
 
274
  session.current_batch_ids = [pool[i] for i in selected_indices]
275
 
276
- # Prepare Gallery
277
  images = []
278
  for idx in session.current_batch_ids:
279
  img = data_source.get_image(idx)
280
- # Find prob
281
  p_idx = pool.index(idx)
282
  conf = probs[p_idx][1]
283
  images.append((img, f"#{idx} ({conf:.0%})"))
@@ -288,25 +279,19 @@ def on_submit_click(session, gallery_selected):
288
  if not session.current_batch_ids:
289
  return session, [], "Load batch first", 0, 0
290
 
291
- # Save Undo
292
  session.save_snapshot()
293
-
294
- selected_indices = []
295
- if gallery_selected:
296
- selected_indices = [int(x) for x in gallery_selected]
297
 
298
  ids_to_remove = []
299
  for i, global_id in enumerate(session.current_batch_ids):
300
  is_selected = i in selected_indices
301
  label = 0
302
-
303
  if session.current_batch_mode == 'verify_pos':
304
  label = 0 if is_selected else 1
305
  elif session.current_batch_mode == 'verify_neg':
306
  label = 1 if is_selected else 0
307
  else: # Neutral
308
  label = 1 if is_selected else 0
309
-
310
  session.labeled[global_id] = label
311
  ids_to_remove.append(global_id)
312
 
@@ -314,51 +299,40 @@ def on_submit_click(session, gallery_selected):
314
  if gid in session.unlabeled:
315
  session.unlabeled.remove(gid)
316
 
317
- # Train
318
- for gid in session.labeled:
319
- get_embedding_safe(gid, session)
320
-
321
  model_state, loss = train_step_gpu(session.model_state, session.labeled, session.embed_cache)
322
  session.model_state = model_state
323
 
324
- status = f"Trained! Loss: {loss:.4f}"
325
-
326
- return session, [], status, len(session.labeled), len(session.unlabeled)
327
 
328
  def on_undo_click(session):
329
  success = session.restore_snapshot()
330
  msg = "Undo successful" if success else "Nothing to undo"
331
  return session, msg, len(session.labeled), len(session.unlabeled)
332
 
 
 
 
 
333
  def render_review_tab(session):
334
- # Group by label
335
  pos_ids = [i for i, l in session.labeled.items() if l == 1]
336
  neg_ids = [i for i, l in session.labeled.items() if l == 0]
337
-
338
  pos_imgs = [(data_source.get_image(i), f"#{i}") for i in pos_ids]
339
  neg_imgs = [(data_source.get_image(i), f"#{i}") for i in neg_ids]
340
-
341
  return pos_imgs, neg_imgs
342
 
343
  def render_autolabel_tab(session):
344
- # Predict on a chunk of unlabeled
345
- pool = session.unlabeled[:200] # Limit for display
346
  if not pool: return []
347
-
348
- embeds = [get_embedding_safe(i, session) for i in pool]
349
  probs, preds = predict_batch_gpu(session.model_state, session.text_embedding, embeds)
350
 
351
- # Sort by conf descending
352
  results = []
353
  for i, idx in enumerate(pool):
354
- results.append({
355
- "id": idx,
356
- "conf": probs[i][1],
357
- "pred": preds[i]
358
- })
359
  results.sort(key=lambda x: x["conf"], reverse=True)
360
 
361
- # Format for gallery
362
  out = []
363
  for item in results:
364
  img = data_source.get_image(item["id"])
@@ -367,7 +341,7 @@ def render_autolabel_tab(session):
367
  return out
368
 
369
  def export_json(session):
370
- data = [{"filename": f"celeba_{k}.png", "label": v} for k, v in session.labeled.items()]
371
  file_path = "/tmp/labels.json"
372
  with open(file_path, "w") as f:
373
  json.dump(data, f, indent=2)
@@ -377,40 +351,31 @@ def export_json(session):
377
 
378
  with gr.Blocks(title="FastLabel ZeroGPU") as demo:
379
  session = gr.State(init_app)
380
-
381
  gr.Markdown("# FastLabel on ZeroGPU (Multi-modal Active Learning)")
382
 
383
  with gr.Tabs():
384
- # --- TAB 1: LABELING ---
385
  with gr.Tab("Labeling"):
386
  with gr.Row():
387
  with gr.Column(scale=1):
388
  gr.Markdown("### 1. Zero-shot Init")
389
- txt_query = gr.Textbox(placeholder="e.g. 'wearing hat'", label="Text Query")
390
  btn_query = gr.Button("Set Query")
391
-
392
  gr.Markdown("### 2. Strategy")
393
  strategy_drop = gr.Dropdown(
394
  choices=["Random", "Verify Positives", "Verify Negatives", "Borderline"],
395
  value="Random", show_label=False
396
  )
397
  btn_load = gr.Button("Load Batch", variant="primary")
398
-
399
  gr.Markdown("### 3. Actions")
400
  btn_undo = gr.Button("Undo Last", variant="secondary")
401
-
402
  gr.Markdown("### Stats")
403
  lbl_count = gr.Number(value=0, label="Labeled")
404
  unlbl_count = gr.Number(value=2000, label="Unlabeled")
405
-
406
  with gr.Column(scale=3):
407
  info_box = gr.Markdown("### Ready. Set a query or just Load Batch.")
408
- gallery = gr.Gallery(
409
- label="Batch", show_label=False, columns=6, height="auto", allow_preview=False
410
- )
411
  btn_submit = gr.Button("Confirm & Train", variant="stop")
412
 
413
- # --- TAB 2: REVIEW ---
414
  with gr.Tab("Review"):
415
  btn_refresh_review = gr.Button("Refresh Review")
416
  gr.Markdown("#### Positive")
@@ -418,56 +383,23 @@ with gr.Blocks(title="FastLabel ZeroGPU") as demo:
418
  gr.Markdown("#### Negative")
419
  gallery_neg = gr.Gallery(show_label=False, columns=8, height="auto")
420
 
421
- # --- TAB 3: AUTOLABEL ---
422
  with gr.Tab("Autolabel (AI)"):
423
  btn_refresh_auto = gr.Button("Run Inference on Unlabeled Pool")
424
  gr.Markdown("Showing top 200 predictions sorted by confidence.")
425
  gallery_auto = gr.Gallery(show_label=False, columns=8, height="auto")
426
 
427
- # --- TAB 4: EXPORT ---
428
  with gr.Tab("Export"):
429
  btn_export = gr.Button("Generate JSON")
430
  file_output = gr.File(label="Download Labels")
431
 
432
  # --- Wiring ---
433
-
434
  btn_query.click(on_set_query, [session, txt_query], [session, info_box])
435
-
436
- btn_load.click(
437
- on_load_click,
438
- [session, strategy_drop],
439
- [session, gallery, info_box]
440
- )
441
-
442
- btn_submit.click(
443
- on_submit_click,
444
- [session, gallery],
445
- [session, gallery, info_box, lbl_count, unlbl_count]
446
- )
447
-
448
- btn_undo.click(
449
- on_undo_click,
450
- [session],
451
- [session, info_box, lbl_count, unlbl_count]
452
- )
453
-
454
- btn_refresh_review.click(
455
- render_review_tab,
456
- [session],
457
- [gallery_pos, gallery_neg]
458
- )
459
-
460
- btn_refresh_auto.click(
461
- render_autolabel_tab,
462
- [session],
463
- [gallery_auto]
464
- )
465
-
466
- btn_export.click(
467
- export_json,
468
- [session],
469
- [file_output]
470
- )
471
 
472
  if __name__ == "__main__":
473
- demo.launch()
 
53
  tokenizer = open_clip.get_tokenizer('ViT-B-32')
54
  print("CLIP initialized.")
55
 
56
+ # --- Batched GPU Functions ---
57
 
58
  @spaces.GPU
59
+ def extract_features_batch_gpu(images: List[Image.Image]) -> np.ndarray:
60
+ """Extract CLIP features for a batch of images."""
61
+ tensors = torch.stack([clip_preprocess(img) for img in images]).to(DEVICE)
62
  with torch.no_grad():
63
+ features = clip_model.encode_image(tensors)
64
  features /= features.norm(dim=-1, keepdim=True)
65
+ return features.cpu().numpy()
66
 
67
  @spaces.GPU
68
  def encode_text_gpu(text: str) -> torch.Tensor:
 
99
 
100
  @spaces.GPU
101
  def predict_batch_gpu(model_state, text_embed, embeddings_list):
102
+ # Case 1: Active Learning Model
103
  if model_state is not None:
104
  model = MLPHead(512, 2).to(DEVICE)
105
  model.load_state_dict(model_state)
 
112
  _, preds = torch.max(probs, 1)
113
  return probs.cpu().numpy(), preds.cpu().numpy()
114
 
115
+ # Case 2: Zero-shot Text
116
  if text_embed is not None:
117
  X = torch.tensor(np.array(embeddings_list), dtype=torch.float32).to(DEVICE)
118
  text_feat = text_embed.to(DEVICE)
 
126
  preds = (probs_pos > 0.5).long()
127
  return probs.cpu().numpy(), preds.cpu().numpy()
128
 
129
+ # Case 3: Random
130
  n = len(embeddings_list)
131
  return np.ones((n, 2)) / 2, np.zeros(n)
132
 
 
170
  unlabeled: List[int] = field(default_factory=list)
171
  embed_cache: Dict[int, np.ndarray] = field(default_factory=dict)
172
  model_state: Optional[Dict] = None
 
173
  text_query: Optional[str] = None
174
  text_embedding: Optional[torch.Tensor] = None
 
175
  current_batch_ids: List[int] = field(default_factory=list)
176
  current_batch_mode: str = "neutral"
 
177
  history: List[Snapshot] = field(default_factory=list)
178
 
179
  def __init__(self):
180
  self.labeled = {}
 
181
  self.unlabeled = list(range(2000))
182
  random.shuffle(self.unlabeled)
183
  self.embed_cache = {}
 
208
  def init_app():
209
  return SessionState()
210
 
211
+ def get_embeddings_batch(ids: List[int], session: SessionState):
212
+ """Fetch embeddings for a list of IDs, batching GPU calls for missing ones."""
213
+ missing_ids = [i for i in ids if i not in session.embed_cache]
214
+ if missing_ids:
215
+ images = [data_source.get_image(i) for i in missing_ids]
216
+ # BATCHED GPU CALL
217
+ feats = extract_features_batch_gpu(images)
218
+ for i, gid in enumerate(missing_ids):
219
+ session.embed_cache[gid] = feats[i]
220
+ return [session.embed_cache[i] for i in ids]
221
 
222
  def on_set_query(session, query):
223
  if not query:
224
  session.text_query = None
225
  session.text_embedding = None
226
  return session, "Query cleared."
 
227
  session.text_query = query
228
  session.text_embedding = encode_text_gpu(query)
229
  return session, f"Query set: '{query}'. Use 'Verify Positives' now."
230
 
231
  def load_next_batch(session: SessionState, strategy: str):
 
232
  pool_size = min(len(session.unlabeled), 500)
233
  pool = session.unlabeled[:pool_size]
 
234
  if not pool:
235
  return session, [], "No more data"
236
 
237
+ # Batched embedding retrieval
238
+ pool_embeds = get_embeddings_batch(pool, session)
239
+
240
+ # Batched prediction
241
  probs, _ = predict_batch_gpu(session.model_state, session.text_embedding, pool_embeds)
242
 
243
+ if strategy == "Random" or (session.model_state is None and session.text_embedding is None):
244
+ selected_indices = random.sample(range(len(pool)), min(BATCH_SIZE, len(pool)))
 
 
 
 
 
 
245
  session.current_batch_mode = "neutral"
246
  title = "Random Batch: Select Positive items"
 
247
  elif strategy == "Verify Positives":
248
  sort_idx = np.argsort(probs[:, 1])[::-1]
249
  selected_indices = sort_idx[:BATCH_SIZE]
250
  session.current_batch_mode = "verify_pos"
251
  title = "Verify Positives: Select items that are NOT Positive"
 
252
  elif strategy == "Verify Negatives":
253
  sort_idx = np.argsort(probs[:, 1])
254
  selected_indices = sort_idx[:BATCH_SIZE]
255
  session.current_batch_mode = "verify_neg"
256
  title = "Verify Negatives: Select items that are NOT Negative"
 
257
  elif strategy == "Borderline":
258
  scores = np.abs(probs[:, 1] - 0.5)
259
  sort_idx = np.argsort(scores)
 
266
 
267
  session.current_batch_ids = [pool[i] for i in selected_indices]
268
 
 
269
  images = []
270
  for idx in session.current_batch_ids:
271
  img = data_source.get_image(idx)
 
272
  p_idx = pool.index(idx)
273
  conf = probs[p_idx][1]
274
  images.append((img, f"#{idx} ({conf:.0%})"))
 
279
  if not session.current_batch_ids:
280
  return session, [], "Load batch first", 0, 0
281
 
 
282
  session.save_snapshot()
283
+ selected_indices = [int(x) for x in gallery_selected] if gallery_selected else []
 
 
 
284
 
285
  ids_to_remove = []
286
  for i, global_id in enumerate(session.current_batch_ids):
287
  is_selected = i in selected_indices
288
  label = 0
 
289
  if session.current_batch_mode == 'verify_pos':
290
  label = 0 if is_selected else 1
291
  elif session.current_batch_mode == 'verify_neg':
292
  label = 1 if is_selected else 0
293
  else: # Neutral
294
  label = 1 if is_selected else 0
 
295
  session.labeled[global_id] = label
296
  ids_to_remove.append(global_id)
297
 
 
299
  if gid in session.unlabeled:
300
  session.unlabeled.remove(gid)
301
 
302
+ # Batched training (training is always batched internally)
 
 
 
303
  model_state, loss = train_step_gpu(session.model_state, session.labeled, session.embed_cache)
304
  session.model_state = model_state
305
 
306
+ return session, [], f"Trained! Loss: {loss:.4f}", len(session.labeled), len(session.unlabeled)
 
 
307
 
308
  def on_undo_click(session):
309
  success = session.restore_snapshot()
310
  msg = "Undo successful" if success else "Nothing to undo"
311
  return session, msg, len(session.labeled), len(session.unlabeled)
312
 
313
+ def on_load_click(session, strategy):
314
+ session, images, title = load_next_batch(session, strategy)
315
+ return session, images, title
316
+
317
  def render_review_tab(session):
 
318
  pos_ids = [i for i, l in session.labeled.items() if l == 1]
319
  neg_ids = [i for i, l in session.labeled.items() if l == 0]
 
320
  pos_imgs = [(data_source.get_image(i), f"#{i}") for i in pos_ids]
321
  neg_imgs = [(data_source.get_image(i), f"#{i}") for i in neg_ids]
 
322
  return pos_imgs, neg_imgs
323
 
324
  def render_autolabel_tab(session):
325
+ pool = session.unlabeled[:200]
 
326
  if not pool: return []
327
+ # Batched embedding & prediction
328
+ embeds = get_embeddings_batch(pool, session)
329
  probs, preds = predict_batch_gpu(session.model_state, session.text_embedding, embeds)
330
 
 
331
  results = []
332
  for i, idx in enumerate(pool):
333
+ results.append({"id": idx, "conf": probs[i][1], "pred": preds[i]})
 
 
 
 
334
  results.sort(key=lambda x: x["conf"], reverse=True)
335
 
 
336
  out = []
337
  for item in results:
338
  img = data_source.get_image(item["id"])
 
341
  return out
342
 
343
  def export_json(session):
344
+ data = [{"id": k, "label": v} for k, v in session.labeled.items()]
345
  file_path = "/tmp/labels.json"
346
  with open(file_path, "w") as f:
347
  json.dump(data, f, indent=2)
 
351
 
352
  with gr.Blocks(title="FastLabel ZeroGPU") as demo:
353
  session = gr.State(init_app)
 
354
  gr.Markdown("# FastLabel on ZeroGPU (Multi-modal Active Learning)")
355
 
356
  with gr.Tabs():
 
357
  with gr.Tab("Labeling"):
358
  with gr.Row():
359
  with gr.Column(scale=1):
360
  gr.Markdown("### 1. Zero-shot Init")
361
+ txt_query = gr.Textbox(placeholder="e.g. 'smiling'", label="Text Query")
362
  btn_query = gr.Button("Set Query")
 
363
  gr.Markdown("### 2. Strategy")
364
  strategy_drop = gr.Dropdown(
365
  choices=["Random", "Verify Positives", "Verify Negatives", "Borderline"],
366
  value="Random", show_label=False
367
  )
368
  btn_load = gr.Button("Load Batch", variant="primary")
 
369
  gr.Markdown("### 3. Actions")
370
  btn_undo = gr.Button("Undo Last", variant="secondary")
 
371
  gr.Markdown("### Stats")
372
  lbl_count = gr.Number(value=0, label="Labeled")
373
  unlbl_count = gr.Number(value=2000, label="Unlabeled")
 
374
  with gr.Column(scale=3):
375
  info_box = gr.Markdown("### Ready. Set a query or just Load Batch.")
376
+ gallery = gr.Gallery(label="Batch", show_label=False, columns=6, height="auto", allow_preview=False, type="index")
 
 
377
  btn_submit = gr.Button("Confirm & Train", variant="stop")
378
 
 
379
  with gr.Tab("Review"):
380
  btn_refresh_review = gr.Button("Refresh Review")
381
  gr.Markdown("#### Positive")
 
383
  gr.Markdown("#### Negative")
384
  gallery_neg = gr.Gallery(show_label=False, columns=8, height="auto")
385
 
 
386
  with gr.Tab("Autolabel (AI)"):
387
  btn_refresh_auto = gr.Button("Run Inference on Unlabeled Pool")
388
  gr.Markdown("Showing top 200 predictions sorted by confidence.")
389
  gallery_auto = gr.Gallery(show_label=False, columns=8, height="auto")
390
 
 
391
  with gr.Tab("Export"):
392
  btn_export = gr.Button("Generate JSON")
393
  file_output = gr.File(label="Download Labels")
394
 
395
  # --- Wiring ---
 
396
  btn_query.click(on_set_query, [session, txt_query], [session, info_box])
397
+ btn_load.click(on_load_click, [session, strategy_drop], [session, gallery, info_box])
398
+ btn_submit.click(on_submit_click, [session, gallery], [session, gallery, info_box, lbl_count, unlbl_count])
399
+ btn_undo.click(on_undo_click, [session], [session, info_box, lbl_count, unlbl_count])
400
+ btn_refresh_review.click(render_review_tab, [session], [gallery_pos, gallery_neg])
401
+ btn_refresh_auto.click(render_autolabel_tab, [session], [gallery_auto])
402
+ btn_export.click(export_json, [session], [file_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
 
404
  if __name__ == "__main__":
405
+ demo.launch()