Spaces:

ij
/

ArtistEmbeddingClassifier

Sleeping

App Files Files Community

iljung1106 committed on Dec 14, 2025

Commit

178daad

1 Parent(s): 0ff521b

Make temporary prototypes use random triplet mixing and K-means clustering of embeddings.

Browse files

Files changed (1) hide show

webui_gradio.py +120 -21

webui_gradio.py CHANGED Viewed

@@ -343,11 +343,59 @@ def _gallery_item_to_pil(item) -> Optional[Image.Image]:
     return None
 def add_prototype(
     label_name: str,
     images: List,
 ) -> str:
-    """Add a temporary prototype (in-memory only, not persisted to disk)."""
     if APP_STATE.lm is None or APP_STATE.db is None:
         return "❌ Click **Load** first."
     lm = APP_STATE.lm
@@ -360,8 +408,15 @@ def add_prototype(
     if not images:
         return "❌ Upload at least 1 image."
-    zs: List[torch.Tensor] = []
     errors: List[str] = []
     for i, x in enumerate(images):
         try:
             im = _gallery_item_to_pil(x)
@@ -369,33 +424,71 @@ def add_prototype(
                 errors.append(f"Image {i}: could not parse format {type(x)}")
                 continue
-            face_pil = None
-            eyes_pil = None
             if ex is not None:
                 rgb = np.array(im.convert("RGB"))
                 face_rgb, eyes_rgb = ex.extract(rgb)
                 if face_rgb is not None:
-                    face_pil = Image.fromarray(face_rgb)
                 if eyes_rgb is not None:
-                    eyes_pil = Image.fromarray(eyes_rgb)
-            wt = _pil_to_tensor(im, lm.T_w)
-            ft = _pil_to_tensor(face_pil, lm.T_f) if face_pil is not None else None
-            et = _pil_to_tensor(eyes_pil, lm.T_e) if eyes_pil is not None else None
             z = embed_triview(lm, whole=wt, face=ft, eyes=et)
             zs.append(z)
-        except Exception as e:
-            errors.append(f"Image {i}: {e}")
             continue
     if not zs:
-        err_detail = "; ".join(errors[:3]) if errors else "unknown error"
-        return f"❌ Could not embed any uploaded images. Details: {err_detail}"
-    center = torch.stack(zs, dim=0).mean(dim=0)
-    lid = db.add_center(label_name, center)
-    return f"✅ Added temporary prototype for `{label_name}` (label_id={lid}). DB now N={db.centers.shape[0]}. ⚠️ This is session-only and will be lost when the Space restarts."
 def build_ui() -> gr.Blocks:
@@ -430,14 +523,20 @@ def build_ui() -> gr.Blocks:
         with gr.Tab("Add prototype (temporary)"):
             gr.Markdown(
                 "### ⚠️ Temporary Prototypes Only\n"
-                "Add a new prototype by averaging embeddings of uploaded whole images.\n"
-                "**These prototypes are session-only** — they will be lost when the Space restarts or goes idle.\n"
-                "Multiple prototypes per label are allowed."
             )
             label = gr.Textbox(label="Label name (artist)", placeholder="e.g. new_artist")
             imgs = gr.Gallery(label="Whole images (1+)", columns=4, rows=2, height=240, allow_preview=True)
             uploader = gr.Files(label="Upload image files (whole)", file_types=["image"], file_count="multiple")
-            add_btn = gr.Button("Add temporary prototype", variant="primary")
             add_status = gr.Markdown("")
             def _files_to_gallery(files):
@@ -453,7 +552,7 @@ def build_ui() -> gr.Blocks:
                 return out
             uploader.change(_files_to_gallery, inputs=[uploader], outputs=[imgs])
-            add_btn.click(add_prototype, inputs=[label, imgs], outputs=[add_status])
     return demo

     return None
def _kmeans_cosine(Z: torch.Tensor, K: int, iters: int = 20, seed: int = 42) -> torch.Tensor:
    """
    Cluster row vectors with K-means under cosine similarity.

    Rows of ``Z`` are L2-normalized, K of them are drawn as initial centers
    with a private seeded generator, and then assignment / re-centering is
    repeated for at most ``iters`` rounds. Centers are re-normalized after
    every round, so similarity is a plain dot product.

    Args:
        Z: 2-D tensor with one embedding per row.
        K: Number of clusters requested.
        iters: Maximum number of assignment / update rounds.
        seed: Seed for the private random generator used for initialization
            and for re-seeding empty clusters.

    Returns:
        A ``(K, D)`` tensor of unit-norm centers. When ``Z`` has ``N <= K``
        rows there is nothing to cluster and the ``N`` normalized rows are
        returned as-is (so the result may have fewer than ``K`` rows).

    Raises:
        ValueError: If ``K < 1`` while ``Z`` has more than ``K`` rows.
    """
    import random

    Z = torch.nn.functional.normalize(Z, dim=1)
    N, D = Z.shape
    if N <= K:
        return Z.clone()
    if K < 1:
        raise ValueError(f"K must be >= 1, got {K}")

    # A private generator keeps the run reproducible without reseeding the
    # process-wide `random` module that callers also draw from.
    rng = random.Random(seed)
    C = Z[rng.sample(range(N), K)].clone()

    prev_assign = None
    for _ in range(iters):
        # Rows and centers are unit-norm, so the dot product is the cosine.
        assign = (Z @ C.t()).argmax(dim=1)
        counts = torch.bincount(assign, minlength=K)
        empty = (counts == 0).nonzero(as_tuple=True)[0].tolist()

        # Same assignment as last round and no cluster to re-seed: the update
        # below would reproduce the current centers, so further rounds are
        # no-ops.
        if not empty and prev_assign is not None and torch.equal(assign, prev_assign):
            break
        prev_assign = assign

        # Sum the members of every cluster in one vectorized scatter-add.
        sums = torch.zeros(K, D, dtype=Z.dtype, device=Z.device)
        sums.index_add_(0, assign, Z)

        # Re-seed clusters that lost all members from a random data point.
        for k in empty:
            sums[k] = Z[rng.randint(0, N - 1)]
            counts[k] = 1

        C = sums / counts.unsqueeze(1).clamp_min(1).to(Z.dtype)
        C = torch.nn.functional.normalize(C, dim=1)
    return C
 def add_prototype(
     label_name: str,
     images: List,
+    k_prototypes: int,
+    n_triplets: int,
 ) -> str:
+    """
+    Add temporary prototypes using random triplet combinations and K-means clustering.
+    Similar to the eval process: extract views, create random triplets, embed, cluster.
+    """
+    import random
     if APP_STATE.lm is None or APP_STATE.db is None:
         return "❌ Click **Load** first."
     lm = APP_STATE.lm
     if not images:
         return "❌ Upload at least 1 image."
+    k_prototypes = max(1, int(k_prototypes))
+    n_triplets = max(1, int(n_triplets))
+    # Step 1: Extract whole/face/eyes from all uploaded images
+    wholes: List[Image.Image] = []
+    faces: List[Image.Image] = []
+    eyes_list: List[Image.Image] = []
     errors: List[str] = []
     for i, x in enumerate(images):
         try:
             im = _gallery_item_to_pil(x)
                 errors.append(f"Image {i}: could not parse format {type(x)}")
                 continue
+            wholes.append(im)
+            # Extract face and eyes
             if ex is not None:
                 rgb = np.array(im.convert("RGB"))
                 face_rgb, eyes_rgb = ex.extract(rgb)
                 if face_rgb is not None:
+                    faces.append(Image.fromarray(face_rgb))
                 if eyes_rgb is not None:
+                    eyes_list.append(Image.fromarray(eyes_rgb))
+        except Exception as e:
+            errors.append(f"Image {i}: {e}")
+            continue
+    if not wholes:
+        err_detail = "; ".join(errors[:3]) if errors else "unknown error"
+        return f"❌ Could not process any images. Details: {err_detail}"
+    # Step 2: Create random triplet combinations
+    # If we have fewer faces/eyes than wholes, we still try to make triplets
+    triplets: List[Tuple[Image.Image, Optional[Image.Image], Optional[Image.Image]]] = []
+    for _ in range(n_triplets):
+        w = random.choice(wholes)
+        f = random.choice(faces) if faces else None
+        e = random.choice(eyes_list) if eyes_list else None
+        triplets.append((w, f, e))
+    # Step 3: Embed all triplets
+    zs: List[torch.Tensor] = []
+    for w, f, e in triplets:
+        try:
+            wt = _pil_to_tensor(w, lm.T_w)
+            ft = _pil_to_tensor(f, lm.T_f) if f is not None else None
+            et = _pil_to_tensor(e, lm.T_e) if e is not None else None
             z = embed_triview(lm, whole=wt, face=ft, eyes=et)
             zs.append(z)
+        except Exception:
             continue
     if not zs:
+        return "❌ Could not embed any triplets."
+    Z = torch.stack(zs, dim=0)
+    Z = torch.nn.functional.normalize(Z, dim=1)
+    # Step 4: Run K-means to get K prototype centers
+    actual_k = min(k_prototypes, len(zs))
+    if actual_k < k_prototypes:
+        # Not enough embeddings for requested K
+        pass
+    centers = _kmeans_cosine(Z, actual_k, iters=20, seed=42)
+    # Step 5: Add all K prototypes to the DB
+    added_ids = []
+    for center in centers:
+        lid = db.add_center(label_name, center)
+        added_ids.append(lid)
+    return (
+        f"✅ Added {len(added_ids)} temporary prototype(s) for `{label_name}` "
+        f"(from {len(wholes)} images, {len(triplets)} triplets, K-means K={actual_k}). "
+        f"DB now N={db.centers.shape[0]}. "
+        f"⚠️ Session-only — lost on Space restart."
+    )
 def build_ui() -> gr.Blocks:
         with gr.Tab("Add prototype (temporary)"):
             gr.Markdown(
                 "### ⚠️ Temporary Prototypes Only\n"
+                "Add prototypes using random triplet combinations and K-means clustering (same as eval process).\n"
+                "1. Upload multiple whole images\n"
+                "2. Face/eyes are auto-extracted from each\n"
+                "3. Random triplets (whole + face + eyes) are created\n"
+                "4. K-means clustering creates K prototype centers\n\n"
+                "**These prototypes are session-only** — lost when the Space restarts."
             )
             label = gr.Textbox(label="Label name (artist)", placeholder="e.g. new_artist")
             imgs = gr.Gallery(label="Whole images (1+)", columns=4, rows=2, height=240, allow_preview=True)
             uploader = gr.Files(label="Upload image files (whole)", file_types=["image"], file_count="multiple")
+            with gr.Row():
+                k_proto = gr.Slider(1, 8, value=4, step=1, label="K (prototypes to create)")
+                n_trips = gr.Slider(4, 64, value=16, step=4, label="N (random triplets to sample)")
+            add_btn = gr.Button("Add temporary prototypes", variant="primary")
             add_status = gr.Markdown("")
             def _files_to_gallery(files):
                 return out
             uploader.change(_files_to_gallery, inputs=[uploader], outputs=[imgs])
+            add_btn.click(add_prototype, inputs=[label, imgs, k_proto, n_trips], outputs=[add_status])
     return demo