righthook75 commited on
Commit
26a0321
·
verified ·
1 Parent(s): bf335e8

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +425 -347
app.py CHANGED
@@ -1,9 +1,10 @@
1
  import streamlit as st
 
2
  from PIL import Image
3
  from streamlit_drawable_canvas import st_canvas
4
 
5
  from sam3_engine import get_device, load_model, load_model_for_training, combined_prompt_inference
6
- from viz import overlay_masks, overlay_boxes, overlay_accepted
7
  from manifest import build_manifest, manifest_to_json, deduplicate
8
  from training import SAM3FineTuneDataset, freeze_encoder, run_training, get_model_zip_bytes
9
 
@@ -15,18 +16,19 @@ CANVAS_MAX_WIDTH = 700
15
 
16
  # --- Session state defaults ---
17
  defaults = {
18
- "step": 1,
19
  "image": None,
20
  "filename": None,
21
  "images": [], # list of (filename, PIL.Image) tuples
22
  "image_index": 0, # current position in batch
23
  "all_image_detections": [], # accumulated detections across ALL images
24
- "accepted_detections": [], # per-image accumulated across rounds
25
- "prompts": [], # list of prompt dicts for current image
26
- "prompt_counter": 0, # monotonic counter for prompt IDs
27
- "sam_results": [], # latest SAM3 results for current image
28
  "label_round": 0, # iteration counter for canvas key stability
29
  "canvas_scale": 1.0, # image-to-canvas scale factor
 
 
30
  "training_loss_history": [],
31
  "training_complete": False,
32
  "finetuned_model_bytes": None,
@@ -36,16 +38,6 @@ for key, val in defaults.items():
36
  st.session_state[key] = val
37
 
38
 
39
- def _reset_per_image_state():
40
- """Reset state that is specific to a single image."""
41
- st.session_state.accepted_detections = []
42
- st.session_state.prompts = []
43
- st.session_state.prompt_counter = 0
44
- st.session_state.sam_results = []
45
- st.session_state.label_round = 0
46
- st.session_state.canvas_scale = 1.0
47
-
48
-
49
  def _load_image_at_index(idx: int):
50
  """Load the image at the given batch index into session state."""
51
  filename, image = st.session_state.images[idx]
@@ -58,6 +50,26 @@ def go_to(step: int):
58
  st.session_state.step = step
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # --- Coordinate scaling helpers ---
62
  def _canvas_to_image(obj: dict, scale: float):
63
  """Convert a Fabric.js canvas object to image-space coordinates."""
@@ -80,7 +92,6 @@ def _canvas_to_image(obj: dict, scale: float):
80
  ],
81
  }
82
  elif obj_type == "circle":
83
- # Points rendered as small circles
84
  r = obj.get("radius", 0)
85
  cx = (left + r * sx) / scale
86
  cy = (top + r * sy) / scale
@@ -91,56 +102,78 @@ def _canvas_to_image(obj: dict, scale: float):
91
  return None
92
 
93
 
94
- def _prompts_to_fabric_json(prompts: list, scale: float) -> dict:
95
- """Convert prompt list to Fabric.js JSON for initial_drawing."""
96
- objects = []
97
- for p in prompts:
98
- if p["type"] == "box":
99
- x1, y1, x2, y2 = p["coords"]
100
- objects.append({
101
- "type": "rect",
102
- "left": x1 * scale,
103
- "top": y1 * scale,
104
- "width": (x2 - x1) * scale,
105
- "height": (y2 - y1) * scale,
106
- "fill": "rgba(255, 0, 0, 0.1)",
107
- "stroke": "red",
108
- "strokeWidth": 2,
109
- "scaleX": 1,
110
- "scaleY": 1,
111
- })
112
- elif p["type"] == "point":
113
- cx, cy = p["coords"]
114
- color = "lime" if p.get("point_label", 1) == 1 else "red"
115
- objects.append({
116
- "type": "circle",
117
- "left": cx * scale - 5,
118
- "top": cy * scale - 5,
119
- "radius": 5,
120
- "fill": color,
121
- "stroke": color,
122
- "strokeWidth": 1,
123
- "scaleX": 1,
124
- "scaleY": 1,
125
- })
126
- return {"version": "5.3.0", "objects": objects}
127
-
128
-
129
- def _next_prompt_id() -> str:
130
- st.session_state.prompt_counter += 1
131
- return f"p{st.session_state.prompt_counter}"
132
-
133
-
134
- def _accepted_to_prompts(detections: list):
135
- """Convert accepted detections into box prompts and add to prompt list."""
136
- for det in detections:
137
- st.session_state.prompts.append({
138
- "id": _next_prompt_id(),
139
- "type": "box",
140
- "coords": det["box"],
141
- "label": det.get("label", ""),
142
- "point_label": None,
143
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
 
146
  # --- Sidebar ---
@@ -149,11 +182,14 @@ with st.sidebar:
149
  device = get_device()
150
  st.caption(f"Device: **{device}**")
151
  st.caption("Model: `facebook/sam3`")
 
 
 
152
  st.divider()
153
 
154
- step_labels = ["Upload", "Label", "Export", "Train"]
155
  current = st.session_state.step
156
- for i, label in enumerate(step_labels, start=1):
157
  if current == i:
158
  marker = f"-> {i}. {label}"
159
  else:
@@ -165,14 +201,10 @@ with st.sidebar:
165
  st.divider()
166
  st.metric("Image", f"{st.session_state.image_index + 1} of {n_images}")
167
 
168
- total_all = len(st.session_state.all_image_detections) + len(st.session_state.accepted_detections)
169
- accepted = st.session_state.accepted_detections
170
- if total_all:
171
  st.divider()
172
- if n_images > 1:
173
- st.metric("Accepted (all images)", total_all)
174
- else:
175
- st.metric("Total accepted", len(accepted))
176
 
177
  st.divider()
178
  if st.button("Start over"):
@@ -182,155 +214,84 @@ with st.sidebar:
182
 
183
 
184
  # =============================================================================
185
- # Step 1: Upload
186
- # =============================================================================
187
- if st.session_state.step == 1:
188
- st.header("Step 1: Upload Images")
189
- uploaded_files = st.file_uploader(
190
- "Choose one or more images (PNG/JPG)",
191
- type=["png", "jpg", "jpeg"],
192
- accept_multiple_files=True,
193
- )
194
- if uploaded_files:
195
- images = [(f.name, Image.open(f).convert("RGB")) for f in uploaded_files]
196
- st.session_state.images = images
197
- st.session_state.image_index = 0
198
-
199
- # Show thumbnail grid
200
- n = len(images)
201
- cols = st.columns(min(n, 4))
202
- for i, (name, img) in enumerate(images):
203
- with cols[i % len(cols)]:
204
- st.image(img, caption=name, width="stretch")
205
-
206
- # Load first image
207
- _load_image_at_index(0)
208
-
209
- label = f"Next: Label images (1 of {n})" if n > 1 else "Next: Label image"
210
- if st.button(label):
211
- go_to(2)
212
- st.rerun()
213
-
214
- # =============================================================================
215
- # Step 2: Label (interactive canvas + prompts + SAM3)
216
  # =============================================================================
217
- elif st.session_state.step == 2:
218
- image = st.session_state.image
219
- if image is None:
220
- st.warning("No image loaded. Go back to Upload.")
221
- if st.button("Back to Upload"):
222
- go_to(1)
223
- st.rerun()
224
- else:
225
- n_images = len(st.session_state.images)
226
- img_idx = st.session_state.image_index
227
- img_label = f" ({img_idx + 1} of {n_images})" if n_images > 1 else ""
228
- st.header(f"Step 2: Label — {st.session_state.filename}{img_label}")
229
-
230
- # Compute canvas dimensions
231
- img_w, img_h = image.size
232
- canvas_w = min(img_w, CANVAS_MAX_WIDTH)
233
- scale = canvas_w / img_w
234
- canvas_h = int(img_h * scale)
235
- st.session_state.canvas_scale = scale
236
-
237
- # Build background image with accepted detections + SAM results overlay
238
- bg = image.copy()
239
- if st.session_state.accepted_detections:
240
- bg = overlay_accepted(bg, st.session_state.accepted_detections)
241
- if st.session_state.sam_results:
242
- masks = [d["mask"] for d in st.session_state.sam_results]
243
- boxes = [d["box"] for d in st.session_state.sam_results]
244
- bg = overlay_masks(bg, masks)
245
- bg = overlay_boxes(bg, boxes)
246
- bg_rgb = bg.convert("RGB")
247
-
248
- # --- Two-column layout ---
249
- col_canvas, col_controls = st.columns([3, 2])
250
-
251
- with col_controls:
252
- st.subheader("Prompts")
253
-
254
- # Text prompt input
255
- text_col, btn_col = st.columns([3, 1])
256
- with text_col:
257
- text_input = st.text_input("Text prompt", key="text_prompt_input", label_visibility="collapsed", placeholder="Describe objects to find...")
258
- with btn_col:
259
- if st.button("Add text", disabled=not text_input):
260
- st.session_state.prompts.append({
261
- "id": _next_prompt_id(),
262
- "type": "text",
263
- "coords": [],
264
- "label": text_input,
265
- "point_label": None,
266
- })
267
- st.rerun()
268
-
269
- # Prompt table
270
- prompts = st.session_state.prompts
271
- if prompts:
272
- st.caption(f"{len(prompts)} prompt(s)")
273
- for i, p in enumerate(prompts):
274
- pcol1, pcol2, pcol3, pcol4 = st.columns([1, 2, 3, 1])
275
- with pcol1:
276
- st.text(p["id"])
277
- with pcol2:
278
- st.text(p["type"])
279
- with pcol3:
280
- new_label = st.text_input(
281
- "label", value=p.get("label", ""), key=f"plabel_{p['id']}",
282
- label_visibility="collapsed",
283
- )
284
- if new_label != p.get("label", ""):
285
- st.session_state.prompts[i]["label"] = new_label
286
- with pcol4:
287
- if p["type"] == "point":
288
- is_pos = p.get("point_label", 1) == 1
289
- toggled = st.checkbox("+", value=is_pos, key=f"ptoggle_{p['id']}")
290
- st.session_state.prompts[i]["point_label"] = 1 if toggled else 0
291
- if st.button("X", key=f"pdel_{p['id']}"):
292
- st.session_state.prompts.pop(i)
293
- st.rerun()
294
- else:
295
- st.caption("No prompts yet. Draw boxes or points on the canvas, or add text prompts above.")
296
-
297
- # Threshold
298
- threshold = st.slider("Confidence threshold", 0.0, 1.0, 0.5, 0.05, key="label_threshold")
299
-
300
- # Run SAM3 button
301
- @st.fragment
302
- def run_sam3():
303
- prompts = st.session_state.prompts
304
- has_prompts = len(prompts) > 0
305
- if st.button("Run SAM3", type="primary", disabled=not has_prompts):
306
- # Gather prompts by type
307
- text_parts = [p["label"] for p in prompts if p["type"] == "text" and p["label"]]
308
- text_combined = ". ".join(text_parts) if text_parts else None
309
-
310
- box_list = [p["coords"] for p in prompts if p["type"] == "box" and len(p["coords"]) == 4]
311
- box_list = box_list if box_list else None
312
-
313
- pt_prompts = [p for p in prompts if p["type"] == "point" and len(p["coords"]) == 2]
314
- points = [p["coords"] for p in pt_prompts] if pt_prompts else None
315
- point_labels = [p.get("point_label", 1) for p in pt_prompts] if pt_prompts else None
316
-
317
- status = st.status("Running SAM3 inference...", expanded=True)
318
- status.write(f"Running on {get_device()} with {len(prompts)} prompt(s)...")
319
- results = combined_prompt_inference(
320
- image, text=text_combined, boxes=box_list,
321
- points=points, point_labels=point_labels,
322
- threshold=threshold,
323
- )
324
- status.write(f"Found {len(results)} objects!")
325
- status.update(label="Inference complete", state="complete")
326
- st.session_state.sam_results = results
327
- st.session_state.label_round += 1
328
- st.rerun(scope="app")
329
-
330
- run_sam3()
331
 
332
- with col_canvas:
333
- # Drawing mode selector
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  drawing_mode = st.radio(
335
  "Drawing mode",
336
  ["rect", "point", "transform"],
@@ -338,12 +299,6 @@ elif st.session_state.step == 2:
338
  key="drawing_mode",
339
  )
340
 
341
- # Build initial_drawing from existing prompts
342
- initial = _prompts_to_fabric_json(
343
- [p for p in st.session_state.prompts if p["type"] in ("box", "point")],
344
- scale,
345
- )
346
-
347
  canvas_result = st_canvas(
348
  fill_color="rgba(255, 0, 0, 0.1)",
349
  stroke_width=2,
@@ -353,118 +308,257 @@ elif st.session_state.step == 2:
353
  height=canvas_h,
354
  drawing_mode=drawing_mode,
355
  point_display_radius=5,
356
- initial_drawing=initial,
357
  key=f"canvas_{img_idx}_{st.session_state.label_round}",
358
  )
359
 
360
- # Sync canvas objects back to prompts
361
  if canvas_result.json_data is not None:
362
  canvas_objects = canvas_result.json_data.get("objects", [])
363
- # Count non-text prompts currently in state
364
- spatial_prompts = [p for p in st.session_state.prompts if p["type"] in ("box", "point")]
365
- n_existing = len(spatial_prompts)
366
  n_canvas = len(canvas_objects)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
- if n_canvas > n_existing:
369
- # New objects drawn on canvas — append them
370
- for obj in canvas_objects[n_existing:]:
371
- converted = _canvas_to_image(obj, scale)
372
- if converted:
373
- st.session_state.prompts.append({
374
- "id": _next_prompt_id(),
375
- "type": converted["type"],
376
- "coords": converted["coords"],
377
- "label": "",
378
- "point_label": 1 if converted["type"] == "point" else None,
379
- })
380
- st.rerun()
381
-
382
- # --- SAM3 Results Section ---
383
- if st.session_state.sam_results:
384
- st.divider()
385
- st.subheader(f"SAM3 Results {len(st.session_state.sam_results)} detections")
386
-
387
- # Show overlay
388
- results_vis = image.copy()
389
- if st.session_state.accepted_detections:
390
- results_vis = overlay_accepted(results_vis, st.session_state.accepted_detections)
391
- masks = [d["mask"] for d in st.session_state.sam_results]
392
- boxes = [d["box"] for d in st.session_state.sam_results]
393
- det_labels = [f"Detection {i+1}" for i in range(len(boxes))]
394
- results_vis = overlay_masks(results_vis, masks)
395
- results_vis = overlay_boxes(results_vis, boxes, labels=det_labels)
396
- st.image(results_vis, caption="SAM3 results", width="stretch")
397
-
398
- # Batch accept
399
- batch_label = st.text_input("Label for all detections", key="batch_accept_label")
400
- col_accept, col_discard = st.columns(2)
401
- with col_accept:
402
- if st.button("Accept all", type="primary", disabled=not batch_label):
403
- for det in st.session_state.sam_results:
404
- det["accepted"] = True
405
- det["label"] = batch_label
406
- new_accepted = st.session_state.sam_results
407
- unique = deduplicate(new_accepted, st.session_state.accepted_detections)
408
- st.session_state.accepted_detections.extend(unique)
409
- _accepted_to_prompts(unique)
410
- st.session_state.sam_results = []
411
- st.rerun()
412
- with col_discard:
413
- if st.button("Discard all"):
414
- st.session_state.sam_results = []
415
- st.rerun()
416
-
417
- # Individual review in expander
418
- with st.expander("Review individual detections"):
419
- for i, det in enumerate(st.session_state.sam_results):
420
- det_col1, det_col2 = st.columns([3, 1])
421
- with det_col1:
422
- st.text(f"Detection {i+1} — score: {det['score']:.3f} — box: [{', '.join(f'{c:.0f}' for c in det['box'])}]")
423
- with det_col2:
424
- ind_label = st.text_input("Label", key=f"det_label_{i}", label_visibility="collapsed")
425
- if st.button("Accept", key=f"det_accept_{i}"):
426
- det["accepted"] = True
427
- det["label"] = ind_label
428
- unique = deduplicate([det], st.session_state.accepted_detections)
429
- st.session_state.accepted_detections.extend(unique)
430
- _accepted_to_prompts(unique)
431
- st.session_state.sam_results.pop(i)
432
- st.rerun()
433
-
434
- # Accepted count
435
- if st.session_state.accepted_detections:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  st.divider()
437
- st.info(f"**{len(st.session_state.accepted_detections)}** accepted detections for this image")
438
 
439
- # --- Navigation ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  st.divider()
441
- nav_cols = st.columns(3)
442
- with nav_cols[0]:
443
- if st.button("Back to Upload"):
444
- go_to(1)
445
- st.rerun()
446
- with nav_cols[1]:
447
- has_next_image = n_images > 1 and img_idx < n_images - 1
448
- if has_next_image:
449
- next_name = st.session_state.images[img_idx + 1][0]
450
- if st.button(f"Next image: {next_name}"):
451
- # Stamp image_path and merge
452
- for det in st.session_state.accepted_detections:
453
- det["image_path"] = st.session_state.filename
454
- st.session_state.all_image_detections.extend(st.session_state.accepted_detections)
455
- _reset_per_image_state()
456
- _load_image_at_index(img_idx + 1)
457
- st.rerun()
458
- with nav_cols[2]:
459
- total = len(st.session_state.all_image_detections) + len(st.session_state.accepted_detections)
460
- if st.button(f"Done — Export ({total} detections)" if total else "Done — Export"):
461
- # Stamp and merge current image
462
- for det in st.session_state.accepted_detections:
463
- det["image_path"] = st.session_state.filename
464
- st.session_state.all_image_detections.extend(st.session_state.accepted_detections)
465
- st.session_state.accepted_detections = []
466
- go_to(3)
467
- st.rerun()
468
 
469
  # =============================================================================
470
  # Step 3: Export
@@ -473,7 +567,6 @@ elif st.session_state.step == 3:
473
  st.header("Step 3: Export Manifest")
474
 
475
  combined = list(st.session_state.all_image_detections)
476
- # Re-index combined IDs
477
  for i, det in enumerate(combined):
478
  det["id"] = i
479
 
@@ -514,14 +607,11 @@ elif st.session_state.step == 3:
514
  elif st.session_state.step == 4:
515
  st.header("Step 4: Fine-Tune SAM3")
516
 
517
- # Build combined detections list
518
  combined_dets = list(st.session_state.all_image_detections)
519
- # Stamp image_path on detections if not set
520
  for det in combined_dets:
521
  if "image_path" not in det:
522
  det["image_path"] = st.session_state.filename
523
 
524
- # Only keep detections with masks
525
  train_dets = [d for d in combined_dets if d.get("accepted") and d.get("mask") is not None]
526
  image_names = list(set(d["image_path"] for d in train_dets))
527
 
@@ -533,7 +623,6 @@ elif st.session_state.step == 4:
533
  go_to(3)
534
  st.rerun()
535
  else:
536
- # Hyperparameters
537
  col_ep, col_lr = st.columns(2)
538
  with col_ep:
539
  epochs = st.slider("Epochs", 1, 50, 5, key="train_epochs")
@@ -554,7 +643,6 @@ elif st.session_state.step == 4:
554
  processor = None
555
  result = None
556
  try:
557
- # 1. Free GPU memory from cached inference model
558
  status = st.status("Preparing for training...", expanded=True)
559
  status.write("Clearing cached inference model to free GPU memory...")
560
  load_model.clear()
@@ -563,20 +651,16 @@ elif st.session_state.step == 4:
563
  elif _torch.backends.mps.is_available():
564
  _torch.mps.empty_cache()
565
 
566
- # 2. Load fresh trainable model
567
  status.write("Loading fresh model for training...")
568
  processor, model = load_model_for_training()
569
 
570
- # 3. Freeze encoder
571
  trainable, total = freeze_encoder(model)
572
  status.write(f"Frozen encoder. Trainable params: {trainable:,} / {total:,}")
573
 
574
- # 4. Build dataset
575
  images_dict = {name: img for name, img in st.session_state.images}
576
  dataset = SAM3FineTuneDataset(images_dict, train_dets, processor)
577
  status.write(f"Dataset ready: {len(dataset)} samples")
578
 
579
- # 5. Train with progress bar
580
  status.update(label="Training...", state="running")
581
  progress_bar = st.progress(0, text="Starting training...")
582
 
@@ -589,14 +673,12 @@ elif st.session_state.step == 4:
589
 
590
  st.session_state.training_loss_history = result["loss_history"]
591
 
592
- # 6. Save model zip
593
  status.write("Packaging fine-tuned model...")
594
  st.session_state.finetuned_model_bytes = get_model_zip_bytes(result["model"], processor)
595
 
596
  st.session_state.training_complete = True
597
  status.update(label="Training complete!", state="complete")
598
  finally:
599
- # Always clean up GPU memory, even if stopped/interrupted
600
  del model, processor, result
601
  if _torch.cuda.is_available():
602
  _torch.cuda.empty_cache()
@@ -605,17 +687,13 @@ elif st.session_state.step == 4:
605
 
606
  st.rerun()
607
  else:
608
- # Post-training UI
609
  st.success("Training complete!")
610
 
611
- # Loss curve
612
  loss_hist = st.session_state.training_loss_history
613
  if loss_hist:
614
- import pandas as pd
615
  df = pd.DataFrame({"Epoch": range(1, len(loss_hist) + 1), "Avg Loss": loss_hist})
616
  st.line_chart(df, x="Epoch", y="Avg Loss")
617
 
618
- # Download button
619
  if st.session_state.finetuned_model_bytes:
620
  st.download_button(
621
  label="Download fine-tuned model (.zip)",
 
1
  import streamlit as st
2
+ import pandas as pd
3
  from PIL import Image
4
  from streamlit_drawable_canvas import st_canvas
5
 
6
  from sam3_engine import get_device, load_model, load_model_for_training, combined_prompt_inference
7
+ from viz import overlay_detections_by_class, _hex_to_rgb, CLASS_COLORS
8
  from manifest import build_manifest, manifest_to_json, deduplicate
9
  from training import SAM3FineTuneDataset, freeze_encoder, run_training, get_model_zip_bytes
10
 
 
16
 
17
  # --- Session state defaults ---
18
  defaults = {
19
+ "step": 2,
20
  "image": None,
21
  "filename": None,
22
  "images": [], # list of (filename, PIL.Image) tuples
23
  "image_index": 0, # current position in batch
24
  "all_image_detections": [], # accumulated detections across ALL images
25
+ "classes": [], # list of class dicts
26
+ "pending_box_coords": None, # drawn box awaiting class assignment
27
+ "detection_id_counter": 0, # monotonic ID for detections
 
28
  "label_round": 0, # iteration counter for canvas key stability
29
  "canvas_scale": 1.0, # image-to-canvas scale factor
30
+ "_last_canvas_count": 0, # track canvas object count for new-drawing detection
31
+ "selected_detection_id": None, # ID of detection selected for highlighting
32
  "training_loss_history": [],
33
  "training_complete": False,
34
  "finetuned_model_bytes": None,
 
38
  st.session_state[key] = val
39
 
40
 
 
 
 
 
 
 
 
 
 
 
41
  def _load_image_at_index(idx: int):
42
  """Load the image at the given batch index into session state."""
43
  filename, image = st.session_state.images[idx]
 
50
  st.session_state.step = step
51
 
52
 
53
+ def _next_detection_id() -> int:
54
+ st.session_state.detection_id_counter += 1
55
+ return st.session_state.detection_id_counter
56
+
57
+
58
+ def _get_current_image_detections(visible_only=False):
59
+ """Get all detections for the current image across all classes."""
60
+ fname = st.session_state.filename
61
+ if not fname:
62
+ return []
63
+ dets = []
64
+ for cls in st.session_state.classes:
65
+ if visible_only and not cls["visible"]:
66
+ continue
67
+ for det in cls["detections"]:
68
+ if det.get("image_path") == fname:
69
+ dets.append(det)
70
+ return dets
71
+
72
+
73
  # --- Coordinate scaling helpers ---
74
  def _canvas_to_image(obj: dict, scale: float):
75
  """Convert a Fabric.js canvas object to image-space coordinates."""
 
92
  ],
93
  }
94
  elif obj_type == "circle":
 
95
  r = obj.get("radius", 0)
96
  cx = (left + r * sx) / scale
97
  cy = (top + r * sy) / scale
 
102
  return None
103
 
104
 
105
+ def _add_class(name: str):
106
+ """Create a new class and return it."""
107
+ color = CLASS_COLORS[len(st.session_state.classes) % len(CLASS_COLORS)]
108
+ cls = {
109
+ "name": name,
110
+ "color": color,
111
+ "visible": True,
112
+ "threshold": 0.85,
113
+ "detections": [],
114
+ }
115
+ st.session_state.classes.append(cls)
116
+ return cls
117
+
118
+
119
+ @st.dialog("Assign to Class")
120
+ def assign_drawing_dialog():
121
+ """Modal dialog for assigning a drawn box/point to a class."""
122
+ pending = st.session_state.pending_box_coords
123
+ if pending is None:
124
+ st.warning("No pending drawing.")
125
+ return
126
+
127
+ st.write(f"New **{pending['type']}** drawn. Choose a class to assign it to:")
128
+
129
+ # Existing class selector
130
+ class_names = [c["name"] for c in st.session_state.classes]
131
+ chosen_existing = None
132
+ if class_names:
133
+ chosen_existing = st.selectbox("Existing class", class_names, key="dlg_class_select")
134
+
135
+ # Or create a new class
136
+ st.divider()
137
+ new_name = st.text_input("Or create a new class", key="dlg_new_class", placeholder="e.g. Cable, Label...")
138
+
139
+ st.divider()
140
+ assign_col, cancel_col = st.columns(2)
141
+ with assign_col:
142
+ can_assign = bool(new_name) or bool(chosen_existing)
143
+ if st.button("Assign", type="primary", disabled=not can_assign, use_container_width=True):
144
+ # Determine target class
145
+ if new_name:
146
+ existing_names = {c["name"] for c in st.session_state.classes}
147
+ if new_name not in existing_names:
148
+ target_cls = _add_class(new_name)
149
+ else:
150
+ target_cls = next(c for c in st.session_state.classes if c["name"] == new_name)
151
+ else:
152
+ target_cls = next(c for c in st.session_state.classes if c["name"] == chosen_existing)
153
+
154
+ det = {
155
+ "id": _next_detection_id(),
156
+ "mask": None,
157
+ "box": pending["coords"] if pending["type"] == "box" else [
158
+ pending["coords"][0] - 10, pending["coords"][1] - 10,
159
+ pending["coords"][0] + 10, pending["coords"][1] + 10,
160
+ ],
161
+ "score": 1.0,
162
+ "label": target_cls["name"],
163
+ "accepted": True,
164
+ "image_path": st.session_state.filename,
165
+ }
166
+ target_cls["detections"].append(det)
167
+ st.session_state.pending_box_coords = None
168
+ st.session_state.label_round += 1
169
+ st.session_state._last_canvas_count = 0
170
+ st.rerun()
171
+ with cancel_col:
172
+ if st.button("Cancel", use_container_width=True):
173
+ st.session_state.pending_box_coords = None
174
+ st.session_state.label_round += 1
175
+ st.session_state._last_canvas_count = 0
176
+ st.rerun()
177
 
178
 
179
  # --- Sidebar ---
 
182
  device = get_device()
183
  st.caption(f"Device: **{device}**")
184
  st.caption("Model: `facebook/sam3`")
185
+ with st.spinner("Loading SAM3 model..."):
186
+ load_model()
187
+ st.caption("Model loaded")
188
  st.divider()
189
 
190
+ step_labels = ["Label", "Export", "Train"]
191
  current = st.session_state.step
192
+ for i, label in enumerate(step_labels, start=2):
193
  if current == i:
194
  marker = f"-> {i}. {label}"
195
  else:
 
201
  st.divider()
202
  st.metric("Image", f"{st.session_state.image_index + 1} of {n_images}")
203
 
204
+ total_dets = sum(len(c["detections"]) for c in st.session_state.classes)
205
+ if total_dets:
 
206
  st.divider()
207
+ st.metric("Total detections", total_dets)
 
 
 
208
 
209
  st.divider()
210
  if st.button("Start over"):
 
214
 
215
 
216
  # =============================================================================
217
+ # Step 2: Label (3-column class-centric layout)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  # =============================================================================
219
+ if st.session_state.step == 2:
220
+ col_files, col_canvas, col_controls = st.columns([1, 3, 2])
221
+
222
+ # --- Left column: File list ---
223
+ with col_files:
224
+ st.subheader("Images")
225
+ uploaded_files = st.file_uploader(
226
+ "Upload images",
227
+ type=["png", "jpg", "jpeg"],
228
+ accept_multiple_files=True,
229
+ label_visibility="collapsed",
230
+ )
231
+ if uploaded_files:
232
+ existing_names = {name for name, _ in st.session_state.images}
233
+ for f in uploaded_files:
234
+ if f.name not in existing_names:
235
+ st.session_state.images.append((f.name, Image.open(f).convert("RGB")))
236
+ existing_names.add(f.name)
237
+ # Auto-load first image if none loaded
238
+ if st.session_state.image is None and st.session_state.images:
239
+ _load_image_at_index(0)
240
+ st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
+ # Show file list with thumbnails
243
+ if st.session_state.images:
244
+ filenames = [name for name, _ in st.session_state.images]
245
+ for i, (name, img) in enumerate(st.session_state.images):
246
+ st.image(img, width=100)
247
+ is_current = (i == st.session_state.image_index)
248
+ if st.button(
249
+ name,
250
+ key=f"file_select_{i}",
251
+ type="primary" if is_current else "secondary",
252
+ use_container_width=True,
253
+ ):
254
+ if not is_current:
255
+ _load_image_at_index(i)
256
+ st.session_state.label_round += 1
257
+ st.session_state._last_canvas_count = 0
258
+ st.session_state.pending_box_coords = None
259
+ st.session_state.selected_detection_id = None
260
+ st.rerun()
261
+
262
+ # --- Center column: Canvas ---
263
+ with col_canvas:
264
+ image = st.session_state.image
265
+ if image is None:
266
+ st.info("Upload images in the left panel to get started.")
267
+ else:
268
+ img_idx = st.session_state.image_index
269
+ n_images = len(st.session_state.images)
270
+ img_label = f" ({img_idx + 1} of {n_images})" if n_images > 1 else ""
271
+ st.subheader(f"{st.session_state.filename}{img_label}")
272
+
273
+ # Compute canvas dimensions
274
+ img_w, img_h = image.size
275
+ canvas_w = min(img_w, CANVAS_MAX_WIDTH)
276
+ scale = canvas_w / img_w
277
+ canvas_h = int(img_h * scale)
278
+ st.session_state.canvas_scale = scale
279
+
280
+ # Build background with visible detections overlaid
281
+ visible_dets = _get_current_image_detections(visible_only=True)
282
+ bg = image.copy()
283
+ if visible_dets:
284
+ # Build color map from class definitions
285
+ color_map = {}
286
+ for cls in st.session_state.classes:
287
+ if cls["visible"]:
288
+ color_map[cls["name"]] = _hex_to_rgb(cls["color"])
289
+ color_map[""] = (180, 180, 180)
290
+ hl_ids = {st.session_state.selected_detection_id} if st.session_state.selected_detection_id is not None else None
291
+ bg = overlay_detections_by_class(bg, visible_dets, color_override=color_map, highlight_ids=hl_ids)
292
+ bg_rgb = bg.convert("RGB")
293
+
294
+ # Drawing mode
295
  drawing_mode = st.radio(
296
  "Drawing mode",
297
  ["rect", "point", "transform"],
 
299
  key="drawing_mode",
300
  )
301
 
 
 
 
 
 
 
302
  canvas_result = st_canvas(
303
  fill_color="rgba(255, 0, 0, 0.1)",
304
  stroke_width=2,
 
308
  height=canvas_h,
309
  drawing_mode=drawing_mode,
310
  point_display_radius=5,
 
311
  key=f"canvas_{img_idx}_{st.session_state.label_round}",
312
  )
313
 
314
+ # Detect new drawings
315
  if canvas_result.json_data is not None:
316
  canvas_objects = canvas_result.json_data.get("objects", [])
 
 
 
317
  n_canvas = len(canvas_objects)
318
+ last_count = st.session_state._last_canvas_count
319
+
320
+ if n_canvas > last_count and st.session_state.pending_box_coords is None:
321
+ # New object drawn — convert the last one
322
+ new_obj = canvas_objects[-1]
323
+ converted = _canvas_to_image(new_obj, scale)
324
+ if converted:
325
+ st.session_state.pending_box_coords = converted
326
+ st.session_state._last_canvas_count = n_canvas
327
+ st.rerun()
328
+
329
+ # Open assignment dialog when a new drawing is pending
330
+ if st.session_state.pending_box_coords is not None:
331
+ assign_drawing_dialog()
332
+
333
# --- Right column: Class controls ---
with col_controls:
    st.subheader("Classes")

    # Class input. text_input returns its current value on every rerun, so a
    # non-empty value registers the class immediately; the membership check
    # prevents duplicate registration while the text stays in the widget.
    new_class = st.text_input("New class name", key="new_class_input", placeholder="e.g. Server, Cable, Label...")
    if new_class:
        existing_names = {c["name"] for c in st.session_state.classes}
        if new_class not in existing_names:
            # Cycle through the palette so each class gets a stable color.
            color = CLASS_COLORS[len(st.session_state.classes) % len(CLASS_COLORS)]
            st.session_state.classes.append({
                "name": new_class,
                "color": color,
                "visible": True,
                "threshold": 0.85,
                "detections": [],
            })
            st.rerun()

    # Class cards. Deletions are collected during the render loop and applied
    # afterwards so list indices stay valid while iterating.
    classes_to_delete = []
    dets_to_delete = []  # list of (class_idx, det_id)
    find_single_class_idx = None  # index of class to run per-class find

    for ci, cls in enumerate(st.session_state.classes):
        with st.container(border=True):
            # Header row: colored class name, visibility toggle, delete button.
            hcol_name, hcol_vis, hcol_del = st.columns([3, 1, 1])
            with hcol_name:
                st.markdown(
                    f"<span style='color:{cls['color']};font-weight:bold;font-size:1.1em'>"
                    f"{cls['name']}</span>",
                    unsafe_allow_html=True,
                )
            with hcol_vis:
                vis = st.checkbox("👁", value=cls["visible"], key=f"vis_{ci}", label_visibility="collapsed")
                if vis != cls["visible"]:
                    st.session_state.classes[ci]["visible"] = vis
                    st.rerun()
            with hcol_del:
                if st.button("🗑", key=f"del_class_{ci}"):
                    classes_to_delete.append(ci)

            # Detections for the current image — rendered as colored chips.
            fname = st.session_state.filename
            if fname:
                img_dets = [d for d in cls["detections"] if d.get("image_path") == fname]
                if img_dets:
                    for det in img_dets:
                        dcol_label, dcol_del = st.columns([4, 1])
                        with dcol_label:
                            is_sel = st.session_state.selected_detection_id == det["id"]
                            # Colored detection chip via markdown; a yellow
                            # border marks the currently selected detection.
                            border_style = "3px solid yellow" if is_sel else f"2px solid {cls['color']}"
                            st.markdown(
                                f"<div style='background:{cls['color']}22;border:{border_style};"
                                f"border-radius:6px;padding:4px 8px;text-align:center;"
                                f"color:{cls['color']};font-weight:600;cursor:default'>"
                                f"{cls['name']} {det['id']} {det['score']:.0%}</div>",
                                unsafe_allow_html=True,
                            )
                            if st.button(
                                "Select" if not is_sel else "Deselect",
                                key=f"sel_det_{ci}_{det['id']}",
                                use_container_width=True,
                            ):
                                # Toggle selection state for this detection.
                                if is_sel:
                                    st.session_state.selected_detection_id = None
                                else:
                                    st.session_state.selected_detection_id = det["id"]
                                st.rerun()
                        with dcol_del:
                            if st.button("🗑", key=f"del_det_{ci}_{det['id']}"):
                                dets_to_delete.append((ci, det["id"]))
                else:
                    st.caption("No detections on this image")

            # Per-class confidence threshold for SAM3 inference.
            new_thresh = st.slider(
                "Confidence threshold", 0.0, 1.0, cls["threshold"], 0.05,
                key=f"thresh_{ci}",
            )
            # Fixed: was f"Default 85%" — an f-string with no placeholders.
            st.caption("Default 85%")
            if new_thresh != cls["threshold"]:
                st.session_state.classes[ci]["threshold"] = new_thresh

            # Per-class Find Objects button; the actual inference runs after
            # the render loop, keyed off find_single_class_idx.
            if st.session_state.image is not None:
                # Fixed: dropped pointless f-prefix on a literal with no placeholders.
                if st.button("🔍 Find Objects for this Class", key=f"find_class_{ci}", use_container_width=True):
                    find_single_class_idx = ci

    # Process deletions after the loop. Classes are popped in reverse index
    # order so earlier pops don't shift the indices of later ones.
    if classes_to_delete:
        for ci in sorted(classes_to_delete, reverse=True):
            st.session_state.classes.pop(ci)
        st.rerun()

    if dets_to_delete:
        for ci, det_id in dets_to_delete:
            # Clear the selection if the selected detection is being removed.
            if st.session_state.selected_detection_id == det_id:
                st.session_state.selected_detection_id = None
            st.session_state.classes[ci]["detections"] = [
                d for d in st.session_state.classes[ci]["detections"] if d["id"] != det_id
            ]
        # Bump the canvas key and reset the drawing counter so stale canvas
        # objects don't re-trigger the new-drawing detection.
        st.session_state.label_round += 1
        st.session_state._last_canvas_count = 0
        st.rerun()
440
+
441
# --- Per-class Find Objects execution ---
# Runs SAM3 inference for a single class when its "Find Objects" button was
# pressed during the render loop above.
if find_single_class_idx is not None:
    cls = st.session_state.classes[find_single_class_idx]
    image = st.session_state.image
    fname = st.session_state.filename
    status = st.status(f"Finding {cls['name']}...", expanded=True)
    status.write(f"Running on {get_device()} (threshold {cls['threshold']:.0%})...")

    # Compute the current image's detections once; the original code ran this
    # identical filter twice (for box prompts and again for deduplication).
    existing = [
        d for d in cls["detections"]
        if d.get("image_path") == fname
    ]
    existing_boxes = [d["box"] for d in existing]
    dets = combined_prompt_inference(
        image,
        text=cls["name"],
        # Seed inference with already-accepted boxes as visual exemplars.
        boxes=existing_boxes if existing_boxes else None,
        threshold=cls["threshold"],
    )
    # Tag each new detection with class label, image, and a fresh unique id.
    for d in dets:
        d["label"] = cls["name"]
        d["accepted"] = True
        d["image_path"] = fname
        d["id"] = _next_detection_id()

    # Drop detections that duplicate ones already recorded for this image.
    unique = deduplicate(dets, existing) if existing else dets
    cls["detections"].extend(unique)

    status.write(f"Found {len(unique)} new {cls['name']} detection(s)")
    status.update(label=f"Found {len(unique)} {cls['name']}", state="complete")
    # Refresh the canvas so new overlays render and old drawings don't re-fire.
    st.session_state.label_round += 1
    st.session_state._last_canvas_count = 0
    st.rerun()
477
+
478
# --- Find Objects for ALL classes button (with confirmation) ---
if st.session_state.classes and st.session_state.image is not None:
    st.divider()

    @st.fragment
    def find_all_objects():
        """Two-step confirm-then-run UI for batch SAM3 inference over every class.

        Runs as a Streamlit fragment so the confirm/cancel toggles rerun only
        this widget (scope="fragment"); a completed inference pass reruns the
        whole app (scope="app") to refresh the canvas overlays.
        """
        if "confirm_find_all" not in st.session_state:
            st.session_state.confirm_find_all = False

        if not st.session_state.confirm_find_all:
            if st.button("Find Objects for all classes", use_container_width=True):
                st.session_state.confirm_find_all = True
                st.rerun(scope="fragment")
        else:
            st.warning(f"This will run SAM3 for **{len(st.session_state.classes)}** class(es). Continue?")
            yes_col, no_col = st.columns(2)
            with yes_col:
                if st.button("Yes, find all", type="primary", use_container_width=True):
                    st.session_state.confirm_find_all = False
                    image = st.session_state.image
                    fname = st.session_state.filename
                    status = st.status("Running SAM3 inference...", expanded=True)
                    status.write(f"Running on {get_device()}...")

                    for cls in st.session_state.classes:
                        status.write(f"Finding **{cls['name']}** (threshold {cls['threshold']:.0%})...")

                        # Compute this image's detections once; the original
                        # ran the same filter twice per class (box prompts +
                        # dedup reference set).
                        existing = [
                            d for d in cls["detections"]
                            if d.get("image_path") == fname
                        ]
                        existing_boxes = [d["box"] for d in existing]
                        dets = combined_prompt_inference(
                            image,
                            text=cls["name"],
                            boxes=existing_boxes if existing_boxes else None,
                            threshold=cls["threshold"],
                        )
                        # Tag new detections with label, image, and fresh ids.
                        for d in dets:
                            d["label"] = cls["name"]
                            d["accepted"] = True
                            d["image_path"] = fname
                            d["id"] = _next_detection_id()

                        unique = deduplicate(dets, existing) if existing else dets
                        cls["detections"].extend(unique)
                        status.write(f" → {len(unique)} new {cls['name']} detection(s)")

                    status.update(label="Inference complete", state="complete")
                    # Refresh canvas key/counter so overlays redraw cleanly.
                    st.session_state.label_round += 1
                    st.session_state._last_canvas_count = 0
                    st.rerun(scope="app")
            with no_col:
                if st.button("Cancel", use_container_width=True):
                    st.session_state.confirm_find_all = False
                    st.rerun(scope="fragment")

    find_all_objects()
539
+
540
# --- Update Label Manifest button ---
# Snapshots every class's detections into the flat manifest list used by the
# export step, without leaving the labeling screen.
if st.session_state.classes:
    st.divider()
    if st.button("Update Label Manifest", use_container_width=True):
        all_dets = [
            det
            for klass in st.session_state.classes
            for det in klass["detections"]
        ]
        st.session_state.all_image_detections = all_dets
        st.success(f"Manifest updated: {len(all_dets)} detections")
549
+
550
# --- Navigation ---
# "Done" flattens all per-class detections into the global manifest list and
# advances the workflow to the export step.
if st.session_state.image is not None:
    st.divider()
    total = sum(len(c["detections"]) for c in st.session_state.classes)
    button_label = f"Done — Export ({total} detections)" if total else "Done — Export"
    if st.button(button_label):
        # Flatten all class detections into all_image_detections
        flattened = [
            det
            for klass in st.session_state.classes
            for det in klass["detections"]
        ]
        st.session_state.all_image_detections = flattened
        go_to(3)
        st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
 
563
  # =============================================================================
564
  # Step 3: Export
 
567
  st.header("Step 3: Export Manifest")
568
 
569
  combined = list(st.session_state.all_image_detections)
 
570
  for i, det in enumerate(combined):
571
  det["id"] = i
572
 
 
607
  elif st.session_state.step == 4:
608
  st.header("Step 4: Fine-Tune SAM3")
609
 
 
610
  combined_dets = list(st.session_state.all_image_detections)
 
611
  for det in combined_dets:
612
  if "image_path" not in det:
613
  det["image_path"] = st.session_state.filename
614
 
 
615
  train_dets = [d for d in combined_dets if d.get("accepted") and d.get("mask") is not None]
616
  image_names = list(set(d["image_path"] for d in train_dets))
617
 
 
623
  go_to(3)
624
  st.rerun()
625
  else:
 
626
  col_ep, col_lr = st.columns(2)
627
  with col_ep:
628
  epochs = st.slider("Epochs", 1, 50, 5, key="train_epochs")
 
643
  processor = None
644
  result = None
645
  try:
 
646
  status = st.status("Preparing for training...", expanded=True)
647
  status.write("Clearing cached inference model to free GPU memory...")
648
  load_model.clear()
 
651
  elif _torch.backends.mps.is_available():
652
  _torch.mps.empty_cache()
653
 
 
654
  status.write("Loading fresh model for training...")
655
  processor, model = load_model_for_training()
656
 
 
657
  trainable, total = freeze_encoder(model)
658
  status.write(f"Frozen encoder. Trainable params: {trainable:,} / {total:,}")
659
 
 
660
  images_dict = {name: img for name, img in st.session_state.images}
661
  dataset = SAM3FineTuneDataset(images_dict, train_dets, processor)
662
  status.write(f"Dataset ready: {len(dataset)} samples")
663
 
 
664
  status.update(label="Training...", state="running")
665
  progress_bar = st.progress(0, text="Starting training...")
666
 
 
673
 
674
  st.session_state.training_loss_history = result["loss_history"]
675
 
 
676
  status.write("Packaging fine-tuned model...")
677
  st.session_state.finetuned_model_bytes = get_model_zip_bytes(result["model"], processor)
678
 
679
  st.session_state.training_complete = True
680
  status.update(label="Training complete!", state="complete")
681
  finally:
 
682
  del model, processor, result
683
  if _torch.cuda.is_available():
684
  _torch.cuda.empty_cache()
 
687
 
688
  st.rerun()
689
  else:
 
690
  st.success("Training complete!")
691
 
 
692
  loss_hist = st.session_state.training_loss_history
693
  if loss_hist:
 
694
  df = pd.DataFrame({"Epoch": range(1, len(loss_hist) + 1), "Avg Loss": loss_hist})
695
  st.line_chart(df, x="Epoch", y="Avg Loss")
696
 
 
697
  if st.session_state.finetuned_model_bytes:
698
  st.download_button(
699
  label="Download fine-tuned model (.zip)",