add segmentation
app.py CHANGED
@@ -1,12 +1,31 @@
 import gradio as gr
 import numpy as np
 import supervision as sv
+import torch
+import torchvision.transforms.functional as F
+from torchvision.utils import draw_segmentation_masks
 from PIL import Image
 import lightly_train
 
 # --- CONFIGURATION ---
 
-# 1. DEFINE
+# 1. DEFINE MODELS
+# We separate them so we know which logic to use (Boxes vs. Masks)
+DETECTION_MODELS = [
+    "dinov3/vitt16-ltdetr-coco",         # Large (Vision Transformer)
+    "dinov3/convnext-base-ltdetr-coco",  # Base
+    "dinov3/convnext-small-ltdetr-coco", # Small
+    "dinov3/convnext-tiny-ltdetr-coco"   # Tiny (Fastest)
+]
+# LightlyTrain 'EoMT' models are for Segmentation
+SEGMENTATION_MODELS = [
+    "dinov3/vits16-eomt-ade20k"  # Semantic Segmentation (Scene understanding)
+]
+
+ALL_MODELS = DETECTION_MODELS + SEGMENTATION_MODELS
+DEFAULT_MODEL = DETECTION_MODELS[0]
+
+# COCO Labels (For Detection)
 COCO_CLASSES = [
     "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
     "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
@@ -19,46 +38,43 @@ COCO_CLASSES = [
     "scissors", "teddy bear", "hair drier", "toothbrush"
 ]
 
-# 2. DEFINE AVAILABLE MODELS
-MODEL_CHOICES = [
-    "dinov3/vitt16-ltdetr-coco",         # Large (Vision Transformer) - High Accuracy
-    "dinov3/convnext-base-ltdetr-coco",  # Base - Balanced
-    "dinov3/convnext-small-ltdetr-coco", # Small - Faster
-    "dinov3/convnext-tiny-ltdetr-coco"   # Tiny - Fastest
-]
-DEFAULT_MODEL = MODEL_CHOICES[0]
-
 # --- HELPER FUNCTIONS ---
 
-# Global dictionary to store loaded models
 loaded_models = {}
 
 def get_model(model_name):
-    """Loads the model if not already in memory."""
     if model_name in loaded_models:
         return loaded_models[model_name]
-
-    print(f"Downloading/Loading model: {model_name}...")
+    print(f"Loading model: {model_name}...")
     model = lightly_train.load_model(model_name)
     loaded_models[model_name] = model
     return model
 
-# Pre-load
+# Pre-load default
 get_model(DEFAULT_MODEL)
 
-
-def predict_and_annotate(image, confidence_threshold, model_name):
+def predict_dispatch(image, confidence_threshold, resolution, model_name):
     """
-
-    2. Filters boxes by confidence.
-    3. Maps Class IDs to Names.
+    Main handler that decides whether to run Detection or Segmentation.
     """
+    # 1. Apply Inference Resolution (Resize)
+    # This matches the 'Resolution Slider' feature in Roboflow
+    original_size = image.size
+    image_resized = image.resize((resolution, resolution))
+
     model = get_model(model_name)
 
+    # 2. Decide Task Type
+    if model_name in SEGMENTATION_MODELS:
+        return run_segmentation(model, image_resized, original_size)
+    else:
+        return run_detection(model, image_resized, confidence_threshold)
+
+def run_detection(model, image, confidence_threshold):
     # Run Inference
     results = model.predict(image)
 
-    #
+    # Process Results
     boxes = results['bboxes'].cpu().numpy()
     labels = results['labels'].cpu().numpy()
     scores = results['scores'].cpu().numpy()
@@ -69,78 +85,99 @@ def predict_and_annotate(image, confidence_threshold, model_name):
     labels = labels[valid_indices]
     scores = scores[valid_indices]
 
-    #
-    detections = sv.Detections(
-        xyxy=boxes,
-        confidence=scores,
-        class_id=labels
-    )
-
-    # Annotate
+    # Annotate using Supervision
+    detections = sv.Detections(xyxy=boxes, confidence=scores, class_id=labels)
     box_annotator = sv.BoxAnnotator()
     label_annotator = sv.LabelAnnotator()
 
-    # Generate Labels
     generated_labels = []
     for class_id, confidence in zip(detections.class_id, detections.confidence):
-        if class_id < len(COCO_CLASSES):
-            name = COCO_CLASSES[class_id]
-        else:
-            name = f"Class {class_id}"
-
+        name = COCO_CLASSES[class_id] if class_id < len(COCO_CLASSES) else f"Class {class_id}"
         generated_labels.append(f"{name} {confidence:.2f}")
 
     annotated_image = image.copy()
     annotated_image = box_annotator.annotate(scene=annotated_image, detections=detections)
     annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections, labels=generated_labels)
-
+
     return annotated_image
 
+def run_segmentation(model, image, original_size):
+    # Run Inference
+    # Note: LightlyTrain segmentation often returns raw masks.
+    # We use a simple visualizer here.
+    results = model.predict(image)
+
+    # Depending on the version, results might be a dict or a raw tensor.
+    # We assume the standard LightlyTrain dict output for 'masks' or 'semantic'.
+    # If using 'eomt' models, the output is typically a class map.
+
+    # For demo visualization, we simply overlay the class masks.
+    # Logic: Convert PIL -> Tensor -> Draw Masks -> PIL
+
+    # Simple fallback visualization in case the exact API varies:
+    # we rely on the model returning a 'masks' key or similar.
+    if isinstance(results, dict) and 'masks' in results:
+        masks = results['masks']  # shape (N, H, W) boolean or (H, W) class map
+    else:
+        # Some Lightly models return just the raw tensor output.
+        # For this demo, return the input unchanged to prevent a crash.
+        return image
+
+    # Visualization trick: use torchvision to draw masks
+    img_tensor = F.pil_to_tensor(image)
+
+    # If the output is a single class map (H, W), convert it to boolean masks
+    if masks.ndim == 2:
+        # Create one boolean mask for each unique class found
+        unique_classes = masks.unique()
+        boolean_masks = torch.stack([masks == c for c in unique_classes])
+    else:
+        boolean_masks = masks
+
+    # Draw
+    annotated_tensor = draw_segmentation_masks(img_tensor, boolean_masks.bool(), alpha=0.5)
+    return F.to_pil_image(annotated_tensor)
+
 # --- GRADIO UI ---
 
 with gr.Blocks() as demo:
-    gr.Markdown("# LightlyTrain
-    gr.Markdown("
+    gr.Markdown("# LightlyTrain Advanced Demo 🧠")
+    gr.Markdown("Switch between **Object Detection** (Boxes) and **Semantic Segmentation** (Pixel Masks).")
 
     with gr.Row():
        with gr.Column():
             input_img = gr.Image(type="pil", label="Input Image")
 
-
-
-                label="Confidence Threshold"
-
-
-
-
-                value=DEFAULT_MODEL,
-
-            )
-
-            run_btn = gr.Button("Run Detection", variant="primary")
+            # SETTINGS
+            with gr.Accordion("Advanced Settings", open=True):
+                conf_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Confidence Threshold (Detection Only)")
+
+                # NEW: Resolution Slider
+                res_slider = gr.Slider(384, 1024, value=640, step=32, label="Inference Resolution (px)")
+
+                model_selector = gr.Dropdown(ALL_MODELS, value=DEFAULT_MODEL, label="Model Checkpoint")
+
+            run_btn = gr.Button("Run Analysis", variant="primary")
 
         with gr.Column():
-            output_img = gr.Image(label="
+            output_img = gr.Image(label="Result")
 
     run_btn.click(
-        fn=predict_and_annotate,
-        inputs=[input_img, conf_slider, model_selector],
+        fn=predict_dispatch,
+        inputs=[input_img, conf_slider, res_slider, model_selector],
         outputs=output_img
     )
 
-
-    gr.Markdown("Click a row below to load the image.")
-
-    # UPDATED EXAMPLES WITH SAFE GITHUB LINKS
-    # These links are direct 'raw' files and will not block your app.
+    # UPDATED EXAMPLES (Safe Links)
     gr.Examples(
         examples=[
-            ["
-            ["https://farm3.staticflickr.com/2294/2193565429_aed7c9ff98_z.jpg", 0.4, DEFAULT_MODEL],
+            ["http://farm3.staticflickr.com/2547/3933456087_6a4dfb4736_z.jpg", 0.4, 640, DEFAULT_MODEL],
+            ["https://farm3.staticflickr.com/2294/2193565429_aed7c9ff98_z.jpg", 0.4, 640, DEFAULT_MODEL],
+            ["http://cocodataset.org/#explore?id=414046", 0.4, 512, "dinov3/vits16-eomt-ade20k"],
        ],
-        inputs=[input_img, conf_slider, model_selector],
+        inputs=[input_img, conf_slider, res_slider, model_selector],
         outputs=output_img,
-        fn=predict_and_annotate,
+        fn=predict_dispatch,
         cache_examples=True,
     )
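A note on get_model above: it is a plain memoization cache, one loaded model per checkpoint name, so switching entries in the model dropdown pays the load cost only once per process. A minimal sketch of the same pattern in isolation, where load_heavy_model is a hypothetical stand-in for lightly_train.load_model:

_cache = {}

def load_heavy_model(name):
    # Hypothetical stand-in for an expensive loader such as
    # lightly_train.load_model(name).
    return object()

def get_model(name):
    if name not in _cache:
        print(f"Loading model: {name}...")
        _cache[name] = load_heavy_model(name)
    return _cache[name]

m1 = get_model("demo-checkpoint")
m2 = get_model("demo-checkpoint")
assert m1 is m2  # the second call reuses the cached instance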
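The detection path delegates all drawing to the supervision library: sv.Detections wraps the raw arrays, and the box and label annotators draw onto a copy of the scene. A self-contained sketch of that flow with synthetic values (the boxes, scores, and class ids are invented for illustration):

import numpy as np
import supervision as sv

# Synthetic detections: two boxes in xyxy pixel coordinates.
boxes = np.array([[10, 10, 100, 100], [50, 60, 200, 220]], dtype=np.float32)
scores = np.array([0.90, 0.75], dtype=np.float32)
class_ids = np.array([0, 16])  # "person" and "dog" in COCO ordering

detections = sv.Detections(xyxy=boxes, confidence=scores, class_id=class_ids)
labels = [f"class {c} {s:.2f}" for c, s in zip(detections.class_id, detections.confidence)]

scene = np.zeros((256, 256, 3), dtype=np.uint8)  # blank canvas stand-in for a photo
scene = sv.BoxAnnotator().annotate(scene=scene, detections=detections)
scene = sv.LabelAnnotator().annotate(scene=scene, detections=detections, labels=labels)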
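On the segmentation side, draw_segmentation_masks expects a uint8 image tensor of shape (3, H, W) and boolean masks of shape (N, H, W), which is why run_segmentation converts an (H, W) class map into one boolean mask per class. A minimal sketch of that conversion with a synthetic 64x64 image and a two-class map standing in for real model output:

import torch
import torchvision.transforms.functional as F
from PIL import Image
from torchvision.utils import draw_segmentation_masks

# Synthetic stand-ins: a gray image and an (H, W) class map with
# class 0 on the left half and class 1 on the right half.
image = Image.new("RGB", (64, 64), color=(128, 128, 128))
class_map = torch.zeros(64, 64, dtype=torch.long)
class_map[:, 32:] = 1

# Same conversion as run_segmentation: one boolean mask per class id.
boolean_masks = torch.stack([class_map == c for c in class_map.unique()])

img_tensor = F.pil_to_tensor(image)  # uint8 tensor, shape (3, H, W)
overlay = draw_segmentation_masks(img_tensor, boolean_masks, alpha=0.5)
result = F.to_pil_image(overlay)  # back to PIL for display in Gradio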
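One wiring detail in the UI: once res_slider joins the inputs list, every gr.Examples row must supply one value per input component, in matching order, or Gradio rejects the row; the example rows above carry an explicit confidence and resolution value for that reason. Note also that cache_examples=True executes fn on every row at startup, so each example URL needs to resolve to an actual image. A minimal sketch of the contract, with a placeholder URL, model name, and handler:

import gradio as gr

def noop(image, conf, resolution, model_name):
    # Placeholder handler; a real app would run inference here.
    return image

with gr.Blocks() as demo:
    img = gr.Image(type="pil")
    conf = gr.Slider(0.0, 1.0, value=0.4)
    res = gr.Slider(384, 1024, value=640, step=32)
    model = gr.Dropdown(["some-model"], value="some-model")
    out = gr.Image()
    gr.Examples(
        # One value per component in `inputs`, in the same order.
        examples=[["https://example.com/photo.jpg", 0.4, 640, "some-model"]],
        inputs=[img, conf, res, model],
        outputs=out,
        fn=noop,
    )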