Spaces:
Sleeping
Sleeping
switch to base and large segm models
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ import lightly_train
|
|
| 9 |
# --- CONFIGURATION ---
|
| 10 |
|
| 11 |
MARKDOWN_HEADER = """
|
| 12 |
-
# LightlyTrain
|
| 13 |
[GitHub](https://github.com/lightly-ai/lightly-train) | [Documentation](https://docs.lightly.ai/train)
|
| 14 |
|
| 15 |
This demo showcases **LightlyTrain**, a powerful library for self-supervised learning and fine-tuning.
|
|
@@ -23,14 +23,20 @@ DETECTION_MODELS = [
|
|
| 23 |
"dinov3/convnext-small-ltdetr-coco",
|
| 24 |
"dinov3/convnext-tiny-ltdetr-coco"
|
| 25 |
]
|
|
|
|
|
|
|
| 26 |
SEGMENTATION_MODELS = [
|
| 27 |
-
"dinov3/vits16-eomt-coco"
|
|
|
|
|
|
|
| 28 |
]
|
|
|
|
| 29 |
ALL_MODELS = DETECTION_MODELS + SEGMENTATION_MODELS
|
| 30 |
DEFAULT_MODEL = DETECTION_MODELS[0]
|
| 31 |
|
| 32 |
# 2. CLASS LISTS
|
| 33 |
-
|
|
|
|
| 34 |
COCO_DETECTION_CLASSES = [
|
| 35 |
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
|
| 36 |
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
|
|
@@ -43,8 +49,7 @@ COCO_DETECTION_CLASSES = [
|
|
| 43 |
"scissors", "teddy bear", "hair drier", "toothbrush"
|
| 44 |
]
|
| 45 |
|
| 46 |
-
# COCO-Stuff (171 Classes)
|
| 47 |
-
# Includes the 80 "things" above + 91 "stuff" classes (sky, road, etc.)
|
| 48 |
COCO_STUFF_CLASSES = [
|
| 49 |
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
|
| 50 |
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
|
|
@@ -106,6 +111,7 @@ def run_detection(model, image_input, original_image, confidence_threshold):
|
|
| 106 |
labels = results['labels'].cpu().numpy()
|
| 107 |
scores = results['scores'].cpu().numpy()
|
| 108 |
|
|
|
|
| 109 |
valid = scores > confidence_threshold
|
| 110 |
boxes = boxes[valid]
|
| 111 |
labels = labels[valid]
|
|
@@ -147,7 +153,6 @@ def run_segmentation(model, image_input, original_image):
|
|
| 147 |
mask_np = mask_tensor.cpu().numpy().astype(np.uint8)
|
| 148 |
mask_np = cv2.resize(mask_np, original_image.size, interpolation=cv2.INTER_NEAREST)
|
| 149 |
|
| 150 |
-
# Use COCO-Stuff classes
|
| 151 |
current_classes = COCO_STUFF_CLASSES
|
| 152 |
|
| 153 |
h, w = mask_np.shape
|
|
@@ -158,10 +163,10 @@ def run_segmentation(model, image_input, original_image):
|
|
| 158 |
labels_to_draw = []
|
| 159 |
|
| 160 |
for cls_id in unique_classes:
|
| 161 |
-
# Safety check: skip 'background' class
|
| 162 |
if cls_id == 255 or cls_id == -1: continue
|
| 163 |
|
| 164 |
-
#
|
| 165 |
if cls_id < 0 or cls_id >= len(current_classes): continue
|
| 166 |
|
| 167 |
class_name = current_classes[cls_id]
|
|
@@ -184,7 +189,7 @@ def run_segmentation(model, image_input, original_image):
|
|
| 184 |
cv2.putText(blended, text, (cx, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 3, cv2.LINE_AA)
|
| 185 |
cv2.putText(blended, text, (cx, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
|
| 186 |
|
| 187 |
-
analytics_text = f"Scene Contains (
|
| 188 |
|
| 189 |
return Image.fromarray(blended), analytics_text, {"classes_found": list(found_classes)}
|
| 190 |
|
|
@@ -231,7 +236,7 @@ with gr.Blocks(theme=theme) as demo:
|
|
| 231 |
examples=[
|
| 232 |
["http://farm3.staticflickr.com/2547/3933456087_6a4dfb4736_z.jpg", 0.4, 640, DEFAULT_MODEL],
|
| 233 |
["https://farm3.staticflickr.com/2294/2193565429_aed7c9ff98_z.jpg", 0.4, 640, DEFAULT_MODEL],
|
| 234 |
-
["
|
| 235 |
],
|
| 236 |
outputs=[output_img, output_text, output_json],
|
| 237 |
fn=run_prediction,
|
|
|
|
| 9 |
# --- CONFIGURATION ---
|
| 10 |
|
| 11 |
MARKDOWN_HEADER = """
|
| 12 |
+
# LightlyTrain Detection & Segmentation Demo 🚀
|
| 13 |
[GitHub](https://github.com/lightly-ai/lightly-train) | [Documentation](https://docs.lightly.ai/train)
|
| 14 |
|
| 15 |
This demo showcases **LightlyTrain**, a powerful library for self-supervised learning and fine-tuning.
|
|
|
|
| 23 |
"dinov3/convnext-small-ltdetr-coco",
|
| 24 |
"dinov3/convnext-tiny-ltdetr-coco"
|
| 25 |
]
|
| 26 |
+
|
| 27 |
+
# UPDATED: Added Base (vitb16) and Large (vitl16) for better accuracy
|
| 28 |
SEGMENTATION_MODELS = [
|
| 29 |
+
"dinov3/vitb16-eomt-coco", # Base (Recommended Balance)
|
| 30 |
+
"dinov3/vitl16-eomt-coco", # Large (Best Accuracy, Slower)
|
| 31 |
+
"dinov3/vits16-eomt-coco" # Small (Fastest)
|
| 32 |
]
|
| 33 |
+
|
| 34 |
ALL_MODELS = DETECTION_MODELS + SEGMENTATION_MODELS
|
| 35 |
DEFAULT_MODEL = DETECTION_MODELS[0]
|
| 36 |
|
| 37 |
# 2. CLASS LISTS
|
| 38 |
+
|
| 39 |
+
# COCO Detection (80 Classes)
|
| 40 |
COCO_DETECTION_CLASSES = [
|
| 41 |
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
|
| 42 |
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
|
|
|
|
| 49 |
"scissors", "teddy bear", "hair drier", "toothbrush"
|
| 50 |
]
|
| 51 |
|
| 52 |
+
# COCO-Stuff (171 Classes) - Standard Mapping
|
|
|
|
| 53 |
COCO_STUFF_CLASSES = [
|
| 54 |
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
|
| 55 |
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
|
|
|
|
| 111 |
labels = results['labels'].cpu().numpy()
|
| 112 |
scores = results['scores'].cpu().numpy()
|
| 113 |
|
| 114 |
+
# Filter
|
| 115 |
valid = scores > confidence_threshold
|
| 116 |
boxes = boxes[valid]
|
| 117 |
labels = labels[valid]
|
|
|
|
| 153 |
mask_np = mask_tensor.cpu().numpy().astype(np.uint8)
|
| 154 |
mask_np = cv2.resize(mask_np, original_image.size, interpolation=cv2.INTER_NEAREST)
|
| 155 |
|
|
|
|
| 156 |
current_classes = COCO_STUFF_CLASSES
|
| 157 |
|
| 158 |
h, w = mask_np.shape
|
|
|
|
| 163 |
labels_to_draw = []
|
| 164 |
|
| 165 |
for cls_id in unique_classes:
|
| 166 |
+
# Safety check: skip 'background' class
|
| 167 |
if cls_id == 255 or cls_id == -1: continue
|
| 168 |
|
| 169 |
+
# COCO-Stuff mapping
|
| 170 |
if cls_id < 0 or cls_id >= len(current_classes): continue
|
| 171 |
|
| 172 |
class_name = current_classes[cls_id]
|
|
|
|
| 189 |
cv2.putText(blended, text, (cx, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 3, cv2.LINE_AA)
|
| 190 |
cv2.putText(blended, text, (cx, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
|
| 191 |
|
| 192 |
+
analytics_text = f"Scene Contains (Segmentation):\n" + (", ".join(sorted(list(found_classes))) if found_classes else "None")
|
| 193 |
|
| 194 |
return Image.fromarray(blended), analytics_text, {"classes_found": list(found_classes)}
|
| 195 |
|
|
|
|
| 236 |
examples=[
|
| 237 |
["http://farm3.staticflickr.com/2547/3933456087_6a4dfb4736_z.jpg", 0.4, 640, DEFAULT_MODEL],
|
| 238 |
["https://farm3.staticflickr.com/2294/2193565429_aed7c9ff98_z.jpg", 0.4, 640, DEFAULT_MODEL],
|
| 239 |
+
["http://farm9.staticflickr.com/8092/8400332884_102a62b6c6_z.jpg", 0.6, 640, "dinov3/vits16-eomt-ade20k"],
|
| 240 |
],
|
| 241 |
outputs=[output_img, output_text, output_json],
|
| 242 |
fn=run_prediction,
|