Spaces:

MLBench
/

Car-Mirror-Segmentation

Sleeping

File size: 43,684 Bytes

d1af3a0
 
 
 
 
 
da8f9fb
 
d1af3a0
6e550a4
d1af3a0
 
 
da8f9fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6b31f2
da8f9fb
 
 
e6b31f2
da8f9fb
 
 
 
 
 
a5e3b77
da8f9fb
fcce079
 
a5e3b77
da8f9fb
e6b31f2
fcce079
 
e6b31f2
da8f9fb
 
 
 
 
e6b31f2
da8f9fb
 
a5e3b77
da8f9fb
 
a9389ab
da8f9fb
d1af3a0
da8f9fb
d1af3a0
5096687
f769ef6
 
1aaee22
f769ef6
5096687
 
 
1aaee22
b22e1a8
1aaee22
 
 
b22e1a8
 
 
 
 
1aaee22
 
d1af3a0
81349ee
d1af3a0
 
 
1aaee22
5096687
 
 
 
 
 
1aaee22
5096687
 
 
 
 
 
 
 
 
 
 
 
1aaee22
fcce079
d1af3a0
 
ffe6aa0
 
 
 
 
 
 
 
 
 
 
da8f9fb
 
 
ffe6aa0
d1af3a0
 
 
6e550a4
 
d1af3a0
 
 
 
54601eb
d1af3a0
 
 
6e550a4
 
da8f9fb
d1af3a0
622a449
d1af3a0
54601eb
6e550a4
 
ffe6aa0
 
 
6e550a4
54601eb
d1af3a0
81349ee
d1af3a0
5afc58f
 
ffe6aa0
54601eb
ffe6aa0
54601eb
d1af3a0
09e30af
ffe6aa0
d1af3a0
363b492
d1af3a0
 
da8f9fb
d1af3a0
 
 
e6b31f2
d1af3a0
 
e6b31f2
d1af3a0
 
 
 
363b492
d1af3a0
 
 
 
 
363b492
 
e6b31f2
d1af3a0
363b492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ffe6aa0
363b492
 
d1af3a0
81349ee
d1af3a0
 
ffe6aa0
d1af3a0
 
363b492
d1af3a0
e6b31f2
d1af3a0
363b492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6b31f2
 
 
 
 
 
 
 
 
 
 
 
 
 
7c9920d
e6b31f2
 
 
 
 
 
 
 
 
 
 
 
 
d1af3a0
e6b31f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c9920d
e6b31f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1af3a0
38a20f7
 
 
 
 
 
474cd82
38a20f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eda2615
 
 
c371d9b
eda2615
 
 
 
 
 
82d9585
 
 
 
 
9d7be5a
82d9585
 
9d7be5a
 
 
 
 
 
82d9585
 
9d7be5a
 
82d9585
 
 
9d7be5a
82d9585
 
eda2615
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c371d9b
 
 
 
eda2615
c371d9b
eda2615
 
c371d9b
eda2615
 
 
fbc2557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
876813a
fbc2557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da8f9fb
 
 
6e550a4
 
 
 
 
 
 
 
 
 
 
ffe6aa0
e6b31f2
6e550a4
 
 
 
d1af3a0
6a7c232
ffe6aa0
cb5086d
ffe6aa0
cb5086d
9eaca74
 
 
 
 
 
 
 
363b492
 
 
 
 
 
cb5086d
6194cd0
 
 
 
 
 
 
 
363b492
e6b31f2
 
 
 
38a20f7
 
cb5086d
c371d9b
d1af3a0
09e30af
d1af3a0
09e30af
d1af3a0
da8f9fb
 
 
 
 
6a7c232
da8f9fb
6a7c232
e6b31f2
da8f9fb
 
09e30af
 
e11c4c9
 
cd4235b
e11c4c9
09e30af
e11c4c9
da8f9fb
fbc2557
6d32870
 
 
 
 
fbc2557
 
 
16198bd
e11c4c9
269676c
 
 
 
 
 
5bd9410
9e1032b
e11c4c9
 
eda2615
 
363b492
e11c4c9
855717d
eda2615
5341ea9
 
 
 
 
 
eda2615
e11c4c9
 
fbc2557
363b492
fbc2557
 
 
 
9eaca74
6d32870
 
 
 
 
 
 
 
9eaca74
 
 
 
 
eda2615
6d32870
fbc2557
 
 
 
eda2615
 
6d32870
363b492
fbc2557
 
 
eda2615
9e1032b
6d32870
9e1032b
 
 
 
 
e11c4c9
fbc2557
e11c4c9
 
 
 
 
 
fbc2557
e11c4c9
 
 
 
 
 
fbc2557
e11c4c9
 
 
 
 
 
 
 
fbc2557
e11c4c9
 
 
 
 
 
fbc2557
e11c4c9
 
 
 
 
 
fbc2557
e11c4c9
 
 
 
 
 
5bd9410
fbc2557
5bd9410
 
6d32870
 
fbc2557
6d32870
 
 
 
5bd9410
fbc2557
 
5bd9410
 
6d32870
 
5bd9410
 
6d32870
 
fbc2557
 
6d32870
 
5bd9410
 
6d32870
 
5bd9410
 
6d32870
 
5bd9410
 
fbc2557
 
5bd9410
 
fbc2557
 
5bd9410
363b492
fbc2557
 
5bd9410
eda2615
fbc2557
 
5bd9410
e11c4c9
fbc2557
 
5bd9410
e11c4c9
fbc2557
 
5bd9410
eda2615
855717d
e11c4c9
eda2615
fbc2557
6d32870
9eaca74
fbc2557
eda2615
9e1032b
e11c4c9
 
 
 
 
 
363b492
855717d
 
363b492
d1af3a0
c6053ac

import gradio as gr
import numpy as np
import cv2
import time
import torch
import warnings
import os
import zipfile
from PIL import Image
import random

warnings.filterwarnings("ignore")

# ═══════════════════════════════════════════════════════════════════════════════
# STEP 1: Extract any .zip files in current directory
# ═══════════════════════════════════════════════════════════════════════════════
# Startup banner: a separator line, then the directory the app was started from.
print("=" * 60)
print(f"[STARTUP] Working dir: {os.getcwd()}")
# Unpack every archive that sits directly in the working directory (no
# recursion into sub-folders). Each archive is extracted in place, i.e. its
# contents land in "." next to this script.
for f in os.listdir("."):
    if f.endswith(".zip"):
        try:
            with zipfile.ZipFile(f, 'r') as zf:
                zf.extractall(".")
                print(f"[ZIP] Extracted {f} OK!")
        except Exception as e:
            # Best effort: a failing archive is reported and the loop moves on
            # to the next one, so startup is never aborted here.
            print(f"[ZIP] ERROR: {e}")

# ═══════════════════════════════════════════════════════════════════════════════
# STEP 2: Copy images to root
# ═══════════════════════════════════════════════════════════════════════════════
def prepare_clean_examples(src_folder, prefix, limit=10):
    """Copy up to ``limit`` images from ``src_folder`` into the working directory.

    The folder is walked recursively. Every file whose name ends with a known
    image extension (case-insensitive) is copied to ``"{prefix}_{n}.jpg"`` in
    the current working directory, where ``n`` counts successful copies from 0.
    The bytes are copied unchanged; only the destination *name* gets a
    ``.jpg`` suffix, whatever the source format was.

    Args:
        src_folder: Directory to search. A missing directory yields ``[]``.
        prefix: File-name prefix for the copies.
        limit: Maximum number of files to copy. Values <= 0 copy nothing.

    Returns:
        List of destination file names, in copy order.
    """
    # Function-scope import, hoisted out of the loop (it used to be
    # re-executed for every single file).
    import shutil

    results = []
    if limit <= 0 or not os.path.exists(src_folder):
        return results

    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    for root, dirs, files in os.walk(src_folder):
        # os.walk yields sub-directories in arbitrary (filesystem) order;
        # sorting in place makes the chosen examples reproducible.
        dirs.sort()
        for fname in sorted(files):
            if not fname.lower().endswith(image_exts):
                continue
            src_path = os.path.join(root, fname)
            dst_name = f"{prefix}_{len(results)}.jpg"
            try:
                shutil.copy2(src_path, dst_name)
            except Exception as e:
                # A file that cannot be copied is reported and skipped; its
                # index is reused by the next successful copy.
                print(f"Error copying {src_path}: {e}")
                continue
            results.append(dst_name)
            if len(results) >= limit:
                return results
    return results

# Example images offered by the app. The first candidate folder that exists
# wins (even if it turns out to contain no images); up to 15 of its images are
# copied into the working directory as "mirror_<n>.jpg".
mirror_examples = []
for folder in ["test car windows", "test_car_windows", "test car windows segmentation"]:
    if os.path.exists(folder):
        mirror_examples = prepare_clean_examples(folder, "mirror", limit=15)
        break
# Fallback: a single bundled image, used only when nothing was collected above.
if not mirror_examples and os.path.exists("car.jpeg"):
    mirror_examples = ["car.jpeg"]

# ═══════════════════════════════════════════════════════════════════════════════
# Global Settings
# ═══════════════════════════════════════════════════════════════════════════════
# Torch device string shared by every model runner: GPU when CUDA is available.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Confidence threshold passed as `conf=` to the YOLO calls in this file.
CONF   = 0.45

def apply_mask_overlay(img_rgb, mask_bool, color=(0, 215, 255), alpha=0.4):
    """Highlight a segmentation mask on an image.

    Pixels outside the mask are darkened to 50% brightness; pixels inside keep
    their brightness, are tinted with ``color`` at opacity ``alpha`` and are
    outlined with a 2 px contour in the same colour.

    Args:
        img_rgb: H x W x 3 image array (the input is not modified).
        mask_bool: H x W mask. Any array is accepted; non-zero means "inside".
        color: Tint/outline colour, in the same channel order as ``img_rgb``.
        alpha: Tint opacity in [0, 1].

    Returns:
        A new array of the same shape as ``img_rgb``.
    """
    # Coerce to a genuine boolean array. Callers also pass uint8 0/1 masks, and
    # indexing with an *integer* array is fancy indexing (it would paint whole
    # rows 0 and 1) instead of a per-pixel boolean selection.
    mask_bool = np.asarray(mask_bool).astype(bool)

    # 1. Darken the background (50% brightness, no blur)
    dark_bg = cv2.addWeighted(img_rgb, 0.5, np.zeros_like(img_rgb), 0.5, 0)

    # 2. For the mask area, keep original brightness and tint it
    tinted_sharp = img_rgb.copy()
    tinted_sharp[mask_bool] = color
    tinted_sharp = cv2.addWeighted(tinted_sharp, alpha, img_rgb, 1 - alpha, 0)

    # 3. Find the outer boundary of the mask and draw it on the tinted image.
    #    Only mask pixels survive step 4, so the visible part of the stroke is
    #    the part lying inside the mask.
    mask_img = mask_bool.astype(np.uint8) * 255
    contours, _ = cv2.findContours(mask_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(tinted_sharp, contours, -1, color, 2, cv2.LINE_AA)

    # 4. Combine: dark background outside, bright tinted object + boundary inside
    blended = np.where(mask_bool[:, :, None], tinted_sharp, dark_bg)

    return blended

def draw_boxes(img_rgb, boxes, labels, color=(0, 215, 255)):
    """Draw HUD-style bounding boxes on a copy of ``img_rgb``.

    Each box gets a thin 1 px rectangle plus thick corner brackets whose arm
    length is 15% of the box's shorter side. ``labels`` is only used to pair
    with ``boxes`` (a box without a matching label is not drawn); no text is
    rendered, so the segmentation masks stay unobstructed.

    Args:
        img_rgb: Image array; left untouched.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes.
        labels: Iterable of labels, one per box.
        color: Line colour.

    Returns:
        The annotated copy.
    """
    canvas = img_rgb.copy()
    bracket_thickness = 3

    for coords, _label in zip(boxes, labels):
        left, top, right, bottom = (int(v) for v in coords)

        # Faint full outline.
        cv2.rectangle(canvas, (left, top), (right, bottom), color, 1)

        arm = int(min(right - left, bottom - top) * 0.15)

        # One entry per corner: (corner x, corner y, x direction, y direction),
        # in the order top-left, top-right, bottom-left, bottom-right. For each
        # corner the horizontal arm is drawn first, then the vertical one.
        corners = (
            (left, top, 1, 1),
            (right, top, -1, 1),
            (left, bottom, 1, -1),
            (right, bottom, -1, -1),
        )
        for corner_x, corner_y, step_x, step_y in corners:
            cv2.line(canvas, (corner_x, corner_y), (corner_x + step_x * arm, corner_y),
                     color, bracket_thickness, cv2.LINE_AA)
            cv2.line(canvas, (corner_x, corner_y), (corner_x, corner_y + step_y * arm),
                     color, bracket_thickness, cv2.LINE_AA)

    return canvas

# ═══════════════════════════════════════════════════════════════════════════════
# Morphological post-processing helper
# ═══════════════════════════════════════════════════════════════════════════════
def apply_morphology(mask_uint8, close_k=15, open_k=7):
    """Clean a binary mask: closing (fills holes) followed by opening (drops small blobs).

    Args:
        mask_uint8: Binary mask image.
        close_k: Diameter of the elliptical kernel used for the closing step.
        open_k: Diameter of the elliptical kernel used for the opening step.

    Returns:
        The cleaned mask.
    """
    def _ellipse(size):
        # Square bounding box -> elliptical structuring element.
        return cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))

    closing_kernel = _ellipse(close_k)
    opening_kernel = _ellipse(open_k)

    hole_filled = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, closing_kernel)
    return cv2.morphologyEx(hole_filled, cv2.MORPH_OPEN, opening_kernel)

# ═══════════════════════════════════════════════════════════════════════════════
# Model Functions
# ═══════════════════════════════════════════════════════════════════════════════
def run_yolo_generic(img_rgb, model_path, target_classes, color, morph_cleanup=False):
    """Run a YOLO segmentation model and render its masks and boxes.

    Args:
        img_rgb: H x W x 3 image array.
        model_path: Path of the YOLO weights to load (loaded on every call).
        target_classes: Collection of class ids to keep; others are ignored.
        color: Overlay/box colour.
        morph_cleanup: When True, each instance mask is cleaned with
            ``apply_morphology`` before being merged.

    Returns:
        ``(overlay_image, bw_mask, status)`` where ``bw_mask`` is a uint8
        0/255 union of the kept instance masks and ``status`` is a
        human-readable summary. The reported time covers model loading plus
        the forward pass.
    """
    from ultralytics import YOLO
    t0 = time.time()
    model = YOLO(model_path)
    # retina_masks=True asks for masks at the original image resolution
    results = model(img_rgb, conf=CONF, verbose=False, retina_masks=True)
    elapsed = time.time() - t0

    result = results[0]
    h, w = img_rgb.shape[:2]
    combined_mask = np.zeros((h, w), dtype=np.uint8)
    boxes, labels = [], []

    if result.masks is not None:
        for mask, box, cls, conf in zip(
            result.masks.data, result.boxes.xyxy,
            result.boxes.cls, result.boxes.conf
        ):
            if int(cls) not in target_classes:
                continue

            # Threshold explicitly (same 0.5 rule as run_sam_strategy) rather
            # than truncating with astype, which would zero any value < 1.
            mask_np = (mask.cpu().numpy() > 0.5).astype(np.uint8)
            # Guard: if a mask does not come back at image size, bring it to
            # (h, w) so the in-place OR below cannot fail on a shape mismatch.
            if mask_np.shape != (h, w):
                mask_np = cv2.resize(mask_np, (w, h), interpolation=cv2.INTER_NEAREST)
            # Optional per-instance morphological cleanup before combining
            if morph_cleanup:
                mask_np = apply_morphology(mask_np)
            combined_mask |= mask_np

            boxes.append(box.cpu().tolist())
            labels.append(f"glass {conf:.2f}")

    # We purposely do NOT apply morphology on the final combined_mask here,
    # otherwise it will bridge the gaps (pillars) between separate windows!

    combined_mask_bool = combined_mask > 0
    morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
    out = apply_mask_overlay(img_rgb, combined_mask_bool, color=color)
    out = draw_boxes(out, boxes, labels, color=color)
    bw_mask = (combined_mask_bool * 255).astype(np.uint8)
    return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s{morph_note}"

def run_sam_strategy(img_rgb, yolo_model_path, target_classes, color, strategy, morph_cleanup=False):
    """Refine YOLO detections with SAM (ViT-B) using one of three prompt strategies.

    Strategies:
        1. Box prompt plus five positive points (box centre and the four box
           corners, inset by 5 px).
        2. Box prompt plus up to five positive points taken from the YOLO
           mask (its centroid and points 60% of the way towards the mask's
           top/bottom/left/right extremes; points that fall outside the mask
           are replaced by the first point).
        3. Box prompt plus the YOLO mask itself as SAM's low-resolution mask
           input; holes in the SAM result are then filled.
        Any other value produces an empty mask for each detection.

    Args:
        img_rgb: H x W x 3 image array.
        yolo_model_path: YOLO weights used to produce the prompts.
        target_classes: Class ids to keep.
        color: Overlay/box colour.
        strategy: 1, 2 or 3 (see above).
        morph_cleanup: Apply ``apply_morphology`` to every SAM mask.

    Returns:
        ``(overlay_image, bw_mask, status)``. If an import fails,
        ``(img_rgb, None, "Error: segment-anything not installed")``.
    """
    try:
        from segment_anything import sam_model_registry, SamPredictor
        import urllib.request

        CKPT = "sam_vit_b_01ec64.pth"
        URL  = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
        if not os.path.exists(CKPT):
            # Download under a temporary name and rename afterwards, so an
            # interrupted download never leaves a truncated file that would
            # pass the existence check on the next call.
            partial = CKPT + ".part"
            urllib.request.urlretrieve(URL, partial)
            os.replace(partial, CKPT)

        t0  = time.time()
        sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
        predictor = SamPredictor(sam)
        predictor.set_image(img_rgb)

        from ultralytics import YOLO as _YOLO
        yolo_res = _YOLO(yolo_model_path)(img_rgb, conf=CONF, verbose=False, retina_masks=True)[0]

        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels = [], []

        if yolo_res.boxes is not None and yolo_res.masks is not None:
            for box, mask_data, cls, conf in zip(yolo_res.boxes.xyxy, yolo_res.masks.data, yolo_res.boxes.cls, yolo_res.boxes.conf):
                if int(cls) not in target_classes: continue
                box_np = box.cpu().numpy()
                yolo_mask = mask_data.cpu().numpy() > 0.5

                if strategy == 1:
                    # Strategy 1: Bbox + 5 Points
                    x1, y1, x2, y2 = map(int, box_np)
                    cx, cy = (x1+x2)//2, (y1+y2)//2
                    pts = [[cx, cy], [x1+5, y1+5], [x2-5, y1+5], [x1+5, y2-5], [x2-5, y2-5]]
                    pts_np = np.array(pts)
                    labels_np = np.ones(len(pts))
                    masks_sam, _, _ = predictor.predict(box=box_np, point_coords=pts_np, point_labels=labels_np, multimask_output=False)
                    sam_mask = masks_sam[0]
                elif strategy == 2:
                    # Strategy 2: Mask + 5 Points
                    y_coords, x_coords = np.where(yolo_mask)
                    if len(x_coords) == 0: continue
                    cx, cy = int(np.mean(x_coords)), int(np.mean(y_coords))
                    idx_top, idx_bot = np.argmin(y_coords), np.argmax(y_coords)
                    idx_lft, idx_rgt = np.argmin(x_coords), np.argmax(x_coords)
                    def get_mid(x_1, y_1, x_2, y_2, f=0.6):
                        return int(x_1 + (x_2-x_1)*f), int(y_1 + (y_2-y_1)*f)
                    pts = []
                    # The centroid of a concave mask can lie outside it; fall
                    # back to a pixel that is guaranteed to be inside.
                    if yolo_mask[cy, cx]: pts.append([cx, cy])
                    else: pts.append([x_coords[len(x_coords)//2], y_coords[len(y_coords)//2]])
                    for idx in [idx_top, idx_bot, idx_lft, idx_rgt]:
                        px, py = get_mid(cx, cy, x_coords[idx], y_coords[idx])
                        if 0 <= py < h and 0 <= px < w and yolo_mask[py, px]: pts.append([px, py])
                        else: pts.append(pts[0])
                    pts_np = np.array(pts)
                    labels_np = np.ones(len(pts))
                    masks_sam, _, _ = predictor.predict(box=box_np, point_coords=pts_np, point_labels=labels_np, multimask_output=False)
                    sam_mask = masks_sam[0]
                elif strategy == 3:
                    # Strategy 3: Direct Mask Prompting
                    # SAM's mask_input is a 1x256x256 logit map laid out like
                    # its preprocessed image: longest side scaled to the full
                    # extent, aspect ratio kept, padding at the bottom/right.
                    # Squashing the mask straight to 256x256 would misalign it
                    # on every non-square image.
                    low_res = 256
                    scale = low_res / max(h, w)
                    low_h = max(1, int(h * scale + 0.5))
                    low_w = max(1, int(w * scale + 0.5))
                    yolo_mask_resized = cv2.resize((yolo_mask).astype(np.float32), (low_w, low_h), interpolation=cv2.INTER_NEAREST)
                    # +/-30 act as confident foreground/background logits;
                    # the padded area is left as background.
                    mask_input = np.full((1, low_res, low_res), -30.0, dtype=np.float32)
                    mask_input[0, :low_h, :low_w] = np.where(yolo_mask_resized > 0.5, 30.0, -30.0)
                    masks_sam, _, _ = predictor.predict(box=box_np, mask_input=mask_input, multimask_output=False)

                    # Keep only outer contours and fill them: removes holes.
                    raw_mask = (masks_sam[0].astype(np.uint8) * 255)
                    contours, _ = cv2.findContours(raw_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    filled_mask = np.zeros_like(raw_mask)
                    cv2.drawContours(filled_mask, contours, -1, 255, cv2.FILLED)
                    sam_mask = (filled_mask > 0)
                else:
                    sam_mask = np.zeros((h, w), dtype=bool)

                sam_mask_uint = sam_mask.astype(np.uint8)
                if morph_cleanup:
                    sam_mask_uint = apply_morphology(sam_mask_uint)
                combined_mask |= sam_mask_uint.astype(bool)
                boxes_list.append(box_np.tolist())
                labels.append(f"glass {conf:.2f}")

        elapsed = time.time() - t0
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        out = apply_mask_overlay(img_rgb, combined_mask, color=color)
        out = draw_boxes(out, boxes_list, labels, color=color)
        return out, (combined_mask * 255).astype(np.uint8), f"Found: {len(boxes_list)} | Strategy: {strategy} | Inference: {elapsed:.2f}s{morph_note}"
    except ImportError:
        return img_rgb, None, "Error: segment-anything not installed"

def run_mask_rcnn(img_rgb, weights_path):
    """Segment with a two-class torchvision Mask R-CNN (ResNet50-FPN v2).

    The network is built without pretrained weights, its box and mask heads
    are replaced by two-class predictors, and ``weights_path`` is loaded either
    as a bare state dict or as a dict carrying ``"model_state_dict"``.
    Detections scoring above 0.45 are merged into one mask.

    Args:
        img_rgb: H x W x 3 image array.
        weights_path: Checkpoint file to load.

    Returns:
        ``(overlay_image, bw_mask, status)``; on any failure
        ``(img_rgb, None, "Mask R-CNN Error: ...")``.
    """
    t0 = time.time()
    try:
        from torchvision.models.detection import maskrcnn_resnet50_fpn_v2
        from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
        from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
        import torchvision.transforms.v2 as T

        overlay_color = (255, 165, 0)

        # Build the architecture and swap in two-class heads.
        net = maskrcnn_resnet50_fpn_v2(weights=None)
        heads = net.roi_heads
        box_channels = heads.box_predictor.cls_score.in_features
        heads.box_predictor = FastRCNNPredictor(box_channels, 2)
        mask_channels = heads.mask_predictor.conv5_mask.in_channels
        heads.mask_predictor = MaskRCNNPredictor(mask_channels, 256, 2)

        # NOTE(review): weights_only=False unpickles arbitrary objects; only
        # load checkpoints from a trusted source.
        state = torch.load(weights_path, map_location=DEVICE, weights_only=False)
        is_wrapped = isinstance(state, dict) and "model_state_dict" in state
        net.load_state_dict(state["model_state_dict"] if is_wrapped else state)

        net.to(DEVICE)
        net.eval()

        pil_image = Image.fromarray(img_rgb)
        tensor_in = T.ToTensor()(pil_image).to(DEVICE)
        with torch.no_grad():
            prediction = net([tensor_in])[0]

        height, width = img_rgb.shape[:2]
        union_mask = np.zeros((height, width), dtype=bool)
        kept_boxes, kept_labels = [], []

        detections = zip(prediction['scores'], prediction['masks'],
                         prediction['boxes'], prediction['labels'])
        for det_score, det_mask, det_box, _det_cls in detections:
            if not (det_score > 0.45):
                continue
            # Masks come back as soft maps; binarise at 0.5.
            union_mask |= det_mask[0].cpu().numpy() > 0.5
            kept_boxes.append(det_box.cpu().numpy().tolist())
            kept_labels.append(f"glass {det_score:.2f}")

        elapsed = time.time() - t0
        rendered = apply_mask_overlay(img_rgb, union_mask, color=overlay_color)
        rendered = draw_boxes(rendered, kept_boxes, kept_labels, color=overlay_color)
        binary_png = (union_mask * 255).astype(np.uint8)

        return rendered, binary_png, f"Found: {len(kept_boxes)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Mask R-CNN Error: {e}"

def run_grounding_dino(img_rgb, text_prompt):
    """Detect objects described by ``text_prompt`` with Grounding DINO (tiny).

    Grounding DINO only produces boxes, so the returned mask is all zeros.

    Args:
        img_rgb: H x W x 3 image array.
        text_prompt: Free-text description of what to find. It is stripped,
            lower-cased and dot-terminated before being sent to the model.

    Returns:
        ``(image_with_boxes, bw_mask, status)``; on any failure (including an
        empty prompt) ``(img_rgb, None, "Grounding DINO Error: ...")``.
    """
    try:
        # Grounding DINO expects lower-case queries that end with a dot;
        # normalise here so arbitrary user input still matches that format.
        prompt = (text_prompt or "").strip().lower()
        if not prompt:
            return img_rgb, None, "Grounding DINO Error: please enter a text prompt"
        if not prompt.endswith("."):
            prompt += "."

        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
        t0 = time.time()
        model_id = "IDEA-Research/grounding-dino-tiny"
        processor = AutoProcessor.from_pretrained(model_id)
        model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(DEVICE)

        inputs = processor(images=img_rgb, text=prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = model(**inputs)

        h, w = img_rgb.shape[:2]
        # target_sizes rescales the boxes to pixel coordinates of the input.
        results = processor.post_process_grounded_object_detection(
            outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
        )[0]

        boxes = results["boxes"].cpu().numpy().tolist()
        scores = results["scores"].cpu().numpy().tolist()
        labels = results["labels"]

        elapsed = time.time() - t0
        bw_mask = np.zeros((h, w), dtype=np.uint8) # DINO is boxes only
        str_labels = [f"{lbl} {scr:.2f}" for lbl, scr in zip(labels, scores)]
        # draw_boxes works on its own copy, so the input image stays untouched.
        out = draw_boxes(img_rgb, boxes, str_labels, color=(255, 100, 50))
        return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounding DINO Error: {e}\n(Need transformers>=4.35)"

def run_grounded_sam(img_rgb, text_prompt):
    """Text-prompted segmentation: Grounding DINO boxes refined by SAM (ViT-B).

    Args:
        img_rgb: H x W x 3 image array.
        text_prompt: Free-text description of what to segment. It is stripped,
            lower-cased and dot-terminated before being sent to the detector.

    Returns:
        ``(overlay_image, bw_mask, status)``; on any failure (including an
        empty prompt) ``(img_rgb, None, "Grounded SAM Error: ...")``.
    """
    try:
        # Grounding DINO expects lower-case queries that end with a dot;
        # normalise here so arbitrary user input still matches that format.
        prompt = (text_prompt or "").strip().lower()
        if not prompt:
            return img_rgb, None, "Grounded SAM Error: please enter a text prompt"
        if not prompt.endswith("."):
            prompt += "."

        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
        from segment_anything import sam_model_registry, SamPredictor
        import urllib.request

        t0 = time.time()

        # 1. DINO Detection
        dino_id = "IDEA-Research/grounding-dino-tiny"
        processor = AutoProcessor.from_pretrained(dino_id)
        dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(dino_id).to(DEVICE)
        inputs = processor(images=img_rgb, text=prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = dino_model(**inputs)

        h, w = img_rgb.shape[:2]
        dino_res = processor.post_process_grounded_object_detection(
            outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
        )[0]
        boxes = dino_res["boxes"].cpu().numpy()
        scores = dino_res["scores"].cpu().numpy()
        labels_txt = dino_res["labels"]

        combined_mask = np.zeros((h, w), dtype=bool)
        str_labels = []

        # 2. SAM Segmentation — only set up SAM when there is something to
        #    segment; with no boxes the result is an empty mask either way.
        if len(boxes) > 0:
            CKPT = "sam_vit_b_01ec64.pth"
            URL  = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
            if not os.path.exists(CKPT):
                # Download under a temporary name and rename afterwards, so an
                # interrupted download never leaves a truncated checkpoint.
                partial = CKPT + ".part"
                urllib.request.urlretrieve(URL, partial)
                os.replace(partial, CKPT)

            sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
            predictor = SamPredictor(sam)
            predictor.set_image(img_rgb)

            for box, score, label in zip(boxes, scores, labels_txt):
                # One mask per box (multimask_output=False).
                masks, _, _ = predictor.predict(box=box, multimask_output=False)
                combined_mask |= masks[0]
                str_labels.append(f"{label} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(255, 80, 160))
        out = draw_boxes(out, boxes.tolist(), str_labels, color=(255, 80, 160))
        return out, (combined_mask * 255).astype(np.uint8), f"Found: {len(boxes)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounded SAM Error: {e}"

def run_intelliarts_car_parts(img_rgb):
    """Segment car glass and mirrors with the Intelliarts Detectron2 car-parts model.

    detectron2 is installed on the fly when it cannot be imported, and the
    model weights are downloaded on first use. Of the 19 classes only
    back_glass, front_glass, left_mirror and right_mirror are kept.

    Args:
        img_rgb: H x W x 3 RGB image array.

    Returns:
        ``(overlay_image, bw_mask, status)``; on any failure
        ``(img_rgb, None, "Intelliarts Detectron2 Error: ...")``.
    """
    t0 = time.time()
    try:
        import detectron2  # noqa: F401  (availability probe only)
    except ImportError:
        import importlib
        import subprocess
        import sys

        print("Installing detectron2... this may take a few minutes!")
        # Invoke pip through the running interpreter so the package lands in
        # this environment (a bare `pip` on PATH may belong to another Python).
        # A failed install is not fatal here: it surfaces as an import error
        # in the block below.
        subprocess.run(
            [sys.executable, "-m", "pip", "install",
             "git+https://github.com/facebookresearch/detectron2.git",
             "--no-build-isolation"],
            check=False,
        )
        importlib.invalidate_caches()

    try:
        from detectron2 import model_zoo
        from detectron2.engine import DefaultPredictor
        from detectron2.config import get_cfg
        import urllib.request

        model_url = "https://huggingface.co/spaces/intelliarts/Car_parts_detection/resolve/main/model_final.pth"
        model_path = "intelliarts_model_final.pth"
        if not os.path.exists(model_path):
            print("Downloading Intelliarts Car Parts weights...")
            urllib.request.urlretrieve(model_url, model_path)

        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.45
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 19
        cfg.MODEL.WEIGHTS = model_path
        cfg.MODEL.DEVICE = DEVICE

        predictor = DefaultPredictor(cfg)
        # DefaultPredictor documents its input as a BGR image (cfg.INPUT.FORMAT
        # is left at its default here), so flip the channel order of our RGB
        # array instead of feeding it with red and blue swapped.
        img_bgr = np.ascontiguousarray(img_rgb[:, :, ::-1])
        outputs = predictor(img_bgr)
        instances = outputs["instances"].to("cpu")

        # Classes: 2: back_glass, 8: front_glass, 14: left_mirror, 15: right_mirror
        target_classes = [2, 8, 14, 15]
        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels_list = [], []

        classes = instances.pred_classes.numpy()
        scores = instances.scores.numpy()
        boxes = instances.pred_boxes.tensor.numpy()
        masks = instances.pred_masks.numpy()

        class_names = ['_background_', 'back_bumper', 'back_glass', 'back_left_door', 'back_left_light', 'back_right_door', 'back_right_light', 'front_bumper', 'front_glass', 'front_left_door', 'front_left_light', 'front_right_door', 'front_right_light', 'hood', 'left_mirror', 'right_mirror', 'tailgate', 'trunk', 'wheel']

        for cls_id, score, box, mask in zip(classes, scores, boxes, masks):
            if cls_id not in target_classes:
                continue
            combined_mask |= mask
            boxes_list.append(box.tolist())
            labels_list.append(f"{class_names[cls_id]} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(50, 150, 255))
        out = draw_boxes(out, boxes_list, labels_list, color=(50, 150, 255))
        bw_mask = (combined_mask * 255).astype(np.uint8)

        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Intelliarts Detectron2 Error: {e}"

# ═══════════════════════════════════════════════════════════════════════════════
# SegFormer Function
# ═══════════════════════════════════════════════════════════════════════════════
def run_segformer(img_rgb, morph_cleanup=False):
    """Semantic segmentation with a fine-tuned SegFormer checkpoint.

    The checkpoint directory is searched in several locations (it must
    contain ``config.json``). Class index 1 of the softmax output is taken as
    the foreground and thresholded at 0.5.

    Args:
        img_rgb: H x W x 3 image array.
        morph_cleanup: Apply ``apply_morphology`` to the predicted mask.

    Returns:
        ``(overlay_image, bw_mask, status)``; on any failure
        ``(img_rgb, None, "SegFormer Error: ...")``.
    """
    try:
        from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
        import torch.nn.functional as F

        t0 = time.time()
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        # Paths to try (works for both local PC and Hugging Face Cloud deployment)
        paths_to_try = [
            os.path.join(base_dir, "SegFormer_Model", "best_segformer_dice_model"), # Local PC
            "best_segformer_dice_model",                                            # Hugging Face Root
            os.path.join(os.path.dirname(__file__), "best_segformer_dice_model"),   # Next to app.py
        ]

        # If files were uploaded directly to the root (no folder)
        if os.path.exists("config.json"):
            paths_to_try.append(".")
        if os.path.exists(os.path.join(os.path.dirname(__file__), "config.json")):
            paths_to_try.append(os.path.dirname(__file__))

        model_path = None
        for p in paths_to_try:
            # For SegFormer, the path must contain config.json
            if os.path.exists(p) and os.path.exists(os.path.join(p, "config.json")):
                model_path = p
                break

        # Fallback: let from_pretrained resolve the name (and report the error)
        if model_path is None:
            model_path = "best_segformer_dice_model"

        processor = SegformerImageProcessor.from_pretrained(model_path)
        model = SegformerForSemanticSegmentation.from_pretrained(model_path).to(DEVICE)

        inputs = processor(images=Image.fromarray(img_rgb), return_tensors="pt")
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            h, w = img_rgb.shape[:2]
            # Upsample the logits to the input resolution before the softmax.
            logits = F.interpolate(outputs.logits, size=(h, w), mode="bilinear", align_corners=False)[0]

        probs = F.softmax(logits, dim=0)
        pred_mask = (probs[1] > 0.5).cpu().numpy().astype(np.uint8)

        # Apply morphological cleanup if requested
        if morph_cleanup:
            pred_mask = apply_morphology(pred_mask, close_k=15, open_k=7)

        elapsed = time.time() - t0
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        # Hand the overlay a boolean mask (as run_birefnet does): a uint8 0/1
        # array used as an index selects rows 0/1 instead of the mask pixels,
        # which silently drops the colour tint.
        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 50, 50))
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s{morph_note}"
    except Exception as e:
        return img_rgb, None, f"SegFormer Error: {e}"

# ═══════════════════════════════════════════════════════════════════════════════
# BiRefNet Function
# ═══════════════════════════════════════════════════════════════════════════════
def run_birefnet(img_rgb):
    """Semantic segmentation with a fine-tuned BiRefNet checkpoint.

    A local checkpoint directory (containing ``config.json`` and
    ``model.safetensors``) is preferred; otherwise the model id
    ``Ayesha-Majeed/birefnet_car_window`` is handed to ``from_pretrained``.
    The image is resized to 1024x1024 for the forward pass, and the prediction
    is resized back, passed through a sigmoid and thresholded at 0.5.

    Args:
        img_rgb: H x W x 3 image array.

    Returns:
        ``(overlay_image, bw_mask, status)``; on any failure
        ``(img_rgb, None, "BiRefNet Error: ...")``.
    """
    try:
        from transformers import AutoModelForImageSegmentation
        from torchvision import transforms
        import torch.nn.functional as F

        t0 = time.time()

        script_path = os.path.abspath(__file__)
        script_dir = os.path.dirname(script_path)
        project_root = os.path.dirname(script_dir)

        # Candidate checkpoint folders, most specific first.
        candidates = (
            os.path.join(project_root, "BiRefNet_Model", "best_model-20260624T051601Z-3-001", "best_model"),  # local PC
            "birefnet_model",                                                                                  # working dir
            os.path.join(script_dir, "birefnet_model"),                                                        # next to this script
            "best_birefnet_model",                                                                             # extra fallback
        )

        def _is_checkpoint_dir(path):
            # A usable folder holds both the config and the weights file.
            return (
                os.path.exists(path)
                and os.path.exists(os.path.join(path, "config.json"))
                and os.path.exists(os.path.join(path, "model.safetensors"))
            )

        # No local folder found -> fall back to the hosted model repository.
        model_path = next(
            (folder for folder in candidates if _is_checkpoint_dir(folder)),
            "Ayesha-Majeed/birefnet_car_window",
        )

        # NOTE(review): trust_remote_code=True executes code shipped with the
        # checkpoint; only point this at trusted sources.
        net = AutoModelForImageSegmentation.from_pretrained(model_path, trust_remote_code=True).to(DEVICE)
        net.eval()

        preprocess = transforms.Compose([
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

        from PIL import Image
        batch = preprocess(Image.fromarray(img_rgb)).unsqueeze(0).to(DEVICE)

        def _final_output():
            # When the model returns a list/tuple, the last entry is used.
            raw = net(batch)
            return raw[-1] if isinstance(raw, (list, tuple)) else raw

        with torch.no_grad():
            if DEVICE == "cuda":
                # Mixed precision is only enabled on the GPU.
                with torch.amp.autocast("cuda"):
                    final_pred = _final_output()
            else:
                final_pred = _final_output()

        height, width = img_rgb.shape[:2]
        upsampled = F.interpolate(final_pred, size=(height, width), mode="bilinear", align_corners=False)
        foreground = torch.sigmoid(upsampled) > 0.5
        pred_mask = foreground.squeeze().cpu().numpy().astype(np.uint8)

        elapsed = time.time() - t0
        rendered = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 0, 0))  # red overlay
        binary_png = (pred_mask * 255).astype(np.uint8)
        return rendered, binary_png, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"BiRefNet Error: {e}"

# ═══════════════════════════════════════════════════════════════════════════════
# Gradio Process Function
# ═══════════════════════════════════════════════════════════════════════════════
# Palette of pastel/neon colours for the overlays. process_image picks one
# entry at random per call (random.choice) and passes it as `color=` to some
# of the model runners. Each entry is a 3-tuple of 0-255 channel values;
# presumably RGB, since the runners apply them to `img_rgb` arrays.
PASTEL_COLORS = [
    (255, 105, 180), # Hot/Light Pink
    (180, 130, 255), # Light Purple
    (0, 215, 255),   # Light Sky Blue / Cyan
    (255, 220, 50),  # Light Yellow
    (255, 160, 50),  # Light Orange
    (150, 255, 150), # Light Mint Green
    (240, 240, 255), # Light White / Silver
]

def process_image(img_rgb, model_name, text_prompt="", morph_cleanup=False):
    """Run the model selected by ``model_name`` on ``img_rgb``.

    Args:
        img_rgb: Input image; ``None`` means nothing was uploaded.
        model_name: Display name of the model to run (must match one of the
            keys of the dispatch table below exactly).
        text_prompt: Text prompt forwarded to the Grounding DINO / Grounded SAM
            runners; ignored by every other model.
        morph_cleanup: Forwarded to the YOLO, SAM+YOLO and SegFormer runners.

    Returns:
        Whatever the selected runner returns (callers in this file unpack it as
        ``(overlay, mask, status_text)``). On a missing image, an unknown model
        name, or any exception raised by a runner, a 3-tuple whose last element
        is a human-readable message is returned instead of raising.
    """
    if img_rgb is None:
        return None, None, "Please upload an image."

    # Drawn for every call (even for models with a fixed colour) so the random
    # stream is consumed exactly once per inference run.
    run_color = random.choice(PASTEL_COLORS)

    def _pick_weights(local_name, repo_parts, fallback):
        # Prefer a checkpoint in the working directory, then one under the
        # directory two levels above this file, else use the fallback path.
        if os.path.exists(local_name):
            return local_name
        repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        candidate = os.path.join(repo_root, *repo_parts)
        return candidate if os.path.exists(candidate) else fallback

    def _yolo(weights, color):
        return run_yolo_generic(
            img_rgb, weights, target_classes=[0, 1], color=color, morph_cleanup=morph_cleanup
        )

    def _sam(strategy):
        return run_sam_strategy(
            img_rgb, "best.pt", target_classes=[0, 1], color=run_color,
            strategy=strategy, morph_cleanup=morph_cleanup,
        )

    def _yolo11():
        weights = _pick_weights(
            "yolo11_best.pt",
            ("runs", "segment", "runs", "car_mirror_seg", "yolo11x_seg_1024", "weights", "best.pt"),
            "best.pt",  # Fallback
        )
        return _yolo(weights, (0, 255, 120))

    def _mask_rcnn():
        # A "maskrcnn_best.pt" uploaded directly next to the app takes priority.
        weights = _pick_weights(
            "maskrcnn_best.pt",
            ("Mask_RCNN", "runs", "woven-sweep-5", "best.pt"),
            "Mask_RCNN/runs/woven-sweep-5/best.pt",
        )
        return run_mask_rcnn(img_rgb, weights)

    # Display name -> zero-argument callable. The callables are lazy, so file
    # system checks and runner lookups only happen for the selected model.
    runners = {
        "YOLOv8x-seg (Custom Window)": lambda: _yolo("best.pt", run_color),
        "YOLOv8x-seg": lambda: _yolo("best.pt", (255, 215, 0)),
        "YOLO11x-seg": _yolo11,
        "SAM + YOLO (Strategy 1: Bbox + 5 Points)": lambda: _sam(1),
        "SAM + YOLO (Strategy 2: Mask + 5 Points)": lambda: _sam(2),
        "SAM + YOLO (Strategy 3: Direct Mask Prompting)": lambda: _sam(3),
        "Mask R-CNN": _mask_rcnn,
        "Grounding DINO (Zero-Shot Detection)": lambda: run_grounding_dino(img_rgb, text_prompt),
        "Grounded SAM (Zero-Shot Segmentation)": lambda: run_grounded_sam(img_rgb, text_prompt),
        "Intelliarts Car Parts (Detectron2)": lambda: run_intelliarts_car_parts(img_rgb),
        "SegFormer": lambda: run_segformer(img_rgb, morph_cleanup=morph_cleanup),
    }

    try:
        selected = runners.get(model_name)
        if selected is None:
            return img_rgb, None, "Model not recognized."
        return selected()
    except Exception as e:
        # UI boundary: report the failure in the stats box instead of crashing.
        return img_rgb, None, f"Error: {str(e)}"

# ═══════════════════════════════════════════════════════════════════════════════
# Gradio UI
# ═══════════════════════════════════════════════════════════════════════════════
def _seq_result_panel(heading, label):
    """Create one model's result panel inside the currently open Gradio context.

    Emits, in order: a Markdown heading, a row holding the overlay image and
    the binary-mask image, and a read-only stats textbox.

    Args:
        heading: Markdown text for the section heading.
        label: Prefix used for the three component labels
            (``"<label> Overlay"``, ``"<label> Binary Mask"``, ``"<label> Stats"``).

    Returns:
        ``(overlay_image, mask_image, stats_textbox)`` components.
    """
    gr.Markdown(heading)
    with gr.Row():
        overlay = gr.Image(label=f"{label} Overlay", interactive=False)
        mask = gr.Image(label=f"{label} Binary Mask", interactive=False, image_mode="L")
    stats = gr.Textbox(label=f"{label} Stats", interactive=False)
    return overlay, mask, stats


theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")

with gr.Blocks(theme=theme, title="Car Window Segmentation") as demo:
    gr.Markdown("""
    # Car Window Segmentation
    Compare your custom trained YOLOv8 model against state-of-the-art Zero-Shot models!
    """)



    # ── TAB 3: Comprehensive Evaluation ──
    with gr.Tab("Comprehensive Evaluation"):
        gr.Markdown("### Comprehensive Evaluation: Results from All Trained and Pretrained Models")
        # The backslash before "." keeps Markdown from renumbering items 10-12
        # as a new list; it is written as "\\." because "\." is an invalid
        # escape sequence in a non-raw Python string literal.
        gr.Markdown("""**The following models will run and display their results below:**

**Custom Trained Models:**

1. SegFormer
2. SegFormer + Morphological
3. YOLO11x-seg
4. YOLOv8x-seg
5. Mask R-CNN
6. BiRefNet
7. SAM + YOLO (Strategy 1: Bbox + 5 Points)
8. SAM + YOLO (Strategy 2: Mask + 5 Points)
9. SAM + YOLO (Strategy 3: Direct Mask Prompting)

**Pretrained Zero-Shot Models:**

10\\. Grounding DINO

11\\. Grounded SAM

12\\. Intelliarts Car Parts

**Our Findings:** SegFormer and YOLO11x deliver the best performance with significantly sharper edge precision.
""")
        
        with gr.Row():
            input_image_seq = gr.Image(type="numpy", label="Upload Window Image")
        with gr.Row():
            submit_btn_seq = gr.Button("Run All Models", variant="primary", size="lg")
            stop_btn_seq = gr.Button("🛑 Stop Processing", variant="stop", size="lg")
            
        if mirror_examples:
            gr.Markdown("### Or click any example image below to load it:")
            compare_gallery = gr.Gallery(value=mirror_examples, columns=10, height=120, object_fit="cover", allow_preview=False, show_label=False)

            def load_compare_img(evt: gr.SelectData):
                # The gr.SelectData annotation is what makes Gradio pass the
                # selection event; evt.index is the clicked gallery position.
                return mirror_examples[evt.index]

            compare_gallery.select(fn=load_compare_img, inputs=None, outputs=input_image_seq)

        # Result panels. Components are created in display order; the variable
        # names are referenced later when the "Run All Models" button is wired.
        gr.Markdown("---")
        gr.Markdown("## 🚀 Custom Trained Models")

        seq_segf_img, seq_segf_bw, seq_segf_stats = _seq_result_panel(
            "### 1️⃣ SegFormer (Transformer)", "SegFormer")

        gr.Markdown("---")
        seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats = _seq_result_panel(
            "### 2️⃣ SegFormer + Morphological Cleanup (Holes Filled + Sharp Borders)", "SegFormer + Morph")

        gr.Markdown("---")
        seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats = _seq_result_panel(
            "### 3️⃣ YOLO11x-seg", "YOLO11x")

        gr.Markdown("---")
        seq_yolo_img, seq_yolo_bw, seq_yolo_stats = _seq_result_panel(
            "### 4️⃣ YOLOv8x-seg", "YOLO")

        gr.Markdown("---")
        seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats = _seq_result_panel(
            "### 5️⃣ Mask R-CNN (ResNet50-FPN)", "Mask R-CNN")

        gr.Markdown("---")
        seq_biref_img, seq_biref_bw, seq_biref_stats = _seq_result_panel(
            "### 6️⃣ BiRefNet (Boundary-Aware Model)", "BiRefNet")

        gr.Markdown("---")
        seq_sam1_img, seq_sam1_bw, seq_sam1_stats = _seq_result_panel(
            "### 7️⃣ SAM + YOLO (Strategy 1: Bbox + 5 Points)", "SAM+YOLO Strat 1")

        gr.Markdown("---")
        seq_sam2_img, seq_sam2_bw, seq_sam2_stats = _seq_result_panel(
            "### 8️⃣ SAM + YOLO (Strategy 2: Mask + 5 Points)", "SAM+YOLO Strat 2")

        gr.Markdown("---")
        seq_sam3_img, seq_sam3_bw, seq_sam3_stats = _seq_result_panel(
            "### 9️⃣ SAM + YOLO (Strategy 3: Direct Mask Prompting)", "SAM+YOLO Strat 3")

        gr.Markdown("---")
        gr.Markdown("## 🌍 Pretrained Zero-Shot Models")

        seq_dino_img, seq_dino_bw, seq_dino_stats = _seq_result_panel(
            "### 🔟 Grounding DINO (Zero-Shot Detection)", "Grounding DINO")

        gr.Markdown("---")
        seq_gsam_img, seq_gsam_bw, seq_gsam_stats = _seq_result_panel(
            "### 1️⃣1️⃣ Grounded SAM (Zero-Shot Segmentation)", "Grounded SAM")

        gr.Markdown("---")
        seq_intell_img, seq_intell_bw, seq_intell_stats = _seq_result_panel(
            "### 1️⃣2️⃣ Intelliarts Car Parts (Detectron2)", "Intelliarts Car Parts")

        def run_all_models(img):
            """Run all twelve models on ``img``, streaming results as they finish.

            This is a generator used as a Gradio event handler. Every yield is
            a flat tuple with three slots per model, in panel order:
            ``(overlay, binary_mask, stats_text)``.

            Sequence of yields:
              * ``img is None``: a single all-``None`` tuple, then stop.
              * otherwise: first a tuple with every stats slot set to
                "⏳ Processing...", then one updated tuple after each model.

            A model that raises does not abort the run: its slots are filled
            with ``(img, None, "Error: ...")`` (the same shape process_image
            uses for failures) and the remaining models still execute.
            """
            zero_shot_prompt = "car window. car glass. windshield."

            # One zero-argument callable per result panel, in display order.
            steps = (
                lambda: run_segformer(img, morph_cleanup=False),
                lambda: run_segformer(img, morph_cleanup=True),
                lambda: process_image(img, "YOLO11x-seg", "", False),
                lambda: process_image(img, "YOLOv8x-seg", "", False),
                lambda: process_image(img, "Mask R-CNN", "", False),
                lambda: run_birefnet(img),
                lambda: process_image(img, "SAM + YOLO (Strategy 1: Bbox + 5 Points)", "", False),
                lambda: process_image(img, "SAM + YOLO (Strategy 2: Mask + 5 Points)", "", False),
                lambda: process_image(img, "SAM + YOLO (Strategy 3: Direct Mask Prompting)", "", False),
                lambda: process_image(img, "Grounding DINO (Zero-Shot Detection)", zero_shot_prompt, False),
                lambda: process_image(img, "Grounded SAM (Zero-Shot Segmentation)", zero_shot_prompt, False),
                lambda: process_image(img, "Intelliarts Car Parts (Detectron2)", "", False),
            )
            slots_per_model = 3  # overlay, binary mask, stats
            total_slots = slots_per_model * len(steps)  # 36 output components

            if img is None:
                yield (None,) * total_slots
                return

            # Show "Processing..." in every stats textbox immediately.
            results = [None] * total_slots
            for stats_slot in range(slots_per_model - 1, total_slots, slots_per_model):
                results[stats_slot] = "⏳ Processing..."
            yield tuple(results)

            for position, step in enumerate(steps):
                try:
                    overlay, bw_mask, stats = step()
                except Exception as e:
                    # Some steps bypass process_image's error handling; contain
                    # the failure so later panels do not stay on "Processing...".
                    overlay, bw_mask, stats = img, None, f"Error: {e}"
                first = position * slots_per_model
                results[first:first + slots_per_model] = (overlay, bw_mask, stats)
                # The yield stays outside the try so cancellation is never swallowed.
                yield tuple(results)

        # Output components in the exact order run_all_models fills its result
        # tuple: (overlay, binary mask, stats) for each model panel.
        seq_outputs = [
            seq_segf_img, seq_segf_bw, seq_segf_stats,
            seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats,
            seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats,
            seq_yolo_img, seq_yolo_bw, seq_yolo_stats,
            seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats,
            seq_biref_img, seq_biref_bw, seq_biref_stats,
            seq_sam1_img, seq_sam1_bw, seq_sam1_stats,
            seq_sam2_img, seq_sam2_bw, seq_sam2_stats,
            seq_sam3_img, seq_sam3_bw, seq_sam3_stats,
            seq_dino_img, seq_dino_bw, seq_dino_stats,
            seq_gsam_img, seq_gsam_bw, seq_gsam_stats,
            seq_intell_img, seq_intell_bw, seq_intell_stats,
        ]

        # "Run All Models" starts the streaming generator on the uploaded image.
        run_event = submit_btn_seq.click(fn=run_all_models, inputs=[input_image_seq], outputs=seq_outputs)

        # "Stop Processing" runs no function of its own; it only cancels the
        # in-flight run_event.
        stop_btn_seq.click(fn=None, inputs=None, outputs=None, cancels=[run_event])

# Launch the Gradio app only when this file is executed as a script, so that
# importing the module does not start a server.
if __name__ == "__main__":
    demo.launch()