import gradio as gr
import numpy as np
import cv2
import time
import torch
import warnings
import os
import zipfile
from PIL import Image
import random

warnings.filterwarnings("ignore")

# ═══════════════════════════════════════════════════════════════════════════════
# STEP 1: Extract any .zip files in current directory
# ═══════════════════════════════════════════════════════════════════════════════
# Print a startup banner, then unpack every *.zip archive found in the working
# directory into that same directory. A failing archive is reported and skipped
# so that one bad upload does not stop the app from starting.
print("=" * 60)
print(f"[STARTUP] Working dir: {os.getcwd()}")
_zip_names = [entry for entry in os.listdir(".") if entry.endswith(".zip")]
for _zip_name in _zip_names:
    try:
        _archive = zipfile.ZipFile(_zip_name, 'r')
        with _archive:
            _archive.extractall(".")
            print(f"[ZIP] Extracted {_zip_name} OK!")
    except Exception as _zip_err:
        print(f"[ZIP] ERROR: {_zip_err}")

# ═══════════════════════════════════════════════════════════════════════════════
# STEP 2: Copy images to root
# ═══════════════════════════════════════════════════════════════════════════════
def prepare_clean_examples(src_folder, prefix, limit=10):
    """Copy up to ``limit`` images from ``src_folder`` into the working directory.

    The folder is walked recursively (files and sub-folders in sorted order, so
    the selection is deterministic). Every file whose extension looks like an
    image is copied to ``<prefix>_<index>.jpg`` in the current working
    directory, where ``index`` counts successful copies starting at 0.

    Note: the destination name always ends in ``.jpg`` regardless of the source
    format; the file contents are copied unchanged.

    Args:
        src_folder: Directory to search for images.
        prefix: Filename prefix for the copies.
        limit: Maximum number of images to copy. Values <= 0 copy nothing.

    Returns:
        List of destination filenames (relative to the working directory) in
        the order they were copied. Empty if the folder does not exist.
    """
    import shutil  # local import: only this helper needs it

    results = []
    if limit <= 0 or not os.path.exists(src_folder):
        return results

    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    for root, dirs, files in os.walk(src_folder):
        # Sorting in place makes os.walk descend into sub-folders in a stable order.
        dirs.sort()
        for fname in sorted(files):
            if not fname.lower().endswith(image_exts):
                continue
            src_path = os.path.join(root, fname)
            dst_name = f"{prefix}_{len(results)}.jpg"
            try:
                shutil.copy2(src_path, dst_name)
            except OSError as e:
                # Best effort: report the unreadable file and try the next one.
                print(f"Error copying {src_path}: {e}")
                continue
            results.append(dst_name)
            if len(results) >= limit:
                return results
    return results

# Build the example gallery from the first test-image folder that exists
# (several spellings are accepted). If that yields nothing, fall back to a
# single bundled "car.jpeg" when it is present.
_EXAMPLE_FOLDER_CANDIDATES = (
    "test car windows",
    "test_car_windows",
    "test car windows segmentation",
)
mirror_examples = []
for folder in _EXAMPLE_FOLDER_CANDIDATES:
    if not os.path.exists(folder):
        continue
    mirror_examples = prepare_clean_examples(folder, "mirror", limit=15)
    break
if not mirror_examples:
    if os.path.exists("car.jpeg"):
        mirror_examples = ["car.jpeg"]

# ═══════════════════════════════════════════════════════════════════════════════
# Global Settings
# ═══════════════════════════════════════════════════════════════════════════════
# Torch device string: "cuda" when PyTorch reports a usable GPU, otherwise "cpu".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Minimum detection confidence; passed as `conf=` to the YOLO calls in this file.
CONF   = 0.45

def apply_mask_overlay(img_rgb, mask_bool, color=(0, 215, 255), alpha=0.4):
    """Highlight a segmentation mask on an image.

    Pixels outside the mask are darkened to half brightness; pixels inside the
    mask are tinted with ``color`` (blended at ``alpha``) and the mask's outer
    contour is drawn in ``color``.

    Args:
        img_rgb: Image array indexed as (row, col, channel).
            Assumed to be uint8 RGB as delivered by the UI -- TODO confirm.
        mask_bool: 2-D mask with the same height/width as ``img_rgb``. Any
            dtype is accepted; non-zero means "inside the mask".
        color: Tint / contour colour, in the same channel order as ``img_rgb``.
        alpha: Weight of the tint colour inside the mask (0 = no tint).

    Returns:
        A new array with the overlay applied; ``img_rgb`` is not modified.
    """
    # Coerce to a genuine boolean array. Callers sometimes pass a uint8 0/1
    # mask; indexing an image with an *integer* array is fancy indexing (it
    # would select rows 0 and 1, and be extremely slow), not masking.
    mask_bool = np.asarray(mask_bool).astype(bool)

    # 1. Darken the background (50% brightness, no blur)
    dark_bg = cv2.addWeighted(img_rgb, 0.5, np.zeros_like(img_rgb), 0.5, 0)

    # 2. For the mask area, keep original brightness and tint it
    tinted_sharp = img_rgb.copy()
    tinted_sharp[mask_bool] = color
    tinted_sharp = cv2.addWeighted(tinted_sharp, alpha, img_rgb, 1 - alpha, 0)

    # 3. Find the mask's outer boundary and draw it on the tinted image
    mask_img = mask_bool.astype(np.uint8) * 255
    contours, _ = cv2.findContours(mask_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(tinted_sharp, contours, -1, color, 2, cv2.LINE_AA)

    # 4. Combine: dark background outside, tinted object + boundary inside.
    #    Selecting by the mask keeps only the part of the contour stroke that
    #    lies inside the mask.
    blended = np.where(mask_bool[:, :, None], tinted_sharp, dark_bg)

    return blended

def draw_boxes(img_rgb, boxes, labels, color=(0, 215, 255)):
    """Draw HUD-style boxes (thin outline + thick corner brackets) on a copy of the image.

    Args:
        img_rgb: Image to annotate; it is copied, never modified.
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes; coordinates are cast to int.
        labels: Iterable paired with ``boxes``. The text is intentionally not
            rendered (it would cover the masks), but boxes are only drawn for
            as many entries as ``labels`` provides.
        color: Line colour, in the image's channel order.

    Returns:
        The annotated copy.
    """
    canvas = img_rgb.copy()
    bracket_thickness = 3

    for box, _unused_label in zip(boxes, labels):
        left, top, right, bottom = map(int, box)

        # Faint full outline of the box.
        cv2.rectangle(canvas, (left, top), (right, bottom), color, 1)

        # Bracket arm length: 15% of the shorter box side.
        arm = int(min(right - left, bottom - top) * 0.15)

        # Each corner with the direction its arms point in (towards the box
        # interior), in the order top-left, top-right, bottom-left, bottom-right.
        corners = (
            ((left, top), 1, 1),
            ((right, top), -1, 1),
            ((left, bottom), 1, -1),
            ((right, bottom), -1, -1),
        )
        for (corner_x, corner_y), step_x, step_y in corners:
            # Horizontal arm first, then the vertical arm.
            cv2.line(canvas, (corner_x, corner_y), (corner_x + step_x * arm, corner_y),
                     color, bracket_thickness, cv2.LINE_AA)
            cv2.line(canvas, (corner_x, corner_y), (corner_x, corner_y + step_y * arm),
                     color, bracket_thickness, cv2.LINE_AA)

    return canvas

# ═══════════════════════════════════════════════════════════════════════════════
# Morphological post-processing helper
# ═══════════════════════════════════════════════════════════════════════════════
def apply_morphology(mask_uint8, close_k=15, open_k=7):
    """Clean up a binary mask: closing (fills holes) followed by opening (drops small blobs).

    Args:
        mask_uint8: Single-channel mask image to clean.
        close_k: Diameter of the elliptical kernel used for the closing step.
        open_k: Diameter of the elliptical kernel used for the opening step.

    Returns:
        The cleaned mask.
    """
    def _ellipse(size):
        return cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))

    kernel_for_closing = _ellipse(close_k)
    kernel_for_opening = _ellipse(open_k)

    hole_filled = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel_for_closing)
    return cv2.morphologyEx(hole_filled, cv2.MORPH_OPEN, kernel_for_opening)

# ═══════════════════════════════════════════════════════════════════════════════
# Model Functions
# ═══════════════════════════════════════════════════════════════════════════════
def run_yolo_generic(img_rgb, model_path, target_classes, color, morph_cleanup=False):
    """Segment an image with a YOLO segmentation model and render the result.

    Args:
        img_rgb: Input image array (height and width are read from its shape).
        model_path: Path of the YOLO weights to load.
        target_classes: Collection of class ids to keep; other detections are ignored.
        color: Colour used for the mask tint and the boxes.
        morph_cleanup: When True, each instance mask is passed through
            ``apply_morphology`` before being merged.

    Returns:
        ``(overlay, bw_mask, status)``: the annotated image, a 0/255 uint8 mask
        of all kept instances, and a status string. The reported time covers
        loading the model and running it.
    """
    from ultralytics import YOLO

    started = time.time()
    detector = YOLO(model_path)
    # retina_masks=True requests masks at the original image resolution.
    predictions = detector(img_rgb, conf=CONF, verbose=False, retina_masks=True)
    elapsed = time.time() - started

    first = predictions[0]
    height, width = img_rgb.shape[:2]
    union_mask = np.zeros((height, width), dtype=np.uint8)
    kept_boxes = []
    kept_labels = []

    if first.masks is not None:
        detections = zip(first.masks.data, first.boxes.xyxy, first.boxes.cls, first.boxes.conf)
        for inst_mask, inst_box, inst_cls, inst_conf in detections:
            if int(inst_cls) not in target_classes:
                continue

            # With retina_masks=True the mask is expected to already be (h, w).
            instance = inst_mask.cpu().numpy().astype(np.uint8)
            # Clean each instance on its own, before merging.
            if morph_cleanup:
                instance = apply_morphology(instance)
            union_mask |= instance

            kept_boxes.append(inst_box.cpu().tolist())
            kept_labels.append(f"glass {inst_conf:.2f}")

    # Morphology is deliberately NOT applied to the merged mask: closing it
    # would bridge the gaps (pillars) between separate windows.
    tinted = apply_mask_overlay(img_rgb, union_mask > 0, color=color)
    annotated = draw_boxes(tinted, kept_boxes, kept_labels, color=color)
    bw_mask = (union_mask * 255).astype(np.uint8)

    morph_note = ""
    if morph_cleanup:
        morph_note = " | Morphology: ON ✅"
    return annotated, bw_mask, f"Found: {len(kept_boxes)} | Inference Time: {elapsed:.2f}s{morph_note}"

def run_sam_strategy(img_rgb, yolo_model_path, target_classes, color, strategy, morph_cleanup=False):
    """Refine YOLO detections with SAM (ViT-B) using one of three prompting strategies.

    A YOLO segmentation model proposes instances; every instance whose class id
    is in ``target_classes`` is re-segmented by SAM, prompted according to
    ``strategy``:

    * ``1`` -- the box plus five positive points: the box centre and the four
      box corners inset by 5 px.
    * ``2`` -- the box plus five positive points taken from the YOLO mask: its
      centroid (or a mask pixel if the centroid falls outside the mask) and
      points 60% of the way from there to the mask's top/bottom/left/right
      extremes (falling back to the first point when such a point is off-mask).
    * ``3`` -- the box plus the YOLO mask as a dense ``mask_input``; only the
      outer contours of SAM's answer are kept and filled solid.

    Any other value yields an empty mask for the instance (its box is still
    reported).

    Args:
        img_rgb: Input image array.
        yolo_model_path: Path of the YOLO segmentation weights.
        target_classes: Collection of class ids to keep.
        color: Colour used for the mask tint and the boxes.
        strategy: Prompting strategy, see above.
        morph_cleanup: When True, each SAM mask is passed through
            ``apply_morphology`` before being merged.

    Returns:
        ``(overlay, bw_mask, status)``. If a required package cannot be
        imported, returns ``(img_rgb, None, error_message)`` instead. Other
        exceptions propagate to the caller.
    """
    def _toward(x_from, y_from, x_to, y_to, f=0.6):
        # Point a fraction `f` of the way from (x_from, y_from) to (x_to, y_to).
        return int(x_from + (x_to - x_from) * f), int(y_from + (y_to - y_from) * f)

    try:
        from segment_anything import sam_model_registry, SamPredictor
        import urllib.request

        CKPT = "sam_vit_b_01ec64.pth"
        URL  = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
        if not os.path.exists(CKPT):
            # Download under a temporary name and rename on success, so an
            # interrupted download never leaves a truncated checkpoint that
            # later runs would mistake for a complete one.
            partial = CKPT + ".part"
            urllib.request.urlretrieve(URL, partial)
            os.replace(partial, CKPT)

        t0  = time.time()
        sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
        predictor = SamPredictor(sam)
        predictor.set_image(img_rgb)

        from ultralytics import YOLO as _YOLO
        yolo_res = _YOLO(yolo_model_path)(img_rgb, conf=CONF, verbose=False, retina_masks=True)[0]

        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels = [], []

        if yolo_res.boxes is not None and yolo_res.masks is not None:
            detections = zip(yolo_res.boxes.xyxy, yolo_res.masks.data, yolo_res.boxes.cls, yolo_res.boxes.conf)
            for box, mask_data, cls, conf in detections:
                if int(cls) not in target_classes:
                    continue
                box_np = box.cpu().numpy()
                yolo_mask = mask_data.cpu().numpy() > 0.5

                if strategy == 1:
                    # Strategy 1: Bbox + 5 Points
                    x1, y1, x2, y2 = map(int, box_np)
                    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
                    pts = [[cx, cy], [x1 + 5, y1 + 5], [x2 - 5, y1 + 5], [x1 + 5, y2 - 5], [x2 - 5, y2 - 5]]
                    pts_np = np.array(pts)
                    labels_np = np.ones(len(pts))  # every point is a positive prompt
                    masks_sam, _, _ = predictor.predict(box=box_np, point_coords=pts_np, point_labels=labels_np, multimask_output=False)
                    sam_mask = masks_sam[0]
                elif strategy == 2:
                    # Strategy 2: Mask + 5 Points
                    y_coords, x_coords = np.where(yolo_mask)
                    if len(x_coords) == 0:
                        continue  # empty YOLO mask: nothing to prompt with
                    cx, cy = int(np.mean(x_coords)), int(np.mean(y_coords))
                    idx_top, idx_bot = np.argmin(y_coords), np.argmax(y_coords)
                    idx_lft, idx_rgt = np.argmin(x_coords), np.argmax(x_coords)
                    pts = []
                    if yolo_mask[cy, cx]:
                        pts.append([cx, cy])
                    else:
                        # Centroid is off-mask (e.g. concave shape): use a real mask pixel.
                        pts.append([x_coords[len(x_coords) // 2], y_coords[len(y_coords) // 2]])
                    for idx in [idx_top, idx_bot, idx_lft, idx_rgt]:
                        px, py = _toward(cx, cy, x_coords[idx], y_coords[idx])
                        if 0 <= py < h and 0 <= px < w and yolo_mask[py, px]:
                            pts.append([px, py])
                        else:
                            pts.append(pts[0])
                    pts_np = np.array(pts)
                    labels_np = np.ones(len(pts))
                    masks_sam, _, _ = predictor.predict(box=box_np, point_coords=pts_np, point_labels=labels_np, multimask_output=False)
                    sam_mask = masks_sam[0]
                elif strategy == 3:
                    # Strategy 3: Direct Mask Prompting
                    # The YOLO mask is resized to 256x256 and encoded as +30 / -30
                    # before being handed to SAM as `mask_input`.
                    yolo_mask_resized = cv2.resize((yolo_mask).astype(np.float32), (256, 256), interpolation=cv2.INTER_NEAREST)
                    mask_input = np.zeros((1, 256, 256), dtype=np.float32)
                    mask_input[0] = np.where(yolo_mask_resized > 0.5, 30.0, -30.0)
                    masks_sam, _, _ = predictor.predict(box=box_np, mask_input=mask_input, multimask_output=False)

                    # Keep only the outer contours and fill them, removing interior holes.
                    raw_mask = (masks_sam[0].astype(np.uint8) * 255)
                    contours, _ = cv2.findContours(raw_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    filled_mask = np.zeros_like(raw_mask)
                    cv2.drawContours(filled_mask, contours, -1, 255, cv2.FILLED)
                    sam_mask = (filled_mask > 0)
                else:
                    sam_mask = np.zeros((h, w), dtype=bool)

                sam_mask_uint = sam_mask.astype(np.uint8)
                if morph_cleanup:
                    sam_mask_uint = apply_morphology(sam_mask_uint)
                combined_mask |= sam_mask_uint.astype(bool)
                boxes_list.append(box_np.tolist())
                labels.append(f"glass {conf:.2f}")

        elapsed = time.time() - t0
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        out = apply_mask_overlay(img_rgb, combined_mask, color=color)
        out = draw_boxes(out, boxes_list, labels, color=color)
        return out, (combined_mask * 255).astype(np.uint8), f"Found: {len(boxes_list)} | Strategy: {strategy} | Inference: {elapsed:.2f}s{morph_note}"
    except ImportError:
        return img_rgb, None, "Error: segment-anything not installed"

def run_mask_rcnn(img_rgb, weights_path):
    """Run a fine-tuned two-class Mask R-CNN (ResNet-50 FPN v2) and render the result.

    The torchvision model is built without pretrained weights, its box and mask
    heads are replaced by two-class predictors, and the checkpoint at
    ``weights_path`` is loaded (either a bare state dict or a dict with a
    ``"model_state_dict"`` entry).

    Args:
        img_rgb: Input image array.
        weights_path: Path of the checkpoint file.

    Returns:
        ``(overlay, bw_mask, status)``; on any failure
        ``(img_rgb, None, error_message)``. The reported time includes
        building and loading the model.
    """
    t0 = time.time()
    try:
        from torchvision.models.detection import maskrcnn_resnet50_fpn_v2
        from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
        from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
        import torchvision.transforms.v2 as T

        model = maskrcnn_resnet50_fpn_v2(weights=None)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, 2)

        # NOTE(security): weights_only=False unpickles arbitrary Python objects.
        # Only load checkpoints that come from a trusted source.
        checkpoint = torch.load(weights_path, map_location=DEVICE, weights_only=False)
        if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
            model.load_state_dict(checkpoint["model_state_dict"])
        else:
            model.load_state_dict(checkpoint)

        model.to(DEVICE)
        model.eval()

        img_tensor = T.ToTensor()(Image.fromarray(img_rgb)).to(DEVICE)
        with torch.no_grad():
            outputs = model([img_tensor])[0]

        h, w = img_rgb.shape[:2]
        pred_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels_list = [], []

        for score, mask, box in zip(outputs['scores'], outputs['masks'], outputs['boxes']):
            # Same confidence threshold as the other detectors in this file.
            if score > CONF:
                # Binarise the per-instance soft mask at 0.5 and merge it.
                pred_mask |= (mask[0].cpu().numpy() > 0.5)
                boxes_list.append(box.cpu().numpy().tolist())
                labels_list.append(f"glass {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, pred_mask, color=(255, 165, 0))
        out = draw_boxes(out, boxes_list, labels_list, color=(255, 165, 0))
        bw_mask = (pred_mask * 255).astype(np.uint8)

        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Mask R-CNN Error: {e}"

def run_grounding_dino(img_rgb, text_prompt):
    """Detect objects described by ``text_prompt`` with Grounding DINO (tiny) and draw their boxes.

    Grounding DINO only produces boxes, so the returned mask is all zeros.

    Args:
        img_rgb: Input image array.
        text_prompt: Free-text description of what to find. It is stripped,
            lower-cased and terminated with "." before use, which is the query
            format the model expects.

    Returns:
        ``(overlay, bw_mask, status)``; on an empty prompt or any failure
        ``(img_rgb, None, error_message)``. The reported time includes loading
        the processor and model.
    """
    # Grounding DINO expects lower-case queries that end with a dot.
    prompt = (text_prompt or "").strip().lower()
    if not prompt:
        return img_rgb, None, "Grounding DINO Error: please enter a text prompt."
    if not prompt.endswith("."):
        prompt += "."

    try:
        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
        t0 = time.time()
        model_id = "IDEA-Research/grounding-dino-tiny"
        processor = AutoProcessor.from_pretrained(model_id)
        model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(DEVICE)

        inputs = processor(images=img_rgb, text=prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = model(**inputs)

        h, w = img_rgb.shape[:2]
        # target_sizes makes the post-processor return boxes in image pixels.
        results = processor.post_process_grounded_object_detection(
            outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
        )[0]

        boxes = results["boxes"].cpu().numpy().tolist()
        scores = results["scores"].cpu().numpy().tolist()
        labels = results["labels"]

        elapsed = time.time() - t0
        bw_mask = np.zeros((h, w), dtype=np.uint8) # DINO is boxes only
        str_labels = [f"{lbl} {scr:.2f}" for lbl, scr in zip(labels, scores)]
        # draw_boxes works on its own copy, so the input image is left untouched.
        out = draw_boxes(img_rgb, boxes, str_labels, color=(255, 100, 50))
        return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounding DINO Error: {e}\n(Need transformers>=4.35)"

def run_grounded_sam(img_rgb, text_prompt):
    """Grounded-SAM: find boxes for ``text_prompt`` with Grounding DINO, then segment each box with SAM.

    Args:
        img_rgb: Input image array.
        text_prompt: Free-text description of what to find. It is stripped,
            lower-cased and terminated with "." before use, which is the query
            format Grounding DINO expects.

    Returns:
        ``(overlay, bw_mask, status)``; on an empty prompt or any failure
        ``(img_rgb, None, error_message)``. The reported time includes loading
        the models.
    """
    # Grounding DINO expects lower-case queries that end with a dot.
    prompt = (text_prompt or "").strip().lower()
    if not prompt:
        return img_rgb, None, "Grounded SAM Error: please enter a text prompt."
    if not prompt.endswith("."):
        prompt += "."

    try:
        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
        from segment_anything import sam_model_registry, SamPredictor
        import urllib.request

        t0 = time.time()

        # 1. DINO Detection
        dino_id = "IDEA-Research/grounding-dino-tiny"
        processor = AutoProcessor.from_pretrained(dino_id)
        dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(dino_id).to(DEVICE)
        inputs = processor(images=img_rgb, text=prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = dino_model(**inputs)

        h, w = img_rgb.shape[:2]
        dino_res = processor.post_process_grounded_object_detection(
            outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
        )[0]
        boxes = dino_res["boxes"].cpu().numpy()
        scores = dino_res["scores"].cpu().numpy()
        labels_txt = dino_res["labels"]

        combined_mask = np.zeros((h, w), dtype=bool)
        str_labels = []

        # 2. SAM Segmentation -- only set SAM up when there is something to segment.
        if len(boxes) > 0:
            CKPT = "sam_vit_b_01ec64.pth"
            URL  = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
            if not os.path.exists(CKPT):
                # Download under a temporary name and rename on success, so an
                # interrupted download never leaves a truncated checkpoint behind.
                partial = CKPT + ".part"
                urllib.request.urlretrieve(URL, partial)
                os.replace(partial, CKPT)

            sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
            predictor = SamPredictor(sam)
            predictor.set_image(img_rgb)

            for box, score, label in zip(boxes, scores, labels_txt):
                masks, _, _ = predictor.predict(box=box, multimask_output=False)
                combined_mask |= masks[0]
                str_labels.append(f"{label} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(255, 80, 160))
        out = draw_boxes(out, boxes.tolist(), str_labels, color=(255, 80, 160))
        return out, (combined_mask * 255).astype(np.uint8), f"Found: {len(boxes)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounded SAM Error: {e}"
def run_intelliarts_car_parts(img_rgb):
    """Segment car glass and mirrors with the Intelliarts Detectron2 car-parts model.

    Detectron2 is installed on demand if it cannot be imported, and the model
    weights are downloaded on first use. Only the classes ``back_glass``,
    ``front_glass``, ``left_mirror`` and ``right_mirror`` are kept.

    Args:
        img_rgb: Input image in RGB channel order.

    Returns:
        ``(overlay, bw_mask, status)``; on any failure
        ``(img_rgb, None, error_message)``. The reported time covers the whole
        call, including any installation or download.
    """
    t0 = time.time()
    try:
        import detectron2  # noqa: F401 -- availability probe only
    except ImportError:
        import importlib
        import subprocess
        import sys

        print("Installing detectron2... this may take a few minutes!")
        # Use this interpreter's pip so the package lands in the environment
        # that is actually running the app. A failed install surfaces as an
        # import error in the block below.
        subprocess.run(
            [
                sys.executable, "-m", "pip", "install",
                "git+https://github.com/facebookresearch/detectron2.git",
                "--no-build-isolation",
            ],
            check=False,
        )
        # Let the import system see the freshly installed package.
        importlib.invalidate_caches()

    try:
        from detectron2 import model_zoo
        from detectron2.engine import DefaultPredictor
        from detectron2.config import get_cfg
        import urllib.request

        model_url = "https://huggingface.co/spaces/intelliarts/Car_parts_detection/resolve/main/model_final.pth"
        model_path = "intelliarts_model_final.pth"
        if not os.path.exists(model_path):
            print("Downloading Intelliarts Car Parts weights...")
            # Download under a temporary name and rename on success, so an
            # interrupted download never leaves truncated weights behind.
            partial_path = model_path + ".part"
            urllib.request.urlretrieve(model_url, partial_path)
            os.replace(partial_path, model_path)

        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.45
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 19
        cfg.MODEL.WEIGHTS = model_path
        cfg.MODEL.DEVICE = DEVICE

        predictor = DefaultPredictor(cfg)
        # DefaultPredictor takes images in BGR channel order, so flip the
        # channels of the RGB input (contiguous copy for downstream ops).
        img_bgr = np.ascontiguousarray(img_rgb[:, :, ::-1])
        outputs = predictor(img_bgr)
        instances = outputs["instances"].to("cpu")

        # Classes: 2: back_glass, 8: front_glass, 14: left_mirror, 15: right_mirror
        target_classes = [2, 8, 14, 15]
        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels_list = [], []

        classes = instances.pred_classes.numpy()
        scores = instances.scores.numpy()
        boxes = instances.pred_boxes.tensor.numpy()
        masks = instances.pred_masks.numpy()

        class_names = ['_background_', 'back_bumper', 'back_glass', 'back_left_door', 'back_left_light', 'back_right_door', 'back_right_light', 'front_bumper', 'front_glass', 'front_left_door', 'front_left_light', 'front_right_door', 'front_right_light', 'hood', 'left_mirror', 'right_mirror', 'tailgate', 'trunk', 'wheel']

        for cls_id, score, box, mask in zip(classes, scores, boxes, masks):
            if cls_id not in target_classes:
                continue
            combined_mask |= mask
            boxes_list.append(box.tolist())
            labels_list.append(f"{class_names[cls_id]} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(50, 150, 255))
        out = draw_boxes(out, boxes_list, labels_list, color=(50, 150, 255))
        bw_mask = (combined_mask * 255).astype(np.uint8)

        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Intelliarts Detectron2 Error: {e}"

# ═══════════════════════════════════════════════════════════════════════════════
# SegFormer Function
# ═══════════════════════════════════════════════════════════════════════════════
def run_segformer(img_rgb, morph_cleanup=False):
    """Run the fine-tuned SegFormer semantic-segmentation model and render class 1 as the mask.

    The model directory is searched in several locations (local project
    layout, working directory, next to this file, or files placed directly in
    one of those roots); a candidate counts only if it contains ``config.json``.

    Args:
        img_rgb: Input image array.
        morph_cleanup: When True, the predicted mask is passed through
            ``apply_morphology``.

    Returns:
        ``(overlay, bw_mask, status)``; on any failure
        ``(img_rgb, None, error_message)``. The reported time includes loading
        the model.
    """
    try:
        from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
        import torch.nn.functional as F

        t0 = time.time()
        app_dir = os.path.dirname(__file__)
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        # Paths to try (works for both local PC and Hugging Face Cloud deployment)
        paths_to_try = [
            os.path.join(base_dir, "SegFormer_Model", "best_segformer_dice_model"), # Local PC
            "best_segformer_dice_model",                                            # Hugging Face Root
            os.path.join(app_dir, "best_segformer_dice_model"),                     # Next to app.py
        ]

        # If files were uploaded directly to the root (no folder)
        if os.path.exists("config.json"):
            paths_to_try.append(".")
        if os.path.exists(os.path.join(app_dir, "config.json")):
            paths_to_try.append(app_dir)

        # First candidate that exists and contains config.json; otherwise fall
        # back to the default folder name and let from_pretrained report the error.
        model_path = next(
            (p for p in paths_to_try
             if os.path.exists(p) and os.path.exists(os.path.join(p, "config.json"))),
            "best_segformer_dice_model",
        )

        processor = SegformerImageProcessor.from_pretrained(model_path)
        model = SegformerForSemanticSegmentation.from_pretrained(model_path).to(DEVICE)

        inputs = processor(images=Image.fromarray(img_rgb), return_tensors="pt")
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            h, w = img_rgb.shape[:2]
            # Upsample the logits to the input resolution before classifying pixels.
            logits = F.interpolate(outputs.logits, size=(h, w), mode="bilinear", align_corners=False)[0]

        probs = F.softmax(logits, dim=0)
        pred_mask = (probs[1] > 0.5).cpu().numpy().astype(np.uint8)

        # Apply morphological cleanup if requested
        if morph_cleanup:
            pred_mask = apply_morphology(pred_mask, close_k=15, open_k=7)

        elapsed = time.time() - t0
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        # Pass a boolean mask: indexing the image with the uint8 0/1 array would
        # be integer fancy indexing, not masking.
        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 50, 50))
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s{morph_note}"
    except Exception as e:
        return img_rgb, None, f"SegFormer Error: {e}"

# ═══════════════════════════════════════════════════════════════════════════════
# BiRefNet Function
# ═══════════════════════════════════════════════════════════════════════════════
def run_birefnet(img_rgb):
    """Run the BiRefNet segmentation model and render its mask in red.

    A local model directory is used when one of the known locations contains
    both ``config.json`` and ``model.safetensors``; otherwise the model is
    fetched from the Hugging Face repo ``Ayesha-Majeed/birefnet_car_window``.

    Args:
        img_rgb: Input image array.

    Returns:
        ``(overlay, bw_mask, status)``; on any failure
        ``(img_rgb, None, error_message)``. The reported time includes loading
        the model.
    """
    try:
        from transformers import AutoModelForImageSegmentation
        from torchvision import transforms
        import torch.nn.functional as F
        from PIL import Image

        started = time.time()

        here = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(here)

        # Candidate locations (local PC layout and Hugging Face deployment).
        candidates = [
            os.path.join(project_root, "BiRefNet_Model", "best_model-20260624T051601Z-3-001", "best_model"),
            "birefnet_model",
            os.path.join(here, "birefnet_model"),
            "best_birefnet_model",
        ]

        def _is_model_dir(path):
            if not os.path.exists(path):
                return False
            if not os.path.exists(os.path.join(path, "config.json")):
                return False
            return os.path.exists(os.path.join(path, "model.safetensors"))

        # Last resort: download directly from the Hugging Face model repo.
        model_path = next(
            (candidate for candidate in candidates if _is_model_dir(candidate)),
            "Ayesha-Majeed/birefnet_car_window",
        )

        model = AutoModelForImageSegmentation.from_pretrained(model_path, trust_remote_code=True).to(DEVICE)
        model.eval()

        preprocess = transforms.Compose([
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
        batch = preprocess(Image.fromarray(img_rgb)).unsqueeze(0).to(DEVICE)

        def _last_output():
            # When the model returns a list/tuple, the last entry is used.
            preds = model(batch)
            if isinstance(preds, (list, tuple)):
                return preds[-1]
            return preds

        with torch.no_grad():
            if DEVICE == "cuda":
                # Mixed precision on the GPU only.
                with torch.amp.autocast("cuda"):
                    final_pred = _last_output()
            else:
                final_pred = _last_output()

        height, width = img_rgb.shape[:2]
        final_pred = F.interpolate(final_pred, size=(height, width), mode="bilinear", align_corners=False)
        binary = torch.sigmoid(final_pred) > 0.5
        pred_mask = binary.squeeze().cpu().numpy().astype(np.uint8)

        elapsed = time.time() - started
        overlay = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 0, 0)) # Red
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return overlay, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"BiRefNet Error: {e}"

# ═══════════════════════════════════════════════════════════════════════════════
# Gradio Process Function
# ═══════════════════════════════════════════════════════════════════════════════
# Palette of pastel and neon colours for the overlays. process_image picks one
# entry at random per run and passes it as `color=` to the model runners, which
# draw it onto the RGB input image.
PASTEL_COLORS = [
    (255, 105, 180), # Hot/Light Pink
    (180, 130, 255), # Light Purple
    (0, 215, 255),   # Light Sky Blue / Cyan
    (255, 220, 50),  # Light Yellow
    (255, 160, 50),  # Light Orange
    (150, 255, 150), # Light Mint Green
    (240, 240, 255), # Light White / Silver
]

def process_image(img_rgb, model_name, text_prompt="", morph_cleanup=False):
    """Dispatch one image to the runner selected by ``model_name``.

    Args:
        img_rgb: Input image. ``None`` short-circuits with an upload prompt.
        model_name: UI label identifying which model runner to invoke.
        text_prompt: Prompt forwarded to the Grounding DINO and Grounded SAM
            runners; unused by the other models.
        morph_cleanup: Flag forwarded to the YOLO, SAM + YOLO and SegFormer
            runners.

    Returns:
        The selected runner's return value. On the early-exit, unknown-model
        and error paths this is a 3-tuple ``(image, None, message)``.
    """
    if img_rgb is None:
        return None, None, "Please upload an image."

    # One colour is drawn per call and shared by every runner that does not
    # use a fixed colour.
    overlay_color = random.choice(PASTEL_COLORS)

    def pick_weights(uploaded_name, repo_relative_parts, last_resort):
        # Prefer a checkpoint sitting in the working directory, then the
        # training-output location two directories above this file, then
        # the supplied last-resort path.
        if os.path.exists(uploaded_name):
            return uploaded_name
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        in_repo = os.path.join(project_root, *repo_relative_parts)
        return in_repo if os.path.exists(in_repo) else last_resort

    def yolo(weights, color):
        return run_yolo_generic(
            img_rgb, weights, target_classes=[0, 1], color=color, morph_cleanup=morph_cleanup
        )

    def sam(strategy):
        return run_sam_strategy(
            img_rgb, "best.pt", target_classes=[0, 1], color=overlay_color,
            strategy=strategy, morph_cleanup=morph_cleanup
        )

    # Every entry is lazy: runner names and weight paths are only resolved
    # when the chosen entry is called inside the try block below.
    runners = {
        "YOLOv8x-seg (Custom Window)": lambda: yolo("best.pt", overlay_color),
        "YOLOv8x-seg": lambda: yolo("best.pt", (255, 215, 0)),
        "YOLO11x-seg": lambda: yolo(
            pick_weights(
                "yolo11_best.pt",
                ("runs", "segment", "runs", "car_mirror_seg", "yolo11x_seg_1024", "weights", "best.pt"),
                "best.pt",  # Fallback
            ),
            (0, 255, 120),
        ),
        "SAM + YOLO (Strategy 1: Bbox + 5 Points)": lambda: sam(1),
        "SAM + YOLO (Strategy 2: Mask + 5 Points)": lambda: sam(2),
        "SAM + YOLO (Strategy 3: Direct Mask Prompting)": lambda: sam(3),
        "Mask R-CNN": lambda: run_mask_rcnn(
            img_rgb,
            pick_weights(
                "maskrcnn_best.pt",
                ("Mask_RCNN", "runs", "woven-sweep-5", "best.pt"),
                "Mask_RCNN/runs/woven-sweep-5/best.pt",
            ),
        ),
        "Grounding DINO (Zero-Shot Detection)": lambda: run_grounding_dino(img_rgb, text_prompt),
        "Grounded SAM (Zero-Shot Segmentation)": lambda: run_grounded_sam(img_rgb, text_prompt),
        "Intelliarts Car Parts (Detectron2)": lambda: run_intelliarts_car_parts(img_rgb),
        "SegFormer": lambda: run_segformer(img_rgb, morph_cleanup=morph_cleanup),
    }

    try:
        # Non-string labels can never equal a known label, so they fall
        # through to "not recognized" without being hashed.
        selected = runners.get(model_name) if isinstance(model_name, str) else None
        if selected is None:
            return img_rgb, None, "Model not recognized."
        return selected()
    except Exception as e:
        return img_rgb, None, f"Error: {str(e)}"

# ═══════════════════════════════════════════════════════════════════════════════
# Gradio UI
# ═══════════════════════════════════════════════════════════════════════════════
def _result_panel(heading, label):
    """Create one model's result panel in the current Gradio layout context.

    Emits a Markdown heading, a row holding the overlay image and the
    single-channel binary-mask image, and a stats textbox below the row.

    Args:
        heading: Markdown text for the panel heading.
        label: Prefix for the three component labels
            (``"<label> Overlay"``, ``"<label> Binary Mask"``, ``"<label> Stats"``).

    Returns:
        ``(overlay_image, binary_mask_image, stats_textbox)`` components.
    """
    gr.Markdown(heading)
    with gr.Row():
        overlay = gr.Image(label=f"{label} Overlay", interactive=False)
        binary_mask = gr.Image(label=f"{label} Binary Mask", interactive=False, image_mode="L")
    stats = gr.Textbox(label=f"{label} Stats", interactive=False)
    return overlay, binary_mask, stats


theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")

with gr.Blocks(theme=theme, title="Car Window Segmentation") as demo:
    gr.Markdown("""
    # Car Window Segmentation
    Compare your custom trained YOLOv8 model against state-of-the-art Zero-Shot models!
    """)


    # ── Comprehensive Evaluation tab ──
    with gr.Tab("Comprehensive Evaluation"):
        gr.Markdown("### Comprehensive Evaluation: Results from All Trained and Pretrained Models")
        # The backslashes before the dots of items 10-12 are Markdown escapes.
        # They are written as "\\." so the Python literal contains no invalid
        # escape sequence; the rendered text is unchanged.
        gr.Markdown("""**The following models will run and display their results below:**

**Custom Trained Models:**

1. SegFormer
2. SegFormer + Morphological
3. YOLO11x-seg
4. YOLOv8x-seg
5. Mask R-CNN
6. BiRefNet
7. SAM + YOLO (Strategy 1: Bbox + 5 Points)
8. SAM + YOLO (Strategy 2: Mask + 5 Points)
9. SAM + YOLO (Strategy 3: Direct Mask Prompting)

**Pretrained Zero-Shot Models:**

10\\. Grounding DINO

11\\. Grounded SAM

12\\. Intelliarts Car Parts

**Our Findings:** SegFormer and YOLO11x deliver the best performance with significantly sharper edge precision.
""")

        with gr.Row():
            input_image_seq = gr.Image(type="numpy", label="Upload Window Image")
        with gr.Row():
            submit_btn_seq = gr.Button("Run All Models", variant="primary", size="lg")
            stop_btn_seq = gr.Button("🛑 Stop Processing", variant="stop", size="lg")

        # The example gallery is only shown when example images were found at startup.
        if mirror_examples:
            gr.Markdown("### Or click any example image below to load it:")
            compare_gallery = gr.Gallery(value=mirror_examples, columns=10, height=120, object_fit="cover", allow_preview=False, show_label=False)

            def load_compare_img(evt: gr.SelectData):
                """Return the example file path matching the clicked gallery item."""
                return mirror_examples[evt.index]

            compare_gallery.select(fn=load_compare_img, inputs=None, outputs=input_image_seq)

        gr.Markdown("---")
        gr.Markdown("## 🚀 Custom Trained Models")

        seq_segf_img, seq_segf_bw, seq_segf_stats = _result_panel(
            "### 1️⃣ SegFormer (Transformer)", "SegFormer")

        gr.Markdown("---")
        seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats = _result_panel(
            "### 2️⃣ SegFormer + Morphological Cleanup (Holes Filled + Sharp Borders)", "SegFormer + Morph")

        gr.Markdown("---")
        seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats = _result_panel(
            "### 3️⃣ YOLO11x-seg", "YOLO11x")

        gr.Markdown("---")
        seq_yolo_img, seq_yolo_bw, seq_yolo_stats = _result_panel(
            "### 4️⃣ YOLOv8x-seg", "YOLO")

        gr.Markdown("---")
        seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats = _result_panel(
            "### 5️⃣ Mask R-CNN (ResNet50-FPN)", "Mask R-CNN")

        gr.Markdown("---")
        seq_biref_img, seq_biref_bw, seq_biref_stats = _result_panel(
            "### 6️⃣ BiRefNet (Boundary-Aware Model)", "BiRefNet")

        gr.Markdown("---")
        seq_sam1_img, seq_sam1_bw, seq_sam1_stats = _result_panel(
            "### 7️⃣ SAM + YOLO (Strategy 1: Bbox + 5 Points)", "SAM+YOLO Strat 1")

        gr.Markdown("---")
        seq_sam2_img, seq_sam2_bw, seq_sam2_stats = _result_panel(
            "### 8️⃣ SAM + YOLO (Strategy 2: Mask + 5 Points)", "SAM+YOLO Strat 2")

        gr.Markdown("---")
        seq_sam3_img, seq_sam3_bw, seq_sam3_stats = _result_panel(
            "### 9️⃣ SAM + YOLO (Strategy 3: Direct Mask Prompting)", "SAM+YOLO Strat 3")

        gr.Markdown("---")
        gr.Markdown("## 🌍 Pretrained Zero-Shot Models")

        seq_dino_img, seq_dino_bw, seq_dino_stats = _result_panel(
            "### 🔟 Grounding DINO (Zero-Shot Detection)", "Grounding DINO")

        gr.Markdown("---")
        seq_gsam_img, seq_gsam_bw, seq_gsam_stats = _result_panel(
            "### 1️⃣1️⃣ Grounded SAM (Zero-Shot Segmentation)", "Grounded SAM")

        gr.Markdown("---")
        seq_intell_img, seq_intell_bw, seq_intell_stats = _result_panel(
            "### 1️⃣2️⃣ Intelliarts Car Parts (Detectron2)", "Intelliarts Car Parts")

        def run_all_models(img):
            """Run all twelve models on ``img`` one after another, streaming results.

            This is a generator: every yielded value is a 36-tuple made of
            twelve consecutive ``(overlay, binary mask, stats)`` triples, in
            the same order as the models are run below. The first yield marks
            every stats slot as pending; each later yield fills in one more
            model's triple.

            A model that raises, or that does not return exactly three values,
            is reported as ``(img, None, "Error: ...")`` in its own slot and
            the remaining models still run. ``GeneratorExit`` is not an
            ``Exception`` subclass, so closing the generator still stops it.

            Args:
                img: Input image, or ``None`` when nothing was uploaded.

            Yields:
                Tuples of 36 values; all ``None`` when ``img`` is ``None``.
            """
            slot_count = 36  # 12 models x (overlay, mask, stats)

            if img is None:
                yield tuple([None] * slot_count)
                return

            # ── Step 0: Show "Processing..." in ALL textboxes immediately ──
            PENDING = "⏳ Processing..."
            results = [None] * slot_count
            # The stats slot is the third element of every triple.
            for stats_idx in range(2, slot_count, 3):
                results[stats_idx] = PENDING
            yield tuple(results)

            zero_shot_prompt = "car window. car glass. windshield."

            # One lazy job per model, in output order. Names are resolved only
            # when a job is called, so a failure stays inside that job's try.
            jobs = [
                # 1. SegFormer
                lambda: run_segformer(img, morph_cleanup=False),
                # 2. SegFormer + Morphology
                lambda: run_segformer(img, morph_cleanup=True),
                # 3. YOLO11x-seg
                lambda: process_image(img, "YOLO11x-seg", "", False),
                # 4. YOLOv8x-seg
                lambda: process_image(img, "YOLOv8x-seg", "", False),
                # 5. Mask R-CNN
                lambda: process_image(img, "Mask R-CNN", "", False),
                # 6. BiRefNet
                lambda: run_birefnet(img),
                # 7. SAM + YOLO Strat 1
                lambda: process_image(img, "SAM + YOLO (Strategy 1: Bbox + 5 Points)", "", False),
                # 8. SAM + YOLO Strat 2
                lambda: process_image(img, "SAM + YOLO (Strategy 2: Mask + 5 Points)", "", False),
                # 9. SAM + YOLO Strat 3
                lambda: process_image(img, "SAM + YOLO (Strategy 3: Direct Mask Prompting)", "", False),
                # 10. Grounding DINO
                lambda: process_image(img, "Grounding DINO (Zero-Shot Detection)", zero_shot_prompt, False),
                # 11. Grounded SAM
                lambda: process_image(img, "Grounded SAM (Zero-Shot Segmentation)", zero_shot_prompt, False),
                # 12. Intelliarts
                lambda: process_image(img, "Intelliarts Car Parts (Detectron2)", "", False),
            ]

            for job_idx, job in enumerate(jobs):
                first_slot = 3 * job_idx
                try:
                    overlay, mask, stats = job()
                except Exception as e:
                    # Keep the failure local so later models are not left
                    # showing the pending marker forever.
                    overlay, mask, stats = img, None, f"Error: {e}"
                results[first_slot:first_slot + 3] = [overlay, mask, stats]
                yield tuple(results)

        # Output components listed as (overlay, binary mask, stats) per model,
        # in the same order run_all_models fills its 36-slot result tuple.
        all_model_outputs = [
            seq_segf_img, seq_segf_bw, seq_segf_stats,
            seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats,
            seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats,
            seq_yolo_img, seq_yolo_bw, seq_yolo_stats,
            seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats,
            seq_biref_img, seq_biref_bw, seq_biref_stats,
            seq_sam1_img, seq_sam1_bw, seq_sam1_stats,
            seq_sam2_img, seq_sam2_bw, seq_sam2_stats,
            seq_sam3_img, seq_sam3_bw, seq_sam3_stats,
            seq_dino_img, seq_dino_bw, seq_dino_stats,
            seq_gsam_img, seq_gsam_bw, seq_gsam_stats,
            seq_intell_img, seq_intell_bw, seq_intell_stats,
        ]

        # Keep the click event handle so the stop button can cancel it.
        run_event = submit_btn_seq.click(
            fn=run_all_models,
            inputs=[input_image_seq],
            outputs=all_model_outputs,
        )

        # The stop button runs no function of its own; it only cancels the run.
        stop_btn_seq.click(fn=None, inputs=None, outputs=None, cancels=[run_event])

# Launch the Gradio app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()