"""Gradio demo comparing car-window segmentation models on a single image.

Every ``run_*`` function takes an RGB ``numpy`` image and returns a 3-tuple
``(overlay_rgb, binary_mask_or_None, stats_text)`` so the UI can treat all
models uniformly.
"""

import os
import random
import shutil
import subprocess
import sys
import time
import urllib.request
import warnings
import zipfile

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image

warnings.filterwarnings("ignore")

# ═══════════════════════════════════════════════════════════════════════════════
# STEP 1: Extract any .zip files in current directory
# ═══════════════════════════════════════════════════════════════════════════════
print("=" * 60)
print(f"[STARTUP] Working dir: {os.getcwd()}")
for f in os.listdir("."):
    if f.endswith(".zip"):
        # Best effort: a broken archive must not prevent the app from starting.
        try:
            with zipfile.ZipFile(f, 'r') as zf:
                zf.extractall(".")
            print(f"[ZIP] Extracted {f} OK!")
        except Exception as e:
            print(f"[ZIP] ERROR: {e}")

# ═══════════════════════════════════════════════════════════════════════════════
# STEP 2: Copy images to root
# ═══════════════════════════════════════════════════════════════════════════════
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')


def prepare_clean_examples(src_folder, prefix, limit=10):
    """Copy up to ``limit`` images from ``src_folder`` into the working dir.

    Files are renamed ``{prefix}_{n}{ext}`` (original extension kept, so a PNG
    is no longer mislabelled as ``.jpg``).  Returns the list of copied file
    names; an empty list when the folder does not exist.
    """
    results = []
    if not os.path.exists(src_folder):
        return results
    for root, _dirs, files in os.walk(src_folder):
        for fname in sorted(files):
            ext = os.path.splitext(fname)[1].lower()
            if ext not in IMAGE_EXTENSIONS:
                continue
            src_path = os.path.join(root, fname)
            dst_name = f"{prefix}_{len(results)}{ext}"
            try:
                shutil.copy2(src_path, dst_name)
            except OSError as e:
                print(f"Error copying {src_path}: {e}")
                continue
            results.append(dst_name)
            if len(results) >= limit:
                return results
    return results


mirror_examples = []
for folder in ["test car windows", "test_car_windows", "test car windows segmentation"]:
    if os.path.exists(folder):
        mirror_examples = prepare_clean_examples(folder, "mirror", limit=15)
        break
if not mirror_examples and os.path.exists("car.jpeg"):
    mirror_examples = ["car.jpeg"]

# ═══════════════════════════════════════════════════════════════════════════════
# Global Settings
# ═══════════════════════════════════════════════════════════════════════════════
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
CONF = 0.45

SAM_CKPT = "sam_vit_b_01ec64.pth"
SAM_URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"


def _app_dir():
    """Return the absolute directory that contains this script."""
    return os.path.dirname(os.path.abspath(__file__))


def _download_once(url, dst_path):
    """Download ``url`` to ``dst_path`` unless it already exists.

    The data is written to a ``.part`` file first and renamed on success, so
    an interrupted download never leaves a truncated file that later passes
    the ``os.path.exists`` check.
    """
    if os.path.exists(dst_path):
        return dst_path
    tmp_path = dst_path + ".part"
    try:
        urllib.request.urlretrieve(url, tmp_path)
        os.replace(tmp_path, dst_path)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return dst_path


def _load_sam_predictor(img_rgb):
    """Build a SAM ViT-B predictor with ``img_rgb`` already embedded.

    Raises ImportError when ``segment_anything`` is not installed.
    """
    from segment_anything import sam_model_registry, SamPredictor

    ckpt = _download_once(SAM_URL, SAM_CKPT)
    sam = sam_model_registry["vit_b"](checkpoint=ckpt).to(DEVICE)
    predictor = SamPredictor(sam)
    predictor.set_image(img_rgb)
    return predictor


def apply_mask_overlay(img_rgb, mask_bool, color=(0, 215, 255), alpha=0.4):
    """Highlight the masked region: tinted + outlined inside, darkened outside.

    ``mask_bool`` may be a boolean or a 0/1 (or 0/255) integer mask of shape
    (H, W); it is coerced to ``bool`` first.  Without the coercion an integer
    mask would be used as *index* array and only rows 0/1 would be tinted.
    """
    mask_bool = np.asarray(mask_bool).astype(bool)

    # 1. Darken the background (50% brightness, no blur)
    dark_bg = cv2.addWeighted(img_rgb, 0.5, np.zeros_like(img_rgb), 0.5, 0)

    # 2. For the mask area, keep original brightness and tint it
    tinted_sharp = img_rgb.copy()
    tinted_sharp[mask_bool] = color
    tinted_sharp = cv2.addWeighted(tinted_sharp, alpha, img_rgb, 1 - alpha, 0)

    # 3. Draw the mask boundary on the tinted image (before compositing)
    mask_img = mask_bool.astype(np.uint8) * 255
    contours, _ = cv2.findContours(mask_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(tinted_sharp, contours, -1, color, 2, cv2.LINE_AA)

    # 4. Combine: dark background outside, bright tinted object + boundary inside
    return np.where(mask_bool[:, :, None], tinted_sharp, dark_bg)


def draw_boxes(img_rgb, boxes, labels, color=(0, 215, 255)):
    """Draw a thin rectangle plus HUD-style corner brackets for each box.

    ``labels`` is accepted for interface compatibility but not rendered: text
    was removed so it does not obstruct the segmentation masks.
    """
    out = img_rgb.copy()
    thick = 3
    for box, _label in zip(boxes, labels):
        x1, y1, x2, y2 = map(int, box)
        # Faint inner bounding box line
        cv2.rectangle(out, (x1, y1), (x2, y2), color, 1)
        # Bracket arm length: 15% of the shorter box side
        length = int(min(x2 - x1, y2 - y1) * 0.15)
        # (corner_x, corner_y, x direction, y direction) pointing into the box
        corners = ((x1, y1, 1, 1), (x2, y1, -1, 1), (x1, y2, 1, -1), (x2, y2, -1, -1))
        for cx, cy, dx, dy in corners:
            cv2.line(out, (cx, cy), (cx + dx * length, cy), color, thick, cv2.LINE_AA)
            cv2.line(out, (cx, cy), (cx, cy + dy * length), color, thick, cv2.LINE_AA)
    return out


# ═══════════════════════════════════════════════════════════════════════════════
# Morphological post-processing helper
# ═══════════════════════════════════════════════════════════════════════════════
def apply_morphology(mask_uint8, close_k=15, open_k=7):
    """Fill holes (Closing) then remove tiny blobs (Opening) on a binary mask."""
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (close_k, close_k))
    open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (open_k, open_k))
    closed = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, close_kernel)  # fill holes
    return cv2.morphologyEx(closed, cv2.MORPH_OPEN, open_kernel)  # remove noise


# ═══════════════════════════════════════════════════════════════════════════════
# Model Functions
# ═══════════════════════════════════════════════════════════════════════════════
def run_yolo_generic(img_rgb, model_path, target_classes, color, morph_cleanup=False):
    """Run a YOLO segmentation checkpoint and merge masks of ``target_classes``.

    Returns ``(overlay, bw_mask, stats)``.  The reported time includes loading
    the checkpoint, because the model is rebuilt on every call.
    """
    from ultralytics import YOLO

    t0 = time.time()
    model = YOLO(model_path)
    # retina_masks=True returns masks at the original image resolution
    results = model(img_rgb, conf=CONF, verbose=False, retina_masks=True)
    elapsed = time.time() - t0

    result = results[0]
    h, w = img_rgb.shape[:2]
    combined_mask = np.zeros((h, w), dtype=np.uint8)
    boxes, labels = [], []

    if result.masks is not None:
        for mask, box, cls, conf in zip(
            result.masks.data, result.boxes.xyxy, result.boxes.cls, result.boxes.conf
        ):
            if int(cls) not in target_classes:
                continue
            mask_np = (mask.cpu().numpy() > 0.5).astype(np.uint8)
            if mask_np.shape != (h, w):
                # Defensive: keep the in-place OR below shape-safe.
                mask_np = cv2.resize(mask_np, (w, h), interpolation=cv2.INTER_NEAREST)
            # Optional per-instance morphological cleanup before combining
            if morph_cleanup:
                mask_np = apply_morphology(mask_np)
            combined_mask |= mask_np
            boxes.append(box.cpu().tolist())
            labels.append(f"glass {conf:.2f}")

    # We purposely do NOT apply morphology on the final combined_mask here,
    # otherwise it will bridge the gaps (pillars) between separate windows!
    combined_mask_bool = combined_mask > 0
    morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
    out = apply_mask_overlay(img_rgb, combined_mask_bool, color=color)
    out = draw_boxes(out, boxes, labels, color=color)
    bw_mask = (combined_mask_bool * 255).astype(np.uint8)
    return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s{morph_note}"


def _box_prompt_points(box_np):
    """Strategy 1 prompts: box centre plus the four corners inset by 5 px."""
    x1, y1, x2, y2 = map(int, box_np)
    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
    return np.array(
        [[cx, cy], [x1 + 5, y1 + 5], [x2 - 5, y1 + 5], [x1 + 5, y2 - 5], [x2 - 5, y2 - 5]]
    )


def _mask_prompt_points(yolo_mask):
    """Strategy 2 prompts: five points sampled inside ``yolo_mask``.

    The first point is the mask centroid (or the median mask pixel when the
    centroid falls outside the mask).  The other four lie 60% of the way from
    the centroid towards the top/bottom/left/right extreme pixels and fall
    back to the first point when they land outside the mask.  Returns ``None``
    for an empty mask.
    """
    h, w = yolo_mask.shape[:2]
    y_coords, x_coords = np.where(yolo_mask)
    if len(x_coords) == 0:
        return None
    cx, cy = int(np.mean(x_coords)), int(np.mean(y_coords))
    if yolo_mask[cy, cx]:
        anchor = [cx, cy]
    else:
        mid = len(x_coords) // 2
        anchor = [int(x_coords[mid]), int(y_coords[mid])]
    pts = [anchor]
    extremes = (np.argmin(y_coords), np.argmax(y_coords), np.argmin(x_coords), np.argmax(x_coords))
    for idx in extremes:
        px = int(cx + (x_coords[idx] - cx) * 0.6)
        py = int(cy + (y_coords[idx] - cy) * 0.6)
        if 0 <= py < h and 0 <= px < w and yolo_mask[py, px]:
            pts.append([px, py])
        else:
            pts.append(anchor)
    return np.array(pts)


def _sam_mask_prompt(mask_bool, side=256, logit=30.0):
    """Convert a full-resolution boolean mask into SAM's low-res mask prompt.

    SAM resizes the image so its longest side fits the model input and pads
    bottom/right; the ``side`` x ``side`` mask prompt lives in that same padded
    frame.  The mask is therefore resized by longest side and padded (with the
    background logit) rather than stretched, which would misalign it on
    non-square images.  Returns a float32 array of shape (1, side, side).
    """
    h, w = mask_bool.shape[:2]
    scale = side / max(h, w)
    new_h = max(1, int(h * scale + 0.5))
    new_w = max(1, int(w * scale + 0.5))
    resized = cv2.resize(mask_bool.astype(np.float32), (new_w, new_h), interpolation=cv2.INTER_NEAREST)
    prompt = np.full((1, side, side), -logit, dtype=np.float32)
    prompt[0, :new_h, :new_w] = np.where(resized > 0.5, logit, -logit)
    return prompt


def run_sam_strategy(img_rgb, yolo_model_path, target_classes, color, strategy, morph_cleanup=False):
    """Refine YOLO detections with SAM using one of three prompting strategies.

    strategy 1: box + 5 points derived from the box;
    strategy 2: box + 5 points sampled inside the YOLO mask;
    strategy 3: box + the YOLO mask as a dense mask prompt.
    Any other value yields empty masks.  Returns ``(overlay, bw_mask, stats)``;
    on a missing dependency returns the input image, ``None`` and an error text.
    """
    try:
        # Download (if needed) before starting the timer, as before.
        _download_once(SAM_URL, SAM_CKPT)
        t0 = time.time()
        predictor = _load_sam_predictor(img_rgb)

        from ultralytics import YOLO as _YOLO
        yolo_res = _YOLO(yolo_model_path)(img_rgb, conf=CONF, verbose=False, retina_masks=True)[0]

        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels = [], []

        if yolo_res.boxes is not None and yolo_res.masks is not None:
            for box, mask_data, cls, conf in zip(
                yolo_res.boxes.xyxy, yolo_res.masks.data, yolo_res.boxes.cls, yolo_res.boxes.conf
            ):
                if int(cls) not in target_classes:
                    continue
                box_np = box.cpu().numpy()
                yolo_mask = mask_data.cpu().numpy() > 0.5

                if strategy in (1, 2):
                    pts_np = _box_prompt_points(box_np) if strategy == 1 else _mask_prompt_points(yolo_mask)
                    if pts_np is None:  # empty YOLO mask: nothing to prompt with
                        continue
                    masks_sam, _, _ = predictor.predict(
                        box=box_np,
                        point_coords=pts_np,
                        point_labels=np.ones(len(pts_np)),
                        multimask_output=False,
                    )
                    sam_mask = masks_sam[0]
                elif strategy == 3:
                    masks_sam, _, _ = predictor.predict(
                        box=box_np, mask_input=_sam_mask_prompt(yolo_mask), multimask_output=False
                    )
                    # Fill interior holes by redrawing only the external contours.
                    raw_mask = masks_sam[0].astype(np.uint8) * 255
                    contours, _ = cv2.findContours(raw_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    filled_mask = np.zeros_like(raw_mask)
                    cv2.drawContours(filled_mask, contours, -1, 255, cv2.FILLED)
                    sam_mask = filled_mask > 0
                else:
                    sam_mask = np.zeros((h, w), dtype=bool)

                sam_mask_uint = sam_mask.astype(np.uint8)
                if morph_cleanup:
                    sam_mask_uint = apply_morphology(sam_mask_uint)
                combined_mask |= sam_mask_uint.astype(bool)
                boxes_list.append(box_np.tolist())
                labels.append(f"glass {conf:.2f}")

        elapsed = time.time() - t0
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        out = apply_mask_overlay(img_rgb, combined_mask, color=color)
        out = draw_boxes(out, boxes_list, labels, color=color)
        stats = f"Found: {len(boxes_list)} | Strategy: {strategy} | Inference: {elapsed:.2f}s{morph_note}"
        return out, (combined_mask * 255).astype(np.uint8), stats
    except ImportError:
        return img_rgb, None, "Error: segment-anything not installed"


def run_mask_rcnn(img_rgb, weights_path):
    """Run the fine-tuned 2-class Mask R-CNN (ResNet50-FPN v2) checkpoint.

    Returns ``(overlay, bw_mask, stats)``; any failure is reported through the
    stats text with ``None`` as mask.
    """
    t0 = time.time()
    try:
        from torchvision.models.detection import maskrcnn_resnet50_fpn_v2
        from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
        from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
        import torchvision.transforms.v2 as T

        model = maskrcnn_resnet50_fpn_v2(weights=None)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, 2)

        # NOTE: weights_only=False unpickles arbitrary objects; only load
        # checkpoints from a trusted source.
        checkpoint = torch.load(weights_path, map_location=DEVICE, weights_only=False)
        if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
            model.load_state_dict(checkpoint["model_state_dict"])
        else:
            model.load_state_dict(checkpoint)
        model.to(DEVICE)
        model.eval()

        img_tensor = T.ToTensor()(Image.fromarray(img_rgb)).to(DEVICE)
        with torch.no_grad():
            outputs = model([img_tensor])[0]

        h, w = img_rgb.shape[:2]
        pred_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels_list = [], []
        for score, mask, box in zip(outputs['scores'], outputs['masks'], outputs['boxes']):
            if score > CONF:
                pred_mask |= mask[0].cpu().numpy() > 0.5
                boxes_list.append(box.cpu().numpy().tolist())
                labels_list.append(f"glass {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, pred_mask, color=(255, 165, 0))
        out = draw_boxes(out, boxes_list, labels_list, color=(255, 165, 0))
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Mask R-CNN Error: {e}"


def _run_dino_detection(img_rgb, text_prompt):
    """Run Grounding DINO (tiny) and return its post-processed result dict."""
    from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

    model_id = "IDEA-Research/grounding-dino-tiny"
    processor = AutoProcessor.from_pretrained(model_id)
    model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(DEVICE)
    inputs = processor(images=img_rgb, text=text_prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
    h, w = img_rgb.shape[:2]
    return processor.post_process_grounded_object_detection(
        outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
    )[0]


def run_grounding_dino(img_rgb, text_prompt):
    """Zero-shot box detection with Grounding DINO.

    DINO produces boxes only, so the returned binary mask is all zeros.
    """
    try:
        t0 = time.time()
        results = _run_dino_detection(img_rgb, text_prompt)
        boxes = results["boxes"].cpu().numpy().tolist()
        scores = results["scores"].cpu().numpy().tolist()
        labels = results["labels"]
        elapsed = time.time() - t0

        h, w = img_rgb.shape[:2]
        bw_mask = np.zeros((h, w), dtype=np.uint8)  # DINO is boxes only
        str_labels = [f"{lbl} {scr:.2f}" for lbl, scr in zip(labels, scores)]
        out = draw_boxes(img_rgb.copy(), boxes, str_labels, color=(255, 100, 50))
        return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounding DINO Error: {e}\n(Need transformers>=4.35)"


def run_grounded_sam(img_rgb, text_prompt):
    """Zero-shot segmentation: Grounding DINO boxes used as SAM box prompts."""
    try:
        t0 = time.time()

        # 1. DINO Detection
        dino_res = _run_dino_detection(img_rgb, text_prompt)
        boxes = dino_res["boxes"].cpu().numpy()
        scores = dino_res["scores"].cpu().numpy()
        labels_txt = dino_res["labels"]

        # 2. SAM Segmentation
        predictor = _load_sam_predictor(img_rgb)
        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        str_labels = []
        for box, score, label in zip(boxes, scores, labels_txt):
            masks, _, _ = predictor.predict(box=box, multimask_output=False)
            combined_mask |= masks[0]
            str_labels.append(f"{label} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(255, 80, 160))
        out = draw_boxes(out, boxes.tolist(), str_labels, color=(255, 80, 160))
        return out, (combined_mask * 255).astype(np.uint8), f"Found: {len(boxes)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounded SAM Error: {e}"


INTELLIARTS_CLASS_NAMES = [
    '_background_', 'back_bumper', 'back_glass', 'back_left_door', 'back_left_light',
    'back_right_door', 'back_right_light', 'front_bumper', 'front_glass',
    'front_left_door', 'front_left_light', 'front_right_door', 'front_right_light',
    'hood', 'left_mirror', 'right_mirror', 'tailgate', 'trunk', 'wheel',
]
# 2: back_glass, 8: front_glass, 14: left_mirror, 15: right_mirror
INTELLIARTS_TARGET_CLASSES = (2, 8, 14, 15)


def run_intelliarts_car_parts(img_rgb):
    """Segment glass and mirrors with the Intelliarts Detectron2 car-parts model.

    Installs detectron2 on first use when it is missing.  Returns
    ``(overlay, bw_mask, stats)``; failures are reported through the stats text.
    """
    t0 = time.time()
    try:
        import detectron2  # noqa: F401  (availability probe only)
    except ImportError:
        print("Installing detectron2... this may take a few minutes!")
        # Use the running interpreter's pip and an argument list (no shell).
        subprocess.run(
            [sys.executable, "-m", "pip", "install",
             "git+https://github.com/facebookresearch/detectron2.git", "--no-build-isolation"],
            check=False,
        )

    try:
        from detectron2 import model_zoo
        from detectron2.engine import DefaultPredictor
        from detectron2.config import get_cfg

        model_url = "https://huggingface.co/spaces/intelliarts/Car_parts_detection/resolve/main/model_final.pth"
        model_path = "intelliarts_model_final.pth"
        if not os.path.exists(model_path):
            print("Downloading Intelliarts Car Parts weights...")
            _download_once(model_url, model_path)

        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = CONF
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 19
        cfg.MODEL.WEIGHTS = model_path
        cfg.MODEL.DEVICE = DEVICE

        predictor = DefaultPredictor(cfg)
        # DefaultPredictor expects BGR input under the default INPUT.FORMAT;
        # Gradio hands us RGB, so convert before inference.
        outputs = predictor(cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR))
        instances = outputs["instances"].to("cpu")

        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels_list = [], []

        classes = instances.pred_classes.numpy()
        scores = instances.scores.numpy()
        boxes = instances.pred_boxes.tensor.numpy()
        masks = instances.pred_masks.numpy()

        for cls_id, score, box, mask in zip(classes, scores, boxes, masks):
            cls_id = int(cls_id)
            if cls_id not in INTELLIARTS_TARGET_CLASSES:
                continue
            combined_mask |= mask
            boxes_list.append(box.tolist())
            labels_list.append(f"{INTELLIARTS_CLASS_NAMES[cls_id]} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(50, 150, 255))
        out = draw_boxes(out, boxes_list, labels_list, color=(50, 150, 255))
        bw_mask = (combined_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Intelliarts Detectron2 Error: {e}"


# ═══════════════════════════════════════════════════════════════════════════════
# SegFormer Function
# ═══════════════════════════════════════════════════════════════════════════════
def run_segformer(img_rgb, morph_cleanup=False):
    """Semantic segmentation with the fine-tuned SegFormer checkpoint.

    Looks for a directory containing ``config.json`` in several local and
    deployment locations.  Class index 1 is treated as "window".  Returns
    ``(overlay, bw_mask, stats)``; failures are reported through the stats text.
    """
    try:
        from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
        import torch.nn.functional as F

        t0 = time.time()
        app_dir = _app_dir()
        base_dir = os.path.dirname(app_dir)

        # Paths to try (works for both local PC and Hugging Face Cloud deployment)
        paths_to_try = [
            os.path.join(base_dir, "SegFormer_Model", "best_segformer_dice_model"),  # Local PC
            "best_segformer_dice_model",                                            # Hugging Face Root
            os.path.join(app_dir, "best_segformer_dice_model"),                     # Next to app.py
            ".",                                                                    # Files uploaded to the root
            app_dir,                                                                # Files next to app.py
        ]
        # For SegFormer, the path must contain config.json
        model_path = next(
            (p for p in paths_to_try if os.path.exists(os.path.join(p, "config.json"))),
            "best_segformer_dice_model",  # Fallback
        )

        processor = SegformerImageProcessor.from_pretrained(model_path)
        model = SegformerForSemanticSegmentation.from_pretrained(model_path).to(DEVICE)

        inputs = processor(images=Image.fromarray(img_rgb), return_tensors="pt")
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)

        h, w = img_rgb.shape[:2]
        logits = F.interpolate(outputs.logits, size=(h, w), mode="bilinear", align_corners=False)[0]
        probs = F.softmax(logits, dim=0)
        pred_mask = (probs[1] > 0.5).cpu().numpy().astype(np.uint8)

        # Apply morphological cleanup if requested
        if morph_cleanup:
            pred_mask = apply_morphology(pred_mask, close_k=15, open_k=7)

        elapsed = time.time() - t0
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 50, 50))
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s{morph_note}"
    except Exception as e:
        return img_rgb, None, f"SegFormer Error: {e}"


# ═══════════════════════════════════════════════════════════════════════════════
# BiRefNet Function
# ═══════════════════════════════════════════════════════════════════════════════
def run_birefnet(img_rgb):
    """Segmentation with the fine-tuned BiRefNet model.

    Uses a local model directory (must contain ``config.json`` and
    ``model.safetensors``) when one is found, otherwise the Hugging Face repo.
    Returns ``(overlay, bw_mask, stats)``; failures are reported through the
    stats text.
    """
    try:
        from transformers import AutoModelForImageSegmentation
        from torchvision import transforms
        import torch.nn.functional as F

        t0 = time.time()
        app_dir = _app_dir()
        base_dir = os.path.dirname(app_dir)

        # Paths to try (works for local PC and Hugging Face Cloud deployment)
        paths_to_try = [
            os.path.join(base_dir, "BiRefNet_Model", "best_model-20260624T051601Z-3-001", "best_model"),  # Local PC
            "birefnet_model",                            # Hugging Face Root / Root dir
            os.path.join(app_dir, "birefnet_model"),     # Next to app.py
            "best_birefnet_model",                       # Extra fallback
        ]
        required = ("config.json", "model.safetensors")
        model_path = next(
            (p for p in paths_to_try if all(os.path.exists(os.path.join(p, name)) for name in required)),
            "Ayesha-Majeed/birefnet_car_window",  # Final fallback: Hugging Face Model Repo
        )

        model = AutoModelForImageSegmentation.from_pretrained(model_path, trust_remote_code=True).to(DEVICE)
        model.eval()

        image_transform = transforms.Compose([
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
        input_tensor = image_transform(Image.fromarray(img_rgb)).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            if DEVICE == "cuda":
                with torch.amp.autocast("cuda"):
                    preds = model(input_tensor)
            else:
                preds = model(input_tensor)
        # When several outputs are returned, the last one is used as the prediction.
        final_pred = preds[-1] if isinstance(preds, (list, tuple)) else preds

        h, w = img_rgb.shape[:2]
        final_pred = F.interpolate(final_pred, size=(h, w), mode="bilinear", align_corners=False)
        pred_mask = (torch.sigmoid(final_pred) > 0.5).squeeze().cpu().numpy().astype(np.uint8)

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 0, 0))  # Red
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"BiRefNet Error: {e}"


# ═══════════════════════════════════════════════════════════════════════════════
# Gradio Process Function
# ═══════════════════════════════════════════════════════════════════════════════
# A palette of pastel and neon colors for dynamic visualizations
PASTEL_COLORS = [
    (255, 105, 180),  # Hot/Light Pink
    (180, 130, 255),  # Light Purple
    (0, 215, 255),    # Light Sky Blue / Cyan
    (255, 220, 50),   # Light Yellow
    (255, 160, 50),   # Light Orange
    (150, 255, 150),  # Light Mint Green
    (240, 240, 255),  # Light White / Silver
]

# UI model name -> SAM prompting strategy
SAM_STRATEGIES = {
    "SAM + YOLO (Strategy 1: Bbox + 5 Points)": 1,
    "SAM + YOLO (Strategy 2: Mask + 5 Points)": 2,
    "SAM + YOLO (Strategy 3: Direct Mask Prompting)": 3,
}


def process_image(img_rgb, model_name, text_prompt="", morph_cleanup=False):
    """Dispatch ``img_rgb`` to the model selected by ``model_name``.

    Returns ``(overlay, bw_mask, stats)``.  Never raises: unknown names and
    runtime failures are reported through the stats text.
    """
    if img_rgb is None:
        return None, None, "Please upload an image."

    # Pick a random color for this specific inference run
    run_color = random.choice(PASTEL_COLORS)

    try:
        if model_name == "YOLOv8x-seg (Custom Window)":
            return run_yolo_generic(img_rgb, "best.pt", target_classes=[0, 1], color=run_color,
                                    morph_cleanup=morph_cleanup)
        if model_name == "YOLOv8x-seg":
            return run_yolo_generic(img_rgb, "best.pt", target_classes=[0, 1], color=(255, 215, 0),
                                    morph_cleanup=morph_cleanup)
        if model_name == "YOLO11x-seg":
            if os.path.exists("yolo11_best.pt"):
                y11_weights = "yolo11_best.pt"
            else:
                base_dir = os.path.dirname(_app_dir())
                y11_weights = os.path.join(base_dir, "runs", "segment", "runs", "car_mirror_seg",
                                           "yolo11x_seg_1024", "weights", "best.pt")
                if not os.path.exists(y11_weights):
                    y11_weights = "best.pt"  # Fallback
            return run_yolo_generic(img_rgb, y11_weights, target_classes=[0, 1], color=(0, 255, 120),
                                    morph_cleanup=morph_cleanup)
        if model_name in SAM_STRATEGIES:
            return run_sam_strategy(img_rgb, "best.pt", target_classes=[0, 1], color=run_color,
                                    strategy=SAM_STRATEGIES[model_name], morph_cleanup=morph_cleanup)
        if model_name == "Mask R-CNN":
            # Prefer weights uploaded next to app.py as "maskrcnn_best.pt"
            if os.path.exists("maskrcnn_best.pt"):
                mrcnn_weights = "maskrcnn_best.pt"
            else:
                base_dir = os.path.dirname(_app_dir())
                mrcnn_weights = os.path.join(base_dir, "Mask_RCNN", "runs", "woven-sweep-5", "best.pt")
                if not os.path.exists(mrcnn_weights):
                    mrcnn_weights = "Mask_RCNN/runs/woven-sweep-5/best.pt"
            return run_mask_rcnn(img_rgb, mrcnn_weights)
        if model_name == "Grounding DINO (Zero-Shot Detection)":
            return run_grounding_dino(img_rgb, text_prompt)
        if model_name == "Grounded SAM (Zero-Shot Segmentation)":
            return run_grounded_sam(img_rgb, text_prompt)
        if model_name == "Intelliarts Car Parts (Detectron2)":
            return run_intelliarts_car_parts(img_rgb)
        if model_name == "SegFormer":
            return run_segformer(img_rgb, morph_cleanup=morph_cleanup)
        if model_name == "BiRefNet":
            return run_birefnet(img_rgb)
        return img_rgb, None, "Model not recognized."
    except Exception as e:
        return img_rgb, None, f"Error: {str(e)}"


# ═══════════════════════════════════════════════════════════════════════════════
# Gradio UI
# ═══════════════════════════════════════════════════════════════════════════════
# Raw string: the "\." sequences are literal Markdown escapes, not Python escapes.
EVALUATION_NOTES = r"""**The following models will run and display their results below:**

**Custom Trained Models:**
1. SegFormer
2. SegFormer + Morphological
3. YOLO11x-seg
4. YOLOv8x-seg
5. Mask R-CNN
6. BiRefNet
7. SAM + YOLO (Strategy 1: Bbox + 5 Points)
8. SAM + YOLO (Strategy 2: Mask + 5 Points)
9. SAM + YOLO (Strategy 3: Direct Mask Prompting)

**Pretrained Zero-Shot Models:**
10\. Grounding DINO
11\. Grounded SAM
12\. Intelliarts Car Parts

**Our Findings:** SegFormer and YOLO11x deliver the best performance with significantly sharper edge precision.
"""

DINO_PROMPT = "car window. car glass. windshield."


def _result_panel(heading, label):
    """Create one result section (heading, overlay + mask row, stats box).

    Must be called inside an active ``gr.Blocks`` context.  Returns the three
    components in the order ``(overlay, binary_mask, stats)``.
    """
    gr.Markdown(heading)
    with gr.Row():
        overlay = gr.Image(label=f"{label} Overlay", interactive=False)
        binary = gr.Image(label=f"{label} Binary Mask", interactive=False, image_mode="L")
    stats = gr.Textbox(label=f"{label} Stats", interactive=False)
    return overlay, binary, stats


theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")

with gr.Blocks(theme=theme, title="Car Window Segmentation") as demo:
    gr.Markdown("""
    # Car Window Segmentation
    Compare your custom trained YOLOv8 model against state-of-the-art Zero-Shot models!
    """)

    # ── Comprehensive Evaluation tab ──
    with gr.Tab("Comprehensive Evaluation"):
        gr.Markdown("### Comprehensive Evaluation: Results from All Trained and Pretrained Models")
        gr.Markdown(EVALUATION_NOTES)

        with gr.Row():
            input_image_seq = gr.Image(type="numpy", label="Upload Window Image")
        with gr.Row():
            submit_btn_seq = gr.Button("Run All Models", variant="primary", size="lg")
            stop_btn_seq = gr.Button("🛑 Stop Processing", variant="stop", size="lg")

        if mirror_examples:
            gr.Markdown("### Or click any example image below to load it:")
            compare_gallery = gr.Gallery(value=mirror_examples, columns=10, height=120,
                                         object_fit="cover", allow_preview=False, show_label=False)

            def load_compare_img(evt: gr.SelectData):
                """Return the example file path for the clicked gallery tile."""
                return mirror_examples[evt.index]

            compare_gallery.select(fn=load_compare_img, inputs=None, outputs=input_image_seq)

        gr.Markdown("---")
        gr.Markdown("## 🚀 Custom Trained Models")
        seq_segf_img, seq_segf_bw, seq_segf_stats = _result_panel(
            "### 1️⃣ SegFormer (Transformer)", "SegFormer")
        gr.Markdown("---")
        seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats = _result_panel(
            "### 2️⃣ SegFormer + Morphological Cleanup (Holes Filled + Sharp Borders)", "SegFormer + Morph")
        gr.Markdown("---")
        seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats = _result_panel(
            "### 3️⃣ YOLO11x-seg", "YOLO11x")
        gr.Markdown("---")
        seq_yolo_img, seq_yolo_bw, seq_yolo_stats = _result_panel(
            "### 4️⃣ YOLOv8x-seg", "YOLO")
        gr.Markdown("---")
        seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats = _result_panel(
            "### 5️⃣ Mask R-CNN (ResNet50-FPN)", "Mask R-CNN")
        gr.Markdown("---")
        seq_biref_img, seq_biref_bw, seq_biref_stats = _result_panel(
            "### 6️⃣ BiRefNet (Boundary-Aware Model)", "BiRefNet")
        gr.Markdown("---")
        seq_sam1_img, seq_sam1_bw, seq_sam1_stats = _result_panel(
            "### 7️⃣ SAM + YOLO (Strategy 1: Bbox + 5 Points)", "SAM+YOLO Strat 1")
        gr.Markdown("---")
        seq_sam2_img, seq_sam2_bw, seq_sam2_stats = _result_panel(
            "### 8️⃣ SAM + YOLO (Strategy 2: Mask + 5 Points)", "SAM+YOLO Strat 2")
        gr.Markdown("---")
        seq_sam3_img, seq_sam3_bw, seq_sam3_stats = _result_panel(
            "### 9️⃣ SAM + YOLO (Strategy 3: Direct Mask Prompting)", "SAM+YOLO Strat 3")

        gr.Markdown("---")
        gr.Markdown("## 🌍 Pretrained Zero-Shot Models")
        seq_dino_img, seq_dino_bw, seq_dino_stats = _result_panel(
            "### 🔟 Grounding DINO (Zero-Shot Detection)", "Grounding DINO")
        gr.Markdown("---")
        seq_gsam_img, seq_gsam_bw, seq_gsam_stats = _result_panel(
            "### 1️⃣1️⃣ Grounded SAM (Zero-Shot Segmentation)", "Grounded SAM")
        gr.Markdown("---")
        seq_intell_img, seq_intell_bw, seq_intell_stats = _result_panel(
            "### 1️⃣2️⃣ Intelliarts Car Parts (Detectron2)", "Intelliarts Car Parts")

        # Flat output list: three components (overlay, mask, stats) per model,
        # in the same order as the runners in run_all_models.
        all_outputs = [
            seq_segf_img, seq_segf_bw, seq_segf_stats,
            seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats,
            seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats,
            seq_yolo_img, seq_yolo_bw, seq_yolo_stats,
            seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats,
            seq_biref_img, seq_biref_bw, seq_biref_stats,
            seq_sam1_img, seq_sam1_bw, seq_sam1_stats,
            seq_sam2_img, seq_sam2_bw, seq_sam2_stats,
            seq_sam3_img, seq_sam3_bw, seq_sam3_stats,
            seq_dino_img, seq_dino_bw, seq_dino_stats,
            seq_gsam_img, seq_gsam_bw, seq_gsam_stats,
            seq_intell_img, seq_intell_bw, seq_intell_stats,
        ]

        def run_all_models(img):
            """Run every model in turn, yielding the full output tuple after each.

            The first yield marks every stats box as pending; each later yield
            fills in one more model's (overlay, mask, stats) triple.
            """
            n_slots = len(all_outputs)
            if img is None:
                yield tuple([None] * n_slots)
                return

            # ── Step 0: Show "Processing..." in ALL textboxes immediately ──
            PENDING = "⏳ Processing..."
            results = [None] * n_slots
            for i in range(2, n_slots, 3):  # every third slot is a stats textbox
                results[i] = PENDING
            yield tuple(results)

            runners = (
                lambda: run_segformer(img, morph_cleanup=False),                                    # 1
                lambda: run_segformer(img, morph_cleanup=True),                                     # 2
                lambda: process_image(img, "YOLO11x-seg", "", False),                               # 3
                lambda: process_image(img, "YOLOv8x-seg", "", False),                               # 4
                lambda: process_image(img, "Mask R-CNN", "", False),                                # 5
                lambda: run_birefnet(img),                                                          # 6
                lambda: process_image(img, "SAM + YOLO (Strategy 1: Bbox + 5 Points)", "", False),  # 7
                lambda: process_image(img, "SAM + YOLO (Strategy 2: Mask + 5 Points)", "", False),  # 8
                lambda: process_image(img, "SAM + YOLO (Strategy 3: Direct Mask Prompting)", "", False),  # 9
                lambda: process_image(img, "Grounding DINO (Zero-Shot Detection)", DINO_PROMPT, False),   # 10
                lambda: process_image(img, "Grounded SAM (Zero-Shot Segmentation)", DINO_PROMPT, False),  # 11
                lambda: process_image(img, "Intelliarts Car Parts (Detectron2)", "", False),        # 12
            )
            for slot, runner in enumerate(runners):
                base = 3 * slot
                results[base:base + 3] = runner()
                yield tuple(results)

        run_event = submit_btn_seq.click(
            fn=run_all_models,
            inputs=[input_image_seq],
            outputs=all_outputs,
        )
        stop_btn_seq.click(fn=None, inputs=None, outputs=None, cancels=[run_event])

if __name__ == "__main__":
    demo.launch()