Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import cv2 | |
| import time | |
| import torch | |
| import warnings | |
| import os | |
| import zipfile | |
| from PIL import Image | |
| import random | |
| warnings.filterwarnings("ignore") | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # STEP 1: Extract any .zip files in current directory | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
def _extract_local_archives():
    """Unpack every ``.zip`` file found in the working directory, in place.

    Each archive is handled on its own: a failure is reported on stdout and
    the remaining archives are still processed.
    """
    for entry in os.listdir("."):
        if not entry.endswith(".zip"):
            continue
        try:
            with zipfile.ZipFile(entry, 'r') as archive:
                archive.extractall(".")
            print(f"[ZIP] Extracted {entry} OK!")
        except Exception as err:
            # Best effort: one unreadable archive must not stop the app from starting.
            print(f"[ZIP] ERROR: {err}")


print("=" * 60)
print(f"[STARTUP] Working dir: {os.getcwd()}")
_extract_local_archives()
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # STEP 2: Copy images to root | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
def prepare_clean_examples(src_folder, prefix, limit=10):
    """Copy up to *limit* images from *src_folder* into the working directory.

    The folder is searched recursively. Each image is copied to
    ``{prefix}_{n}{ext}`` in the current working directory, where ``n`` counts
    the successful copies and ``ext`` is the lower-cased extension of the
    source file, so the copy's name matches its actual format.

    Args:
        src_folder: Directory to search for images.
        prefix: Filename prefix for the copies.
        limit: Maximum number of images to copy; values <= 0 copy nothing.

    Returns:
        List of destination filenames, in copy order. Empty when the folder
        does not exist or contains no images.
    """
    import shutil

    copied = []
    if limit <= 0 or not os.path.exists(src_folder):
        return copied

    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    for root, dirs, files in os.walk(src_folder):
        # Sorting in place makes os.walk descend in a stable order, so the
        # same examples are picked on every start.
        dirs.sort()
        for fname in sorted(files):
            suffix = os.path.splitext(fname)[1].lower()
            if suffix not in image_suffixes:
                continue
            src_path = os.path.join(root, fname)
            dst_name = f"{prefix}_{len(copied)}{suffix}"
            try:
                shutil.copy2(src_path, dst_name)
            except OSError as e:
                # Best effort: skip files that cannot be copied.
                print(f"Error copying {src_path}: {e}")
                continue
            copied.append(dst_name)
            if len(copied) >= limit:
                return copied
    return copied
# Example gallery: take images from the first example folder that exists;
# if that yields nothing, fall back to a single bundled picture.
mirror_examples = []
for folder in ("test car windows", "test_car_windows", "test car windows segmentation"):
    if not os.path.exists(folder):
        continue
    mirror_examples = prepare_clean_examples(folder, "mirror", limit=15)
    break

if len(mirror_examples) == 0 and os.path.exists("car.jpeg"):
    mirror_examples = ["car.jpeg"]
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # Global Settings | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
# Torch device string used when moving models and tensors: GPU when CUDA is
# available, CPU otherwise.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Confidence threshold passed as ``conf=`` to the YOLO models.
CONF = 0.45
def apply_mask_overlay(img_rgb, mask_bool, color=(0, 215, 255), alpha=0.4):
    """Highlight the masked region of an image and dim everything else.

    Args:
        img_rgb: Image array of shape (H, W, 3).
        mask_bool: (H, W) mask. Any dtype is accepted; non-zero means
            "inside the mask".
        color: Tint and outline colour, in the same channel order as the image.
        alpha: Weight of the tint when blended with the original pixels.

    Returns:
        A new (H, W, 3) array: tinted pixels with a 2 px outline inside the
        mask, and the original image at half brightness outside it.
    """
    # Coerce to a real boolean array. Indexing with a uint8 0/1 mask would be
    # integer fancy indexing (selecting rows 0 and 1), not a pixel selection.
    mask = np.asarray(mask_bool).astype(bool)

    # 1. Darken the background (50% brightness, no blur).
    dark_bg = cv2.addWeighted(img_rgb, 0.5, np.zeros_like(img_rgb), 0.5, 0)

    # 2. Inside the mask, blend the flat colour with the original pixels.
    tinted = img_rgb.copy()
    tinted[mask] = color
    tinted = cv2.addWeighted(tinted, alpha, img_rgb, 1 - alpha, 0)

    # 3. Outline the mask on the tinted layer; only the part of the stroke
    #    that lies inside the mask survives the final selection below.
    mask_img = mask.astype(np.uint8) * 255
    contours, _ = cv2.findContours(mask_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(tinted, contours, -1, color, 2, cv2.LINE_AA)

    # 4. Tinted layer inside the mask, dark background outside.
    return np.where(mask[:, :, None], tinted, dark_bg)
def draw_boxes(img_rgb, boxes, labels, color=(0, 215, 255)):
    """Draw a thin rectangle plus HUD-style corner brackets for each box.

    Args:
        img_rgb: Image to annotate; it is copied, not modified.
        boxes: Iterable of (x1, y1, x2, y2) boxes; coordinates are cast to int.
        labels: One label per box. Labels are not rendered (so they cannot
            hide the masks) but boxes are paired with them, so a box without
            a matching label is not drawn.
        color: Line colour.

    Returns:
        The annotated copy of the image.
    """
    canvas = img_rgb.copy()
    bracket_thickness = 3

    for box, _label in zip(boxes, labels):
        left, top, right, bottom = (int(v) for v in box)

        # Faint full outline.
        cv2.rectangle(canvas, (left, top), (right, bottom), color, 1)

        # Bracket arms are 15% of the shorter box side.
        arm = int(min(right - left, bottom - top) * 0.15)

        # (corner x, corner y, horizontal direction, vertical direction)
        corners = (
            (left, top, 1, 1),       # top-left
            (right, top, -1, 1),     # top-right
            (left, bottom, 1, -1),   # bottom-left
            (right, bottom, -1, -1), # bottom-right
        )
        for cx, cy, dx, dy in corners:
            cv2.line(canvas, (cx, cy), (cx + dx * arm, cy), color, bracket_thickness, cv2.LINE_AA)
            cv2.line(canvas, (cx, cy), (cx, cy + dy * arm), color, bracket_thickness, cv2.LINE_AA)

    return canvas
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # Morphological post-processing helper | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
def apply_morphology(mask_uint8, close_k=15, open_k=7):
    """Clean a binary mask: closing (fills holes) followed by opening (drops small blobs).

    Args:
        mask_uint8: Single-channel uint8 mask.
        close_k: Side length of the elliptical kernel used for closing.
        open_k: Side length of the elliptical kernel used for opening.

    Returns:
        The cleaned mask.
    """
    def _ellipse(size):
        return cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))

    kernel_close = _ellipse(close_k)
    kernel_open = _ellipse(open_k)
    holes_filled = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel_close)
    return cv2.morphologyEx(holes_filled, cv2.MORPH_OPEN, kernel_open)
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # Model Functions | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
def run_yolo_generic(img_rgb, model_path, target_classes, color, morph_cleanup=False):
    """Run a YOLO segmentation checkpoint and merge the masks of selected classes.

    Args:
        img_rgb: Input image array of shape (H, W, 3).
        model_path: Path of the YOLO weights to load.
        target_classes: Class ids to keep; other detections are ignored.
        color: Overlay and box colour.
        morph_cleanup: When True, each instance mask is passed through
            ``apply_morphology`` before it is merged.

    Returns:
        Tuple ``(overlay, bw_mask, status)``: the annotated image, a uint8
        0/255 mask and a one-line summary string.
    """
    from ultralytics import YOLO

    # The reported time covers both loading the weights and the forward pass.
    started = time.time()
    detector = YOLO(model_path)
    # retina_masks=True requests masks at the original image resolution.
    predictions = detector(img_rgb, conf=CONF, verbose=False, retina_masks=True)
    elapsed = time.time() - started

    prediction = predictions[0]
    height, width = img_rgb.shape[:2]
    union = np.zeros((height, width), dtype=np.uint8)
    kept_boxes = []
    kept_labels = []

    if prediction.masks is not None:
        detections = zip(
            prediction.masks.data,
            prediction.boxes.xyxy,
            prediction.boxes.cls,
            prediction.boxes.conf,
        )
        for inst_mask, inst_box, inst_cls, inst_conf in detections:
            if int(inst_cls) not in target_classes:
                continue
            instance = inst_mask.cpu().numpy().astype(np.uint8)
            # Cleanup is per instance on purpose: closing the merged mask
            # would bridge the pillars between neighbouring windows.
            if morph_cleanup:
                instance = apply_morphology(instance)
            union |= instance
            kept_boxes.append(inst_box.cpu().tolist())
            kept_labels.append(f"glass {inst_conf:.2f}")

    morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
    overlay = apply_mask_overlay(img_rgb, union > 0, color=color)
    overlay = draw_boxes(overlay, kept_boxes, kept_labels, color=color)
    bw_mask = (union * 255).astype(np.uint8)
    return overlay, bw_mask, f"Found: {len(kept_boxes)} | Inference Time: {elapsed:.2f}s{morph_note}"
def run_sam_strategy(img_rgb, yolo_model_path, target_classes, color, strategy, morph_cleanup=False):
    """Refine YOLO detections with SAM, using one of three prompting strategies.

    YOLO is run on the image first; for each detection whose class is in
    ``target_classes`` SAM is prompted and its mask is merged into one
    combined mask.

    Args:
        img_rgb: Input image array of shape (H, W, 3).
        yolo_model_path: Path of the YOLO weights that provide boxes and masks.
        target_classes: Class ids to keep.
        color: Overlay and box colour.
        strategy: 1 = box + centre and four inset box corners as positive
            points; 2 = box + five points taken from the YOLO mask;
            3 = box + the YOLO mask as SAM ``mask_input``. Any other value
            contributes an empty mask for each detection.
        morph_cleanup: When True, each SAM mask goes through
            ``apply_morphology`` before merging.

    Returns:
        Tuple ``(overlay, bw_mask, status)``. When ``segment_anything`` (or
        another import) is missing, the input image, ``None`` and an error
        string are returned. Only ``ImportError`` is handled here; other
        exceptions propagate to the caller.
    """
    try:
        from segment_anything import sam_model_registry, SamPredictor
        import urllib.request
        CKPT = "sam_vit_b_01ec64.pth"
        URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
        # Download the checkpoint only when it is not already on disk.
        if not os.path.exists(CKPT): urllib.request.urlretrieve(URL, CKPT)
        # Timing starts after the download but includes loading both models.
        t0 = time.time()
        sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
        predictor = SamPredictor(sam)
        predictor.set_image(img_rgb)
        from ultralytics import YOLO as _YOLO
        yolo_res = _YOLO(yolo_model_path)(img_rgb, conf=CONF, verbose=False, retina_masks=True)[0]
        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels = [], []
        if yolo_res.boxes is not None and yolo_res.masks is not None:
            for box, mask_data, cls, conf in zip(yolo_res.boxes.xyxy, yolo_res.masks.data, yolo_res.boxes.cls, yolo_res.boxes.conf):
                if int(cls) not in target_classes: continue
                box_np = box.cpu().numpy()
                yolo_mask = mask_data.cpu().numpy() > 0.5
                if strategy == 1:
                    # Strategy 1: Bbox + 5 Points
                    # Points: box centre plus the four corners moved 5 px inwards,
                    # all labelled positive (label 1).
                    x1, y1, x2, y2 = map(int, box_np)
                    cx, cy = (x1+x2)//2, (y1+y2)//2
                    pts = [[cx, cy], [x1+5, y1+5], [x2-5, y1+5], [x1+5, y2-5], [x2-5, y2-5]]
                    pts_np = np.array(pts)
                    labels_np = np.ones(len(pts))
                    masks_sam, _, _ = predictor.predict(box=box_np, point_coords=pts_np, point_labels=labels_np, multimask_output=False)
                    sam_mask = masks_sam[0]
                elif strategy == 2:
                    # Strategy 2: Mask + 5 Points
                    y_coords, x_coords = np.where(yolo_mask)
                    # An empty YOLO mask gives no points to prompt with; skip the detection.
                    if len(x_coords) == 0: continue
                    # Centroid of the mask pixels and the indices of its extreme pixels.
                    cx, cy = int(np.mean(x_coords)), int(np.mean(y_coords))
                    idx_top, idx_bot = np.argmin(y_coords), np.argmax(y_coords)
                    idx_lft, idx_rgt = np.argmin(x_coords), np.argmax(x_coords)
                    def get_mid(x_1, y_1, x_2, y_2, f=0.6):
                        # Point located a fraction f of the way from (x_1, y_1) to (x_2, y_2).
                        return int(x_1 + (x_2-x_1)*f), int(y_1 + (y_2-y_1)*f)
                    pts = []
                    # First point: the centroid if it lies on the mask, otherwise
                    # the middle entry of the mask-pixel list.
                    if yolo_mask[cy, cx]: pts.append([cx, cy])
                    else: pts.append([x_coords[len(x_coords)//2], y_coords[len(y_coords)//2]])
                    # Four more points, each 60% of the way from the centroid to an
                    # extreme pixel; a point that falls off the mask is replaced by
                    # the first point so there are always five prompts.
                    for idx in [idx_top, idx_bot, idx_lft, idx_rgt]:
                        px, py = get_mid(cx, cy, x_coords[idx], y_coords[idx])
                        if 0 <= py < h and 0 <= px < w and yolo_mask[py, px]: pts.append([px, py])
                        else: pts.append(pts[0])
                    pts_np = np.array(pts)
                    labels_np = np.ones(len(pts))
                    masks_sam, _, _ = predictor.predict(box=box_np, point_coords=pts_np, point_labels=labels_np, multimask_output=False)
                    sam_mask = masks_sam[0]
                elif strategy == 3:
                    # Strategy 3: Direct Mask Prompting
                    # The YOLO mask is resized to 256x256 and turned into +30 / -30
                    # values before being passed as mask_input.
                    # NOTE(review): the resize stretches the full image to a square;
                    # confirm this matches the low-resolution mask layout SAM expects
                    # for non-square images.
                    yolo_mask_resized = cv2.resize((yolo_mask).astype(np.float32), (256, 256), interpolation=cv2.INTER_NEAREST)
                    mask_input = np.zeros((1, 256, 256), dtype=np.float32)
                    mask_input[0] = np.where(yolo_mask_resized > 0.5, 30.0, -30.0)
                    masks_sam, _, _ = predictor.predict(box=box_np, mask_input=mask_input, multimask_output=False)
                    # Redraw the external contours filled, which closes any holes
                    # inside the SAM mask.
                    raw_mask = (masks_sam[0].astype(np.uint8) * 255)
                    contours, _ = cv2.findContours(raw_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    filled_mask = np.zeros_like(raw_mask)
                    cv2.drawContours(filled_mask, contours, -1, 255, cv2.FILLED)
                    sam_mask = (filled_mask > 0)
                else:
                    # Unknown strategy: empty mask, but the box is still counted below.
                    sam_mask = np.zeros((h, w), dtype=bool)
                sam_mask_uint = sam_mask.astype(np.uint8)
                # Cleanup is applied per instance, before merging.
                if morph_cleanup:
                    sam_mask_uint = apply_morphology(sam_mask_uint)
                combined_mask |= sam_mask_uint.astype(bool)
                boxes_list.append(box_np.tolist())
                labels.append(f"glass {conf:.2f}")
        elapsed = time.time() - t0
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        out = apply_mask_overlay(img_rgb, combined_mask, color=color)
        out = draw_boxes(out, boxes_list, labels, color=color)
        return out, (combined_mask * 255).astype(np.uint8), f"Found: {len(boxes_list)} | Strategy: {strategy} | Inference: {elapsed:.2f}s{morph_note}"
    except ImportError:
        return img_rgb, None, "Error: segment-anything not installed"
def run_mask_rcnn(img_rgb, weights_path):
    """Run a two-class Mask R-CNN (ResNet50-FPN v2) checkpoint on an image.

    Args:
        img_rgb: Input image array of shape (H, W, 3).
        weights_path: Checkpoint file; either a bare state dict or a dict
            holding it under ``"model_state_dict"``.

    Returns:
        Tuple ``(overlay, bw_mask, status)``. On any failure the input image,
        ``None`` and an error string are returned instead of raising.
    """
    t0 = time.time()
    try:
        from torchvision.models.detection import maskrcnn_resnet50_fpn_v2
        from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
        from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
        import torchvision.transforms.v2 as T

        # Build the architecture and swap both heads for two-class versions
        # before loading the checkpoint.
        net = maskrcnn_resnet50_fpn_v2(weights=None)
        box_in = net.roi_heads.box_predictor.cls_score.in_features
        net.roi_heads.box_predictor = FastRCNNPredictor(box_in, 2)
        mask_in = net.roi_heads.mask_predictor.conv5_mask.in_channels
        net.roi_heads.mask_predictor = MaskRCNNPredictor(mask_in, 256, 2)

        # NOTE: weights_only=False unpickles arbitrary objects; only load
        # checkpoints from a trusted source.
        checkpoint = torch.load(weights_path, map_location=DEVICE, weights_only=False)
        wrapped = isinstance(checkpoint, dict) and "model_state_dict" in checkpoint
        net.load_state_dict(checkpoint["model_state_dict"] if wrapped else checkpoint)
        net.to(DEVICE)
        net.eval()

        img_tensor = T.ToTensor()(Image.fromarray(img_rgb)).to(DEVICE)
        with torch.no_grad():
            outputs = net([img_tensor])[0]

        height, width = img_rgb.shape[:2]
        pred_mask = np.zeros((height, width), dtype=bool)
        boxes_list = []
        labels_list = []
        detections = zip(outputs['scores'], outputs['masks'], outputs['boxes'], outputs['labels'])
        for score, mask, box, _label in detections:
            if not (score > 0.45):
                continue
            pred_mask |= (mask[0].cpu().numpy() > 0.5)
            boxes_list.append(box.cpu().numpy().tolist())
            labels_list.append(f"glass {score:.2f}")

        elapsed = time.time() - t0
        orange = (255, 165, 0)
        out = apply_mask_overlay(img_rgb, pred_mask, color=orange)
        out = draw_boxes(out, boxes_list, labels_list, color=orange)
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Mask R-CNN Error: {e}"
def run_grounding_dino(img_rgb, text_prompt):
    """Detect objects described by a text prompt with Grounding DINO (boxes only).

    Args:
        img_rgb: Input image array of shape (H, W, 3).
        text_prompt: Free-text description of what to detect. It is stripped,
            lower-cased and terminated with a dot before use, which is the
            query format Grounding DINO expects.

    Returns:
        Tuple ``(overlay, bw_mask, status)``. ``bw_mask`` is all zeros because
        this model produces boxes, not masks. On an empty prompt or any
        failure the input image, ``None`` and an error string are returned.
    """
    prompt = (text_prompt or "").strip().lower()
    if not prompt:
        # Fail fast with a clear message instead of loading the model for nothing.
        return img_rgb, None, "Grounding DINO Error: empty text prompt - enter what to detect, e.g. 'car window.'"
    if not prompt.endswith("."):
        prompt += "."

    try:
        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

        t0 = time.time()
        model_id = "IDEA-Research/grounding-dino-tiny"
        processor = AutoProcessor.from_pretrained(model_id)
        model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(DEVICE)
        inputs = processor(images=img_rgb, text=prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = model(**inputs)

        h, w = img_rgb.shape[:2]
        # target_sizes makes the returned boxes use pixel coordinates of the input image.
        results = processor.post_process_grounded_object_detection(
            outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
        )[0]
        boxes = results["boxes"].cpu().numpy().tolist()
        scores = results["scores"].cpu().numpy().tolist()
        labels = results["labels"]
        elapsed = time.time() - t0

        bw_mask = np.zeros((h, w), dtype=np.uint8)  # DINO is boxes only
        str_labels = [f"{lbl} {scr:.2f}" for lbl, scr in zip(labels, scores)]
        out = draw_boxes(img_rgb.copy(), boxes, str_labels, color=(255, 100, 50))
        return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounding DINO Error: {e}\n(Need transformers>=4.35)"
def run_grounded_sam(img_rgb, text_prompt):
    """Zero-shot segmentation: Grounding DINO finds boxes, SAM segments each box.

    Args:
        img_rgb: Input image array of shape (H, W, 3).
        text_prompt: Free-text description of what to segment. It is stripped,
            lower-cased and terminated with a dot before use, which is the
            query format Grounding DINO expects.

    Returns:
        Tuple ``(overlay, bw_mask, status)``. On an empty prompt or any
        failure the input image, ``None`` and an error string are returned.
    """
    prompt = (text_prompt or "").strip().lower()
    if not prompt:
        # Fail fast with a clear message instead of loading two models for nothing.
        return img_rgb, None, "Grounded SAM Error: empty text prompt - enter what to segment, e.g. 'car window.'"
    if not prompt.endswith("."):
        prompt += "."

    try:
        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
        from segment_anything import sam_model_registry, SamPredictor
        import urllib.request

        t0 = time.time()

        # 1. DINO detection
        dino_id = "IDEA-Research/grounding-dino-tiny"
        processor = AutoProcessor.from_pretrained(dino_id)
        dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(dino_id).to(DEVICE)
        inputs = processor(images=img_rgb, text=prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = dino_model(**inputs)
        h, w = img_rgb.shape[:2]
        dino_res = processor.post_process_grounded_object_detection(
            outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
        )[0]
        boxes = dino_res["boxes"].cpu().numpy()
        scores = dino_res["scores"].cpu().numpy()
        labels_txt = dino_res["labels"]

        # 2. SAM segmentation, one box prompt per detection
        CKPT = "sam_vit_b_01ec64.pth"
        URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
        # Download the checkpoint only when it is not already on disk.
        if not os.path.exists(CKPT):
            urllib.request.urlretrieve(URL, CKPT)
        sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
        predictor = SamPredictor(sam)
        predictor.set_image(img_rgb)

        combined_mask = np.zeros((h, w), dtype=bool)
        str_labels = []
        for box, score, label in zip(boxes, scores, labels_txt):
            masks, _, _ = predictor.predict(box=box, multimask_output=False)
            combined_mask |= masks[0]
            str_labels.append(f"{label} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(255, 80, 160))
        out = draw_boxes(out, boxes.tolist(), str_labels, color=(255, 80, 160))
        return out, (combined_mask * 255).astype(np.uint8), f"Found: {len(boxes)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounded SAM Error: {e}"
def run_intelliarts_car_parts(img_rgb):
    """Segment car glass and mirrors with the Intelliarts Detectron2 car-parts model.

    detectron2 is installed on the fly when it cannot be imported, and the
    model weights are downloaded on first use.

    Args:
        img_rgb: Input image array of shape (H, W, 3) in RGB channel order.

    Returns:
        Tuple ``(overlay, bw_mask, status)`` covering the classes back_glass,
        front_glass, left_mirror and right_mirror. On any failure the input
        image, ``None`` and an error string are returned.
    """
    t0 = time.time()
    try:
        import detectron2  # noqa: F401  (availability probe only)
    except ImportError:
        import importlib
        import subprocess
        import sys

        print("Installing detectron2... this may take a few minutes!")
        try:
            # Use this interpreter's pip so the package lands in the
            # environment that is actually running the app.
            subprocess.run(
                [
                    sys.executable, "-m", "pip", "install",
                    "git+https://github.com/facebookresearch/detectron2.git",
                    "--no-build-isolation",
                ],
                check=False,
            )
        except OSError as install_err:
            print(f"Could not start pip: {install_err}")
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()

    try:
        from detectron2 import model_zoo
        from detectron2.engine import DefaultPredictor
        from detectron2.config import get_cfg
        import urllib.request

        model_url = "https://huggingface.co/spaces/intelliarts/Car_parts_detection/resolve/main/model_final.pth"
        model_path = "intelliarts_model_final.pth"
        if not os.path.exists(model_path):
            print("Downloading Intelliarts Car Parts weights...")
            urllib.request.urlretrieve(model_url, model_path)

        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.45
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 19
        cfg.MODEL.WEIGHTS = model_path
        cfg.MODEL.DEVICE = DEVICE
        predictor = DefaultPredictor(cfg)

        # DefaultPredictor takes BGR images (it converts internally when the
        # config asks for RGB), so flip the channel order of the RGB input.
        img_bgr = np.ascontiguousarray(img_rgb[:, :, ::-1])
        instances = predictor(img_bgr)["instances"].to("cpu")

        class_names = ['_background_', 'back_bumper', 'back_glass', 'back_left_door', 'back_left_light', 'back_right_door', 'back_right_light', 'front_bumper', 'front_glass', 'front_left_door', 'front_left_light', 'front_right_door', 'front_right_light', 'hood', 'left_mirror', 'right_mirror', 'tailgate', 'trunk', 'wheel']
        # 2: back_glass, 8: front_glass, 14: left_mirror, 15: right_mirror
        target_classes = {2, 8, 14, 15}

        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels_list = [], []
        detections = zip(
            instances.pred_classes.numpy(),
            instances.scores.numpy(),
            instances.pred_boxes.tensor.numpy(),
            instances.pred_masks.numpy(),
        )
        for cls_id, score, box, mask in detections:
            cls_id = int(cls_id)
            if cls_id not in target_classes:
                continue
            combined_mask |= mask
            boxes_list.append(box.tolist())
            labels_list.append(f"{class_names[cls_id]} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(50, 150, 255))
        out = draw_boxes(out, boxes_list, labels_list, color=(50, 150, 255))
        bw_mask = (combined_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Intelliarts Detectron2 Error: {e}"
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # SegFormer Function | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
def run_segformer(img_rgb, morph_cleanup=False):
    """Run the fine-tuned SegFormer model and overlay its class-1 mask.

    The model directory is the first candidate location that contains a
    ``config.json``; if none does, ``"best_segformer_dice_model"`` is handed
    to ``from_pretrained`` as is.

    Args:
        img_rgb: Input image array of shape (H, W, 3).
        morph_cleanup: When True, the predicted mask is passed through
            ``apply_morphology`` (closing 15, opening 7).

    Returns:
        Tuple ``(overlay, bw_mask, status)``. On any failure the input image,
        ``None`` and an error string are returned.
    """
    try:
        from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
        import torch.nn.functional as F

        t0 = time.time()
        script_dir = os.path.dirname(os.path.abspath(__file__))
        base_dir = os.path.dirname(script_dir)

        # Candidate model folders, in priority order (local PC layout first,
        # then the layouts used on the Hugging Face deployment).
        paths_to_try = [
            os.path.join(base_dir, "SegFormer_Model", "best_segformer_dice_model"),  # Local PC
            "best_segformer_dice_model",                                            # Hugging Face root
            os.path.join(script_dir, "best_segformer_dice_model"),                  # Next to app.py
            ".",                                                                    # Files uploaded to the root
            script_dir,                                                             # Files uploaded next to app.py
        ]
        model_path = next(
            (p for p in paths_to_try if os.path.exists(os.path.join(p, "config.json"))),
            "best_segformer_dice_model",  # Fallback
        )

        processor = SegformerImageProcessor.from_pretrained(model_path)
        model = SegformerForSemanticSegmentation.from_pretrained(model_path).to(DEVICE)
        inputs = processor(images=Image.fromarray(img_rgb), return_tensors="pt")
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)

        # Upsample the logits to the input size, then keep pixels whose
        # class-1 probability exceeds 0.5.
        h, w = img_rgb.shape[:2]
        logits = F.interpolate(outputs.logits, size=(h, w), mode="bilinear", align_corners=False)[0]
        probs = F.softmax(logits, dim=0)
        pred_mask = (probs[1] > 0.5).cpu().numpy().astype(np.uint8)

        if morph_cleanup:
            pred_mask = apply_morphology(pred_mask, close_k=15, open_k=7)

        elapsed = time.time() - t0
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        # Pass a boolean mask: a uint8 0/1 array used as an index selects
        # rows 0 and 1 of the image instead of the masked pixels.
        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 50, 50))
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s{morph_note}"
    except Exception as e:
        return img_rgb, None, f"SegFormer Error: {e}"
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # BiRefNet Function | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
def run_birefnet(img_rgb):
    """Run the BiRefNet model and overlay its mask in red.

    A local model folder is used when one of the candidate locations holds
    both ``config.json`` and ``model.safetensors``; otherwise the model id
    ``Ayesha-Majeed/birefnet_car_window`` is passed to ``from_pretrained``.

    Args:
        img_rgb: Input image array of shape (H, W, 3).

    Returns:
        Tuple ``(overlay, bw_mask, status)``. On any failure the input image,
        ``None`` and an error string are returned.
    """
    try:
        from transformers import AutoModelForImageSegmentation
        from torchvision import transforms
        import torch.nn.functional as F

        t0 = time.time()
        script_dir = os.path.dirname(os.path.abspath(__file__))
        base_dir = os.path.dirname(script_dir)

        def _is_model_dir(folder):
            required = (folder, os.path.join(folder, "config.json"), os.path.join(folder, "model.safetensors"))
            return all(os.path.exists(item) for item in required)

        candidates = (
            os.path.join(base_dir, "BiRefNet_Model", "best_model-20260624T051601Z-3-001", "best_model"),  # Local PC
            "birefnet_model",                                                                             # Root dir
            os.path.join(script_dir, "birefnet_model"),                                                   # Next to app.py
            "best_birefnet_model",                                                                        # Extra fallback
        )
        model_path = None
        for candidate in candidates:
            if _is_model_dir(candidate):
                model_path = candidate
                break
        if model_path is None:
            # Nothing local: let from_pretrained resolve the hub model id.
            model_path = "Ayesha-Majeed/birefnet_car_window"

        model = AutoModelForImageSegmentation.from_pretrained(model_path, trust_remote_code=True).to(DEVICE)
        model.eval()

        preprocess = transforms.Compose([
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
        from PIL import Image
        batch = preprocess(Image.fromarray(img_rgb)).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            if DEVICE == "cuda":
                # Mixed precision is only enabled on the GPU.
                with torch.amp.autocast("cuda"):
                    preds = model(batch)
            else:
                preds = model(batch)
        # When the model returns several outputs, the last one is used.
        final_pred = preds[-1] if isinstance(preds, (list, tuple)) else preds

        height, width = img_rgb.shape[:2]
        final_pred = F.interpolate(final_pred, size=(height, width), mode="bilinear", align_corners=False)
        pred_mask = (torch.sigmoid(final_pred) > 0.5).squeeze().cpu().numpy().astype(np.uint8)

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 0, 0))  # Red
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"BiRefNet Error: {e}"
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # Gradio Process Function | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # A beautiful palette of pastel and neon colors for dynamic visualizations | |
# Candidate overlay colours (RGB); one is drawn at random per inference run.
PASTEL_COLORS = [
    (255, 105, 180),  # hot / light pink
    (180, 130, 255),  # light purple
    (0, 215, 255),    # light sky blue / cyan
    (255, 220, 50),   # light yellow
    (255, 160, 50),   # light orange
    (150, 255, 150),  # light mint green
    (240, 240, 255),  # light white / silver
]
def process_image(img_rgb, model_name, text_prompt="", morph_cleanup=False):
    """Dispatch one image to the model selected by its display name.

    Args:
        img_rgb: Input image array, or ``None`` when nothing was uploaded.
        model_name: Display name of the model to run.
        text_prompt: Text query, used only by the zero-shot models.
        morph_cleanup: Forwarded to the models that support morphological
            cleanup (YOLO, SAM + YOLO and SegFormer).

    Returns:
        Tuple ``(overlay, bw_mask, status)``. For a missing image, an unknown
        model name or an exception raised by a model, the mask is ``None``
        and ``status`` explains what happened.
    """
    if img_rgb is None:
        return None, None, "Please upload an image."

    # Pick a random color for this specific inference run
    run_color = random.choice(PASTEL_COLORS)

    sam_strategies = {
        "SAM + YOLO (Strategy 1: Bbox + 5 Points)": 1,
        "SAM + YOLO (Strategy 2: Mask + 5 Points)": 2,
        "SAM + YOLO (Strategy 3: Direct Mask Prompting)": 3,
    }

    def _first_existing(candidates, fallback):
        # Return the first candidate path that exists, else the fallback.
        for candidate in candidates:
            if os.path.exists(candidate):
                return candidate
        return fallback

    try:
        if model_name == "YOLOv8x-seg (Custom Window)":
            return run_yolo_generic(img_rgb, "best.pt", target_classes=[0, 1], color=run_color, morph_cleanup=morph_cleanup)

        if model_name == "YOLOv8x-seg":
            return run_yolo_generic(img_rgb, "best.pt", target_classes=[0, 1], color=(255, 215, 0), morph_cleanup=morph_cleanup)

        if model_name == "YOLO11x-seg":
            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            y11_weights = _first_existing(
                [
                    "yolo11_best.pt",
                    os.path.join(base_dir, "runs", "segment", "runs", "car_mirror_seg", "yolo11x_seg_1024", "weights", "best.pt"),
                ],
                "best.pt",  # Fallback
            )
            return run_yolo_generic(img_rgb, y11_weights, target_classes=[0, 1], color=(0, 255, 120), morph_cleanup=morph_cleanup)

        if model_name in sam_strategies:
            return run_sam_strategy(
                img_rgb, "best.pt", target_classes=[0, 1], color=run_color,
                strategy=sam_strategies[model_name], morph_cleanup=morph_cleanup,
            )

        if model_name == "Mask R-CNN":
            # Prefer weights uploaded next to app.py as "maskrcnn_best.pt".
            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            mrcnn_weights = _first_existing(
                [
                    "maskrcnn_best.pt",
                    os.path.join(base_dir, "Mask_RCNN", "runs", "woven-sweep-5", "best.pt"),
                ],
                "Mask_RCNN/runs/woven-sweep-5/best.pt",
            )
            return run_mask_rcnn(img_rgb, mrcnn_weights)

        if model_name == "Grounding DINO (Zero-Shot Detection)":
            return run_grounding_dino(img_rgb, text_prompt)

        if model_name == "Grounded SAM (Zero-Shot Segmentation)":
            return run_grounded_sam(img_rgb, text_prompt)

        if model_name == "Intelliarts Car Parts (Detectron2)":
            return run_intelliarts_car_parts(img_rgb)

        if model_name == "SegFormer":
            return run_segformer(img_rgb, morph_cleanup=morph_cleanup)

        if model_name == "BiRefNet":
            # Route added so BiRefNet can be selected by name like the other models.
            return run_birefnet(img_rgb)

        return img_rgb, None, "Model not recognized."
    except Exception as e:
        return img_rgb, None, f"Error: {str(e)}"
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| # Gradio UI | |
| # ═══════════════════════════════════════════════════════════════════════════════ | |
| theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo") | |
| with gr.Blocks(theme=theme, title="Car Window Segmentation") as demo: | |
| gr.Markdown(""" | |
| # Car Window Segmentation | |
| Compare your custom trained YOLOv8 model against state-of-the-art Zero-Shot models! | |
| """) | |
| # ── TAB 3: Comprehensive Evaluation ── | |
| with gr.Tab("Comprehensive Evaluation"): | |
| gr.Markdown("### Comprehensive Evaluation: Results from All Trained and Pretrained Models") | |
| gr.Markdown("""**The following models will run and display their results below:** | |
| **Custom Trained Models:** | |
| 1. SegFormer | |
| 2. SegFormer + Morphological | |
| 3. YOLO11x-seg | |
| 4. YOLOv8x-seg | |
| 5. Mask R-CNN | |
| 6. BiRefNet | |
| 7. SAM + YOLO (Strategy 1: Bbox + 5 Points) | |
| 8. SAM + YOLO (Strategy 2: Mask + 5 Points) | |
| 9. SAM + YOLO (Strategy 3: Direct Mask Prompting) | |
| **Pretrained Zero-Shot Models:** | |
| 10\. Grounding DINO | |
| 11\. Grounded SAM | |
| 12\. Intelliarts Car Parts | |
| **Our Findings:** SegFormer and YOLO11x deliver the best performance with significantly sharper edge precision. | |
| """) | |
| with gr.Row(): | |
| input_image_seq = gr.Image(type="numpy", label="Upload Window Image") | |
| with gr.Row(): | |
| submit_btn_seq = gr.Button("Run All Models", variant="primary", size="lg") | |
| stop_btn_seq = gr.Button("🛑 Stop Processing", variant="stop", size="lg") | |
| if mirror_examples: | |
| gr.Markdown("### Or click any example image below to load it:") | |
| compare_gallery = gr.Gallery(value=mirror_examples, columns=10, height=120, object_fit="cover", allow_preview=False, show_label=False) | |
| def load_compare_img(evt: gr.SelectData): return mirror_examples[evt.index] | |
| compare_gallery.select(fn=load_compare_img, inputs=None, outputs=input_image_seq) | |
| gr.Markdown("---") | |
| gr.Markdown("## 🚀 Custom Trained Models") | |
| gr.Markdown("### 1️⃣ SegFormer (Transformer)") | |
| with gr.Row(): | |
| seq_segf_img = gr.Image(label="SegFormer Overlay", interactive=False) | |
| seq_segf_bw = gr.Image(label="SegFormer Binary Mask", interactive=False, image_mode="L") | |
| seq_segf_stats = gr.Textbox(label="SegFormer Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 2️⃣ SegFormer + Morphological Cleanup (Holes Filled + Sharp Borders)") | |
| with gr.Row(): | |
| seq_segf_morph_img = gr.Image(label="SegFormer + Morph Overlay", interactive=False) | |
| seq_segf_morph_bw = gr.Image(label="SegFormer + Morph Binary Mask", interactive=False, image_mode="L") | |
| seq_segf_morph_stats = gr.Textbox(label="SegFormer + Morph Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 3️⃣ YOLO11x-seg") | |
| with gr.Row(): | |
| seq_yolo11_img = gr.Image(label="YOLO11x Overlay", interactive=False) | |
| seq_yolo11_bw = gr.Image(label="YOLO11x Binary Mask", interactive=False, image_mode="L") | |
| seq_yolo11_stats = gr.Textbox(label="YOLO11x Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 4️⃣ YOLOv8x-seg") | |
| with gr.Row(): | |
| seq_yolo_img = gr.Image(label="YOLO Overlay", interactive=False) | |
| seq_yolo_bw = gr.Image(label="YOLO Binary Mask", interactive=False, image_mode="L") | |
| seq_yolo_stats = gr.Textbox(label="YOLO Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 5️⃣ Mask R-CNN (ResNet50-FPN)") | |
| with gr.Row(): | |
| seq_mrcnn_img = gr.Image(label="Mask R-CNN Overlay", interactive=False) | |
| seq_mrcnn_bw = gr.Image(label="Mask R-CNN Binary Mask", interactive=False, image_mode="L") | |
| seq_mrcnn_stats = gr.Textbox(label="Mask R-CNN Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 6️⃣ BiRefNet (Boundary-Aware Model)") | |
| with gr.Row(): | |
| seq_biref_img = gr.Image(label="BiRefNet Overlay", interactive=False) | |
| seq_biref_bw = gr.Image(label="BiRefNet Binary Mask", interactive=False, image_mode="L") | |
| seq_biref_stats = gr.Textbox(label="BiRefNet Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 7️⃣ SAM + YOLO (Strategy 1: Bbox + 5 Points)") | |
| with gr.Row(): | |
| seq_sam1_img = gr.Image(label="SAM+YOLO Strat 1 Overlay", interactive=False) | |
| seq_sam1_bw = gr.Image(label="SAM+YOLO Strat 1 Binary Mask", interactive=False, image_mode="L") | |
| seq_sam1_stats = gr.Textbox(label="SAM+YOLO Strat 1 Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 8️⃣ SAM + YOLO (Strategy 2: Mask + 5 Points)") | |
| with gr.Row(): | |
| seq_sam2_img = gr.Image(label="SAM+YOLO Strat 2 Overlay", interactive=False) | |
| seq_sam2_bw = gr.Image(label="SAM+YOLO Strat 2 Binary Mask", interactive=False, image_mode="L") | |
| seq_sam2_stats = gr.Textbox(label="SAM+YOLO Strat 2 Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 9️⃣ SAM + YOLO (Strategy 3: Direct Mask Prompting)") | |
| with gr.Row(): | |
| seq_sam3_img = gr.Image(label="SAM+YOLO Strat 3 Overlay", interactive=False) | |
| seq_sam3_bw = gr.Image(label="SAM+YOLO Strat 3 Binary Mask", interactive=False, image_mode="L") | |
| seq_sam3_stats = gr.Textbox(label="SAM+YOLO Strat 3 Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("## 🌍 Pretrained Zero-Shot Models") | |
| gr.Markdown("### 🔟 Grounding DINO (Zero-Shot Detection)") | |
| with gr.Row(): | |
| seq_dino_img = gr.Image(label="Grounding DINO Overlay", interactive=False) | |
| seq_dino_bw = gr.Image(label="Grounding DINO Binary Mask", interactive=False, image_mode="L") | |
| seq_dino_stats = gr.Textbox(label="Grounding DINO Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 1️⃣1️⃣ Grounded SAM (Zero-Shot Segmentation)") | |
| with gr.Row(): | |
| seq_gsam_img = gr.Image(label="Grounded SAM Overlay", interactive=False) | |
| seq_gsam_bw = gr.Image(label="Grounded SAM Binary Mask", interactive=False, image_mode="L") | |
| seq_gsam_stats = gr.Textbox(label="Grounded SAM Stats", interactive=False) | |
| gr.Markdown("---") | |
| gr.Markdown("### 1️⃣2️⃣ Intelliarts Car Parts (Detectron2)") | |
| with gr.Row(): | |
| seq_intell_img = gr.Image(label="Intelliarts Car Parts Overlay", interactive=False) | |
| seq_intell_bw = gr.Image(label="Intelliarts Car Parts Binary Mask", interactive=False, image_mode="L") | |
| seq_intell_stats = gr.Textbox(label="Intelliarts Car Parts Stats", interactive=False) | |
def run_all_models(img):
    """Run every model on ``img`` in sequence, streaming partial results.

    Generator used as the handler for the sequential-run button. Every
    ``yield`` emits one flat tuple holding three values per model, in the
    order the models are listed in ``model_steps`` below. That order must
    match the ``outputs=`` list the handler is registered with.

    The first tuple puts a "processing" placeholder in the third slot of each
    model and leaves the other slots ``None``. The tuple is then re-emitted
    after each model finishes, with that model's three slots filled in.

    Args:
        img: The input image passed in by the UI, or ``None`` if no image
            was supplied.

    Yields:
        tuple: ``3 * len(model_steps)`` values (36 for the 12 models here).
        When ``img`` is ``None`` a single all-``None`` tuple is yielded and
        no model is run.
    """
    pending_text = "⏳ Processing..."
    zero_shot_prompt = "car window. car glass. windshield."

    # One entry per model, in display order. Lambdas defer the actual work
    # until the step is reached, so building this table is free.
    # NOTE(review): the meaning of the trailing ``False`` passed to
    # ``process_image`` is not visible from here; it is kept exactly as in the
    # original calls.
    model_steps = (
        lambda: run_segformer(img, morph_cleanup=False),
        lambda: run_segformer(img, morph_cleanup=True),
        lambda: process_image(img, "YOLO11x-seg", "", False),
        lambda: process_image(img, "YOLOv8x-seg", "", False),
        lambda: process_image(img, "Mask R-CNN", "", False),
        lambda: run_birefnet(img),
        lambda: process_image(
            img, "SAM + YOLO (Strategy 1: Bbox + 5 Points)", "", False
        ),
        lambda: process_image(
            img, "SAM + YOLO (Strategy 2: Mask + 5 Points)", "", False
        ),
        lambda: process_image(
            img, "SAM + YOLO (Strategy 3: Direct Mask Prompting)", "", False
        ),
        lambda: process_image(
            img, "Grounding DINO (Zero-Shot Detection)", zero_shot_prompt, False
        ),
        lambda: process_image(
            img, "Grounded SAM (Zero-Shot Segmentation)", zero_shot_prompt, False
        ),
        lambda: process_image(
            img, "Intelliarts Car Parts (Detectron2)", "", False
        ),
    )
    slots_per_model = 3
    total_slots = slots_per_model * len(model_steps)

    if img is None:
        yield (None,) * total_slots
        return

    # Show the placeholder in every model's third (stats) slot right away.
    results = [None] * total_slots
    results[2::slots_per_model] = [pending_text] * len(model_steps)
    yield tuple(results)

    for index, step in enumerate(model_steps):
        # Unpack explicitly so a step returning anything other than exactly
        # three values fails loudly instead of silently shifting later slots.
        first, second, third = step()
        base = slots_per_model * index
        results[base:base + slots_per_model] = (first, second, third)
        yield tuple(results)
# NOTE(review): indentation was lost in this copy of the file; these
# statements presumably belong inside the enclosing Blocks context alongside
# the components they reference -- re-indent accordingly when merging.

# One (first, second, third) component triple per model, in the same order
# in which run_all_models fills its result slots.
seq_panels = (
    (seq_segf_img, seq_segf_bw, seq_segf_stats),
    (seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats),
    (seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats),
    (seq_yolo_img, seq_yolo_bw, seq_yolo_stats),
    (seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats),
    (seq_biref_img, seq_biref_bw, seq_biref_stats),
    (seq_sam1_img, seq_sam1_bw, seq_sam1_stats),
    (seq_sam2_img, seq_sam2_bw, seq_sam2_stats),
    (seq_sam3_img, seq_sam3_bw, seq_sam3_stats),
    (seq_dino_img, seq_dino_bw, seq_dino_stats),
    (seq_gsam_img, seq_gsam_bw, seq_gsam_stats),
    (seq_intell_img, seq_intell_bw, seq_intell_stats),
)

# Clicking the submit button streams run_all_models into the flattened
# list of output components.
run_event = submit_btn_seq.click(
    fn=run_all_models,
    inputs=[input_image_seq],
    outputs=[component for panel in seq_panels for component in panel],
)

# The stop button runs no function of its own; it only cancels run_event.
stop_btn_seq.click(fn=None, inputs=None, outputs=None, cancels=[run_event])
# Launch the app only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()