Spaces:

MLBench
/

Car-Mirror-Segmentation

Sleeping

App Files Files Community

Car-Mirror-Segmentation / app.py

Ayesha-Majeed

Upload app.py

269676c verified 3 days ago

Raw

History Blame Contribute Delete

43.7 kB

	import gradio as gr
	import numpy as np
	import cv2
	import time
	import torch
	import warnings
	import os
	import zipfile
	from PIL import Image
	import random

	warnings.filterwarnings("ignore")

	# ═══════════════════════════════════════════════════════════════════════════════
	# STEP 1: Extract any .zip files in current directory
	# ═══════════════════════════════════════════════════════════════════════════════
	# Unpack every .zip archive found in the working directory before anything
	# else runs. Each archive is handled on its own: a failure is printed and
	# skipped so that one bad archive cannot stop the app from starting.
	print("=" * 60)
	print(f"[STARTUP] Working dir: {os.getcwd()}")
	for f in sorted(os.listdir(".")):  # sorted -> same extraction order on every start
	    if not f.lower().endswith(".zip"):
	        continue
	    try:
	        with zipfile.ZipFile(f, "r") as zf:
	            zf.extractall(".")
	        print(f"[ZIP] Extracted {f} OK!")
	    except Exception as e:
	        # Best-effort step: report and carry on with the next archive.
	        print(f"[ZIP] ERROR: {e}")

	# ═══════════════════════════════════════════════════════════════════════════════
	# STEP 2: Copy images to root
	# ═══════════════════════════════════════════════════════════════════════════════
	def prepare_clean_examples(src_folder, prefix, limit=10):
	    """Copy up to ``limit`` images from ``src_folder`` into the working directory.

	    The folder is walked recursively in a deterministic (sorted) order. Every
	    image found is copied to ``<prefix>_<n><ext>`` in the current directory,
	    where ``n`` counts successful copies from 0 and ``ext`` is the lower-cased
	    extension of the source file, so the name matches the actual file format.

	    Args:
	        src_folder: Directory to search for images.
	        prefix: Filename prefix for the copies.
	        limit: Maximum number of images to copy; values <= 0 copy nothing.

	    Returns:
	        List of the destination file names, in copy order. Empty when the
	        folder does not exist or contains no images.
	    """
	    import shutil

	    results = []
	    if limit <= 0 or not os.path.exists(src_folder):
	        return results

	    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
	    for root, dirs, files in os.walk(src_folder):
	        dirs.sort()  # os.walk honours in-place edits: fixes the traversal order
	        for fname in sorted(files):
	            ext = os.path.splitext(fname)[1].lower()
	            if ext not in image_exts:
	                continue
	            src_path = os.path.join(root, fname)
	            dst_name = f"{prefix}_{len(results)}{ext}"
	            try:
	                shutil.copy2(src_path, dst_name)
	            except OSError as e:
	                # Unreadable/unwritable file: report it and try the next one.
	                print(f"Error copying {src_path}: {e}")
	                continue
	            results.append(dst_name)
	            if len(results) >= limit:
	                return results
	    return results

	# Example images for the gallery: use the first known examples folder that
	# actually yields images (an existing but empty folder no longer blocks the
	# remaining candidates), then fall back to a single bundled image.
	mirror_examples = []
	for folder in ["test car windows", "test_car_windows", "test car windows segmentation"]:
	    if os.path.exists(folder):
	        mirror_examples = prepare_clean_examples(folder, "mirror", limit=15)
	        if mirror_examples:
	            break
	if not mirror_examples and os.path.exists("car.jpeg"):
	    mirror_examples = ["car.jpeg"]

	# ═══════════════════════════════════════════════════════════════════════════════
	# Global Settings
	# ═══════════════════════════════════════════════════════════════════════════════
	# Torch device string used by every model in this file: the GPU when torch
	# reports one, otherwise the CPU.
	if torch.cuda.is_available():
	    DEVICE = "cuda"
	else:
	    DEVICE = "cpu"
	# Confidence threshold passed to the YOLO calls.
	CONF = 0.45

	def apply_mask_overlay(img_rgb, mask_bool, color=(0, 215, 255), alpha=0.4):
	    """Highlight the masked region of an image and dim everything outside it.

	    Inside the mask the image is blended with ``color`` (weight ``alpha``) and
	    the mask outline is drawn in ``color``; outside the mask the image is
	    darkened to half brightness.

	    Args:
	        img_rgb: Image array indexed as (row, col, channel).
	            NOTE(review): callers pass uint8 RGB images - confirm.
	        mask_bool: 2-D mask with the same height/width as the image. Any
	            dtype is accepted; non-zero entries count as "inside".
	        color: Tint/outline colour, one value per image channel.
	        alpha: Weight of the tint colour inside the mask.

	    Returns:
	        A new image array; ``img_rgb`` is not modified.
	    """
	    # Coerce to a true boolean array. Indexing with a 0/1 uint8 mask would be
	    # *integer* (row) indexing, which tints the wrong pixels and is extremely
	    # slow on large images.
	    mask_bool = np.asarray(mask_bool).astype(bool)

	    # 1. Darken the background (50% brightness, no blur).
	    dark_bg = cv2.addWeighted(img_rgb, 0.5, np.zeros_like(img_rgb), 0.5, 0)

	    # 2. Tint the masked pixels while keeping the original brightness.
	    tinted_sharp = img_rgb.copy()
	    tinted_sharp[mask_bool] = color
	    tinted_sharp = cv2.addWeighted(tinted_sharp, alpha, img_rgb, 1 - alpha, 0)

	    # 3. Outline the mask on the tinted image (before compositing).
	    mask_img = mask_bool.astype(np.uint8) * 255
	    contours, _ = cv2.findContours(mask_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    cv2.drawContours(tinted_sharp, contours, -1, color, 2, cv2.LINE_AA)

	    # 4. Composite: tinted object + outline inside, dark background outside.
	    return np.where(mask_bool[:, :, None], tinted_sharp, dark_bg)

	def draw_boxes(img_rgb, boxes, labels, color=(0, 215, 255)):
	    """Draw a thin rectangle plus HUD-style corner brackets for every box.

	    Args:
	        img_rgb: Image to draw on; it is copied, not modified.
	        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes; coordinates are
	            truncated to int.
	        labels: Accepted for interface compatibility only. Text labels are
	            intentionally not drawn so they cannot hide the segmentation
	            masks, and the number of labels no longer limits how many boxes
	            are drawn.
	        color: Line colour, one value per image channel.

	    Returns:
	        A copy of ``img_rgb`` with the boxes drawn.
	    """
	    out = img_rgb.copy()
	    thick = 3
	    for box in boxes:
	        x1, y1, x2, y2 = map(int, box)

	        # Faint inner bounding box line.
	        cv2.rectangle(out, (x1, y1), (x2, y2), color, 1)

	        # Bracket arm length: 15% of the shorter box side.
	        length = int(min(x2 - x1, y2 - y1) * 0.15)

	        # (corner, direction of its horizontal/vertical arm into the box)
	        corners = (
	            ((x1, y1), (1, 1)),    # top-left
	            ((x2, y1), (-1, 1)),   # top-right
	            ((x1, y2), (1, -1)),   # bottom-left
	            ((x2, y2), (-1, -1)),  # bottom-right
	        )
	        for (cx, cy), (dx, dy) in corners:
	            cv2.line(out, (cx, cy), (cx + dx * length, cy), color, thick, cv2.LINE_AA)
	            cv2.line(out, (cx, cy), (cx, cy + dy * length), color, thick, cv2.LINE_AA)
	    return out

	# ═══════════════════════════════════════════════════════════════════════════════
	# Morphological post-processing helper
	# ═══════════════════════════════════════════════════════════════════════════════
	def apply_morphology(mask_uint8, close_k=15, open_k=7):
	    """Fill holes (closing) then remove tiny blobs (opening) on a binary mask.

	    Args:
	        mask_uint8: Binary mask to clean up (passed straight to
	            ``cv2.morphologyEx``).
	        close_k: Diameter of the elliptical closing kernel; ``0``/``None``
	            skips the closing stage.
	        open_k: Diameter of the elliptical opening kernel; ``0``/``None``
	            skips the opening stage.

	    Returns:
	        The cleaned mask. When both stages are skipped the input array itself
	        is returned.
	    """
	    cleaned = mask_uint8
	    if close_k and close_k > 0:
	        close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (close_k, close_k))
	        cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, close_kernel)  # fill holes
	    if open_k and open_k > 0:
	        open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (open_k, open_k))
	        cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, open_kernel)  # remove noise
	    return cleaned

	# ═══════════════════════════════════════════════════════════════════════════════
	# Model Functions
	# ═══════════════════════════════════════════════════════════════════════════════
	def run_yolo_generic(img_rgb, model_path, target_classes, color, morph_cleanup=False):
	    """Segment an image with an Ultralytics YOLO segmentation checkpoint.

	    Args:
	        img_rgb: Input image array in RGB channel order.
	        model_path: Path of the YOLO weights to load (loaded on every call).
	        target_classes: Class ids to keep; other detections are ignored.
	        color: Overlay/box colour.
	        morph_cleanup: When true, each instance mask is cleaned with
	            ``apply_morphology`` before being merged.

	    Returns:
	        ``(overlay, bw_mask, stats)``: the visualisation, a 0/255 uint8 mask of
	        all kept instances, and a one-line summary string.
	    """
	    from ultralytics import YOLO

	    t0 = time.time()
	    model = YOLO(model_path)
	    # Ultralytics interprets numpy inputs as BGR (OpenCV convention) while this
	    # app works in RGB, so flip the channel order for inference only.
	    img_bgr = np.ascontiguousarray(img_rgb[..., ::-1])
	    # retina_masks=True returns masks at the original image resolution.
	    results = model(img_bgr, conf=CONF, verbose=False, retina_masks=True)
	    elapsed = time.time() - t0

	    result = results[0]
	    h, w = img_rgb.shape[:2]
	    combined_mask = np.zeros((h, w), dtype=np.uint8)
	    boxes, labels = [], []

	    if result.masks is not None:
	        detections = zip(result.masks.data, result.boxes.xyxy, result.boxes.cls, result.boxes.conf)
	        for mask, box, cls, conf in detections:
	            if int(cls) not in target_classes:
	                continue

	            # Threshold exactly like run_sam_strategy does.
	            mask_np = (mask.cpu().numpy() > 0.5).astype(np.uint8)
	            if mask_np.shape != (h, w):
	                # Defensive: keep the in-place OR below shape-safe.
	                mask_np = cv2.resize(mask_np, (w, h), interpolation=cv2.INTER_NEAREST)
	            # Optional per-instance morphological cleanup before combining.
	            if morph_cleanup:
	                mask_np = apply_morphology(mask_np)
	            combined_mask |= mask_np

	            boxes.append(box.cpu().tolist())
	            labels.append(f"glass {float(conf):.2f}")

	    # Morphology is deliberately NOT applied to the combined mask: closing would
	    # bridge the gaps (pillars) between separate windows.
	    combined_mask_bool = combined_mask > 0
	    morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
	    out = apply_mask_overlay(img_rgb, combined_mask_bool, color=color)
	    out = draw_boxes(out, boxes, labels, color=color)
	    bw_mask = combined_mask_bool.astype(np.uint8) * 255
	    return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s{morph_note}"

	def _sam_positive_points(yolo_mask, h, w):
	    """Pick five positive prompt points inside ``yolo_mask`` (strategy 2).

	    The first point is the mask centroid (or, if the centroid falls outside
	    the mask, the middle entry of the mask's pixel list). The other four lie
	    60% of the way from the centroid towards the top/bottom/left/right-most
	    mask pixel; any of them that lands off the mask is replaced by the first
	    point. Returns ``None`` for an empty mask.
	    """
	    y_coords, x_coords = np.where(yolo_mask)
	    if len(x_coords) == 0:
	        return None
	    cx, cy = int(np.mean(x_coords)), int(np.mean(y_coords))
	    if yolo_mask[cy, cx]:
	        anchor = [cx, cy]
	    else:
	        mid = len(x_coords) // 2
	        anchor = [int(x_coords[mid]), int(y_coords[mid])]

	    pts = [anchor]
	    extremes = (np.argmin(y_coords), np.argmax(y_coords), np.argmin(x_coords), np.argmax(x_coords))
	    for idx in extremes:
	        px = int(cx + (x_coords[idx] - cx) * 0.6)
	        py = int(cy + (y_coords[idx] - cy) * 0.6)
	        if 0 <= py < h and 0 <= px < w and yolo_mask[py, px]:
	            pts.append([px, py])
	        else:
	            pts.append(anchor)
	    return np.array(pts)


	def run_sam_strategy(img_rgb, yolo_model_path, target_classes, color, strategy, morph_cleanup=False):
	    """Refine YOLO detections with SAM (ViT-B) using one of three prompt strategies.

	    Strategies:
	        1: YOLO box + 5 positive points (box centre and four inset corners).
	        2: YOLO box + 5 positive points sampled inside the YOLO mask.
	        3: YOLO box + the YOLO mask as a dense mask prompt; the result is
	           filled from its external contours.
	        other: the detection is kept with an empty mask.

	    Args:
	        img_rgb: Input image array in RGB channel order.
	        yolo_model_path: YOLO segmentation weights used for the prompts.
	        target_classes: Class ids to keep.
	        color: Overlay/box colour.
	        strategy: Prompt strategy (see above).
	        morph_cleanup: Clean each SAM mask with ``apply_morphology``.

	    Returns:
	        ``(overlay, bw_mask, stats)``; when ``segment_anything`` is missing,
	        ``(img_rgb, None, error message)``. Other errors propagate to the
	        caller.
	    """
	    try:
	        from segment_anything import sam_model_registry, SamPredictor
	    except ImportError:
	        return img_rgb, None, "Error: segment-anything not installed"
	    import urllib.request
	    from ultralytics import YOLO as _YOLO

	    CKPT = "sam_vit_b_01ec64.pth"
	    URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
	    if not os.path.exists(CKPT):
	        # Download under a temporary name first so an interrupted transfer
	        # never leaves a truncated file that passes the existence check.
	        tmp_path = CKPT + ".part"
	        urllib.request.urlretrieve(URL, tmp_path)
	        os.replace(tmp_path, CKPT)

	    t0 = time.time()
	    sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
	    predictor = SamPredictor(sam)
	    predictor.set_image(img_rgb)

	    # Ultralytics interprets numpy inputs as BGR; SAM above takes the RGB image.
	    img_bgr = np.ascontiguousarray(img_rgb[..., ::-1])
	    yolo_res = _YOLO(yolo_model_path)(img_bgr, conf=CONF, verbose=False, retina_masks=True)[0]

	    h, w = img_rgb.shape[:2]
	    combined_mask = np.zeros((h, w), dtype=bool)
	    boxes_list, labels = [], []

	    if yolo_res.boxes is not None and yolo_res.masks is not None:
	        detections = zip(yolo_res.boxes.xyxy, yolo_res.masks.data, yolo_res.boxes.cls, yolo_res.boxes.conf)
	        for box, mask_data, cls, conf in detections:
	            if int(cls) not in target_classes:
	                continue
	            box_np = box.cpu().numpy()
	            yolo_mask = mask_data.cpu().numpy() > 0.5

	            if strategy == 1:
	                # Strategy 1: Bbox + 5 Points
	                x1, y1, x2, y2 = map(int, box_np)
	                cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
	                pts_np = np.array([
	                    [cx, cy],
	                    [x1 + 5, y1 + 5], [x2 - 5, y1 + 5],
	                    [x1 + 5, y2 - 5], [x2 - 5, y2 - 5],
	                ])
	                masks_sam, _, _ = predictor.predict(
	                    box=box_np, point_coords=pts_np,
	                    point_labels=np.ones(len(pts_np)), multimask_output=False)
	                sam_mask = masks_sam[0]
	            elif strategy == 2:
	                # Strategy 2: Mask + 5 Points
	                pts_np = _sam_positive_points(yolo_mask, h, w)
	                if pts_np is None:
	                    continue  # empty YOLO mask: nothing to prompt with
	                masks_sam, _, _ = predictor.predict(
	                    box=box_np, point_coords=pts_np,
	                    point_labels=np.ones(len(pts_np)), multimask_output=False)
	                sam_mask = masks_sam[0]
	            elif strategy == 3:
	                # Strategy 3: Direct Mask Prompting. The YOLO mask is resized
	                # to 256x256 and encoded as strongly positive/negative values.
	                yolo_mask_resized = cv2.resize(
	                    yolo_mask.astype(np.float32), (256, 256), interpolation=cv2.INTER_NEAREST)
	                mask_input = np.zeros((1, 256, 256), dtype=np.float32)
	                mask_input[0] = np.where(yolo_mask_resized > 0.5, 30.0, -30.0)
	                masks_sam, _, _ = predictor.predict(
	                    box=box_np, mask_input=mask_input, multimask_output=False)

	                # Fill interior holes by redrawing only the external contours.
	                raw_mask = masks_sam[0].astype(np.uint8) * 255
	                contours, _ = cv2.findContours(raw_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	                filled_mask = np.zeros_like(raw_mask)
	                cv2.drawContours(filled_mask, contours, -1, 255, cv2.FILLED)
	                sam_mask = filled_mask > 0
	            else:
	                sam_mask = np.zeros((h, w), dtype=bool)

	            sam_mask_uint = sam_mask.astype(np.uint8)
	            if morph_cleanup:
	                sam_mask_uint = apply_morphology(sam_mask_uint)
	            combined_mask |= sam_mask_uint.astype(bool)
	            boxes_list.append(box_np.tolist())
	            labels.append(f"glass {float(conf):.2f}")

	    elapsed = time.time() - t0
	    morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
	    out = apply_mask_overlay(img_rgb, combined_mask, color=color)
	    out = draw_boxes(out, boxes_list, labels, color=color)
	    bw_mask = (combined_mask * 255).astype(np.uint8)
	    return out, bw_mask, f"Found: {len(boxes_list)} | Strategy: {strategy} | Inference: {elapsed:.2f}s{morph_note}"

	def run_mask_rcnn(img_rgb, weights_path):
	    """Segment an image with a two-class torchvision Mask R-CNN (ResNet50-FPN v2).

	    Args:
	        img_rgb: Input image array in RGB channel order.
	            NOTE(review): assumed uint8 in 0-255 (scaled by 1/255 below).
	        weights_path: Checkpoint file; either a raw state dict or a dict with
	            a ``"model_state_dict"`` entry.

	    Returns:
	        ``(overlay, bw_mask, stats)`` on success, otherwise
	        ``(img_rgb, None, error message)``.
	    """
	    t0 = time.time()
	    try:
	        from torchvision.models.detection import maskrcnn_resnet50_fpn_v2
	        from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
	        from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

	        # Rebuild the architecture with 2-class box and mask heads so the
	        # checkpoint's tensors fit.
	        model = maskrcnn_resnet50_fpn_v2(weights=None)
	        in_features = model.roi_heads.box_predictor.cls_score.in_features
	        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
	        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
	        model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, 2)

	        # SECURITY NOTE: weights_only=False unpickles arbitrary objects; only
	        # load checkpoints that come from a trusted source.
	        checkpoint = torch.load(weights_path, map_location=DEVICE, weights_only=False)
	        if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
	            model.load_state_dict(checkpoint["model_state_dict"])
	        else:
	            model.load_state_dict(checkpoint)

	        model.to(DEVICE)
	        model.eval()

	        # HWC uint8 -> CHW float in [0, 1] (what ToTensor produced, without the
	        # deprecated transform and the PIL round trip).
	        img_tensor = (
	            torch.from_numpy(np.ascontiguousarray(img_rgb))
	            .permute(2, 0, 1).float().div(255.0).to(DEVICE)
	        )
	        with torch.no_grad():
	            outputs = model([img_tensor])[0]

	        h, w = img_rgb.shape[:2]
	        pred_mask = np.zeros((h, w), dtype=bool)
	        boxes_list, labels_list = [], []
	        score_thresh = 0.45

	        for score, mask, box in zip(outputs['scores'], outputs['masks'], outputs['boxes']):
	            if score > score_thresh:
	                pred_mask |= mask[0].cpu().numpy() > 0.5
	                boxes_list.append(box.cpu().numpy().tolist())
	                labels_list.append(f"glass {float(score):.2f}")

	        elapsed = time.time() - t0
	        out = apply_mask_overlay(img_rgb, pred_mask, color=(255, 165, 0))
	        out = draw_boxes(out, boxes_list, labels_list, color=(255, 165, 0))
	        bw_mask = (pred_mask * 255).astype(np.uint8)

	        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
	    except Exception as e:
	        return img_rgb, None, f"Mask R-CNN Error: {e}"

	def run_grounding_dino(img_rgb, text_prompt):
	    """Detect objects described by ``text_prompt`` with Grounding DINO (tiny).

	    This model produces boxes only, so the returned binary mask is all zeros.

	    Args:
	        img_rgb: Input image array in RGB channel order.
	        text_prompt: Free-text query, e.g. ``"car window. windshield."``.

	    Returns:
	        ``(overlay, bw_mask, stats)`` on success, otherwise
	        ``(img_rgb, None, error message)``.
	    """
	    try:
	        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

	        # Grounding DINO expects lower-cased queries that end with a period.
	        prompt = (text_prompt or "").strip().lower()
	        if not prompt:
	            return img_rgb, None, "Grounding DINO Error: please provide a text prompt"
	        if not prompt.endswith("."):
	            prompt += "."

	        t0 = time.time()
	        model_id = "IDEA-Research/grounding-dino-tiny"
	        processor = AutoProcessor.from_pretrained(model_id)
	        model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(DEVICE)

	        inputs = processor(images=img_rgb, text=prompt, return_tensors="pt").to(DEVICE)
	        with torch.no_grad():
	            outputs = model(**inputs)

	        h, w = img_rgb.shape[:2]
	        results = processor.post_process_grounded_object_detection(
	            outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
	        )[0]

	        boxes = results["boxes"].cpu().numpy().tolist()
	        scores = results["scores"].cpu().numpy().tolist()
	        labels = results["labels"]

	        elapsed = time.time() - t0
	        bw_mask = np.zeros((h, w), dtype=np.uint8)  # DINO is boxes only
	        str_labels = [f"{lbl} {scr:.2f}" for lbl, scr in zip(labels, scores)]
	        out = draw_boxes(img_rgb.copy(), boxes, str_labels, color=(255, 100, 50))
	        return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s"
	    except Exception as e:
	        return img_rgb, None, f"Grounding DINO Error: {e}\n(Need transformers>=4.35)"

	def run_grounded_sam(img_rgb, text_prompt):
	    """Zero-shot segmentation: Grounding DINO boxes refined into masks by SAM.

	    Args:
	        img_rgb: Input image array in RGB channel order.
	        text_prompt: Free-text query for Grounding DINO.

	    Returns:
	        ``(overlay, bw_mask, stats)`` on success, otherwise
	        ``(img_rgb, None, error message)``.
	    """
	    try:
	        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
	        from segment_anything import sam_model_registry, SamPredictor
	        import urllib.request

	        # Grounding DINO expects lower-cased queries that end with a period.
	        prompt = (text_prompt or "").strip().lower()
	        if not prompt:
	            return img_rgb, None, "Grounded SAM Error: please provide a text prompt"
	        if not prompt.endswith("."):
	            prompt += "."

	        t0 = time.time()

	        # 1. DINO Detection
	        dino_id = "IDEA-Research/grounding-dino-tiny"
	        processor = AutoProcessor.from_pretrained(dino_id)
	        dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(dino_id).to(DEVICE)
	        inputs = processor(images=img_rgb, text=prompt, return_tensors="pt").to(DEVICE)
	        with torch.no_grad():
	            outputs = dino_model(**inputs)

	        h, w = img_rgb.shape[:2]
	        dino_res = processor.post_process_grounded_object_detection(
	            outputs, inputs.input_ids, text_threshold=0.25, target_sizes=[(h, w)]
	        )[0]
	        boxes = dino_res["boxes"].cpu().numpy()
	        scores = dino_res["scores"].cpu().numpy()
	        labels_txt = dino_res["labels"]

	        combined_mask = np.zeros((h, w), dtype=bool)
	        str_labels = []

	        # 2. SAM Segmentation - only when there is something to segment, so
	        # an empty detection result costs neither a download nor a model load.
	        if len(boxes) > 0:
	            CKPT = "sam_vit_b_01ec64.pth"
	            URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
	            if not os.path.exists(CKPT):
	                # Temporary name first: an interrupted download must not leave
	                # a truncated file that passes the existence check.
	                tmp_path = CKPT + ".part"
	                urllib.request.urlretrieve(URL, tmp_path)
	                os.replace(tmp_path, CKPT)

	            sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
	            predictor = SamPredictor(sam)
	            predictor.set_image(img_rgb)

	            for box, score, label in zip(boxes, scores, labels_txt):
	                masks, _, _ = predictor.predict(box=box, multimask_output=False)
	                combined_mask |= masks[0].astype(bool)
	                str_labels.append(f"{label} {score:.2f}")

	        elapsed = time.time() - t0
	        out = apply_mask_overlay(img_rgb, combined_mask, color=(255, 80, 160))
	        out = draw_boxes(out, boxes.tolist(), str_labels, color=(255, 80, 160))
	        bw_mask = (combined_mask * 255).astype(np.uint8)
	        return out, bw_mask, f"Found: {len(boxes)} | Inference: {elapsed:.2f}s"
	    except Exception as e:
	        return img_rgb, None, f"Grounded SAM Error: {e}"

	def run_intelliarts_car_parts(img_rgb):
	    """Segment car glass and mirrors with the Intelliarts Detectron2 car-parts model.

	    Installs ``detectron2`` on first use if it is missing and downloads the
	    weights if they are not cached locally. Only the classes back_glass,
	    front_glass, left_mirror and right_mirror (ids 2, 8, 14, 15) are kept.

	    Args:
	        img_rgb: Input image array in RGB channel order.

	    Returns:
	        ``(overlay, bw_mask, stats)`` on success, otherwise
	        ``(img_rgb, None, error message)``.
	    """
	    t0 = time.time()
	    try:
	        import detectron2  # noqa: F401 - availability probe only
	    except ImportError:
	        import importlib
	        import subprocess
	        import sys

	        print("Installing detectron2... this may take a few minutes!")
	        # Argument list + current interpreter: no shell involved, and the
	        # package lands in the environment this app is running in.
	        subprocess.run(
	            [sys.executable, "-m", "pip", "install",
	             "git+https://github.com/facebookresearch/detectron2.git",
	             "--no-build-isolation"],
	            check=False,  # a failed install surfaces as the import error below
	        )
	        importlib.invalidate_caches()

	    try:
	        from detectron2 import model_zoo
	        from detectron2.engine import DefaultPredictor
	        from detectron2.config import get_cfg
	        import urllib.request

	        model_url = "https://huggingface.co/spaces/intelliarts/Car_parts_detection/resolve/main/model_final.pth"
	        model_path = "intelliarts_model_final.pth"
	        if not os.path.exists(model_path):
	            print("Downloading Intelliarts Car Parts weights...")
	            # Temporary name first so a partial download is never mistaken
	            # for a complete weights file.
	            tmp_path = model_path + ".part"
	            urllib.request.urlretrieve(model_url, tmp_path)
	            os.replace(tmp_path, model_path)

	        cfg = get_cfg()
	        cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
	        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.45
	        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 19
	        cfg.MODEL.WEIGHTS = model_path
	        cfg.MODEL.DEVICE = DEVICE

	        predictor = DefaultPredictor(cfg)
	        # DefaultPredictor takes images in BGR order; the app works in RGB.
	        outputs = predictor(np.ascontiguousarray(img_rgb[:, :, ::-1]))
	        instances = outputs["instances"].to("cpu")

	        # Classes: 2: back_glass, 8: front_glass, 14: left_mirror, 15: right_mirror
	        target_classes = {2, 8, 14, 15}
	        class_names = ['_background_', 'back_bumper', 'back_glass', 'back_left_door', 'back_left_light', 'back_right_door', 'back_right_light', 'front_bumper', 'front_glass', 'front_left_door', 'front_left_light', 'front_right_door', 'front_right_light', 'hood', 'left_mirror', 'right_mirror', 'tailgate', 'trunk', 'wheel']

	        h, w = img_rgb.shape[:2]
	        combined_mask = np.zeros((h, w), dtype=bool)
	        boxes_list, labels_list = [], []

	        detections = zip(
	            instances.pred_classes.numpy(),
	            instances.scores.numpy(),
	            instances.pred_boxes.tensor.numpy(),
	            instances.pred_masks.numpy(),
	        )
	        for cls_id, score, box, mask in detections:
	            if cls_id not in target_classes:
	                continue
	            combined_mask |= mask
	            boxes_list.append(box.tolist())
	            labels_list.append(f"{class_names[cls_id]} {score:.2f}")

	        elapsed = time.time() - t0
	        out = apply_mask_overlay(img_rgb, combined_mask, color=(50, 150, 255))
	        out = draw_boxes(out, boxes_list, labels_list, color=(50, 150, 255))
	        bw_mask = (combined_mask * 255).astype(np.uint8)

	        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
	    except Exception as e:
	        return img_rgb, None, f"Intelliarts Detectron2 Error: {e}"

	# ═══════════════════════════════════════════════════════════════════════════════
	# SegFormer Function
	# ═══════════════════════════════════════════════════════════════════════════════
	def run_segformer(img_rgb, morph_cleanup=False):
	    """Semantic segmentation with the fine-tuned SegFormer checkpoint.

	    The checkpoint is taken from the first candidate directory that contains
	    a ``config.json``; if none does, ``"best_segformer_dice_model"`` is
	    passed to ``from_pretrained`` unchanged. A pixel is foreground when the
	    softmax probability of class 1 exceeds 0.5.

	    Args:
	        img_rgb: Input image array in RGB channel order.
	        morph_cleanup: Clean the predicted mask with ``apply_morphology``.

	    Returns:
	        ``(overlay, bw_mask, stats)`` on success, otherwise
	        ``(img_rgb, None, error message)``.
	    """
	    try:
	        from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
	        import torch.nn.functional as F

	        t0 = time.time()
	        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	        app_dir = os.path.dirname(__file__) or "."  # never an empty path

	        # Candidates in priority order (local PC layout, Space root, next to
	        # app.py, files uploaded straight into the root / app directory).
	        paths_to_try = [
	            os.path.join(base_dir, "SegFormer_Model", "best_segformer_dice_model"),
	            "best_segformer_dice_model",
	            os.path.join(app_dir, "best_segformer_dice_model"),
	            ".",
	            app_dir,
	        ]
	        # For SegFormer, a usable directory must contain config.json.
	        model_path = next(
	            (p for p in paths_to_try if os.path.exists(os.path.join(p, "config.json"))),
	            "best_segformer_dice_model",  # fallback
	        )

	        processor = SegformerImageProcessor.from_pretrained(model_path)
	        model = SegformerForSemanticSegmentation.from_pretrained(model_path).to(DEVICE)

	        inputs = processor(images=Image.fromarray(img_rgb), return_tensors="pt")
	        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

	        with torch.no_grad():
	            outputs = model(**inputs)
	            h, w = img_rgb.shape[:2]
	            # Upsample the logits to the input resolution before thresholding.
	            logits = F.interpolate(outputs.logits, size=(h, w), mode="bilinear", align_corners=False)[0]

	        probs = F.softmax(logits, dim=0)
	        pred_mask = (probs[1] > 0.5).cpu().numpy().astype(np.uint8)

	        # Apply morphological cleanup if requested.
	        if morph_cleanup:
	            pred_mask = apply_morphology(pred_mask, close_k=15, open_k=7)

	        elapsed = time.time() - t0
	        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
	        # Pass a real boolean mask: a 0/1 uint8 array would be used as integer
	        # row indices by the overlay's masked assignment.
	        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 50, 50))
	        bw_mask = (pred_mask * 255).astype(np.uint8)
	        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s{morph_note}"
	    except Exception as e:
	        return img_rgb, None, f"SegFormer Error: {e}"

	# ═══════════════════════════════════════════════════════════════════════════════
	# BiRefNet Function
	# ═══════════════════════════════════════════════════════════════════════════════
	def run_birefnet(img_rgb):
	    """Semantic segmentation with the fine-tuned BiRefNet model.

	    The model is loaded from the first candidate directory containing both
	    ``config.json`` and ``model.safetensors``; otherwise the Hub repository
	    ``Ayesha-Majeed/birefnet_car_window`` is used. The image is resized to
	    1024x1024 for inference and the prediction is resized back to the input
	    resolution; a pixel is foreground when its sigmoid output exceeds 0.5.

	    Args:
	        img_rgb: Input image array in RGB channel order.

	    Returns:
	        ``(overlay, bw_mask, stats)`` on success, otherwise
	        ``(img_rgb, None, error message)``.
	    """
	    try:
	        from contextlib import nullcontext
	        from transformers import AutoModelForImageSegmentation
	        from torchvision import transforms
	        import torch.nn.functional as F

	        t0 = time.time()

	        app_dir = os.path.dirname(os.path.abspath(__file__))
	        base_dir = os.path.dirname(app_dir)

	        # Candidates in priority order (local PC layout, Space root, next to
	        # app.py, extra fallback name).
	        paths_to_try = [
	            os.path.join(base_dir, "BiRefNet_Model", "best_model-20260624T051601Z-3-001", "best_model"),
	            "birefnet_model",
	            os.path.join(app_dir, "birefnet_model"),
	            "best_birefnet_model",
	        ]
	        required_files = ("config.json", "model.safetensors")
	        model_path = next(
	            (p for p in paths_to_try
	             if all(os.path.exists(os.path.join(p, name)) for name in required_files)),
	            "Ayesha-Majeed/birefnet_car_window",  # final fallback: Hub download
	        )

	        # SECURITY NOTE: trust_remote_code=True executes Python code shipped
	        # with the model repository; only use trusted sources.
	        model = AutoModelForImageSegmentation.from_pretrained(model_path, trust_remote_code=True).to(DEVICE)
	        model.eval()

	        image_transform = transforms.Compose([
	            transforms.Resize((1024, 1024)),
	            transforms.ToTensor(),
	            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
	        ])
	        input_tensor = image_transform(Image.fromarray(img_rgb)).unsqueeze(0).to(DEVICE)

	        # Mixed precision on the GPU only; plain execution on the CPU.
	        amp_ctx = torch.amp.autocast("cuda") if DEVICE == "cuda" else nullcontext()
	        with torch.no_grad(), amp_ctx:
	            preds = model(input_tensor)
	        # Multi-output models return a sequence; the last entry is used.
	        final_pred = preds[-1] if isinstance(preds, (list, tuple)) else preds

	        h, w = img_rgb.shape[:2]
	        final_pred = F.interpolate(final_pred.float(), size=(h, w), mode="bilinear", align_corners=False)
	        pred_mask = (torch.sigmoid(final_pred) > 0.5).squeeze().cpu().numpy().astype(np.uint8)

	        elapsed = time.time() - t0
	        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 0, 0))  # Red
	        bw_mask = (pred_mask * 255).astype(np.uint8)
	        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s"
	    except Exception as e:
	        return img_rgb, None, f"BiRefNet Error: {e}"

	# ═══════════════════════════════════════════════════════════════════════════════
	# Gradio Process Function
	# ═══════════════════════════════════════════════════════════════════════════════
	# A beautiful palette of pastel and neon colors for dynamic visualizations
	# Overlay colours, as 3-component tuples; process_image draws one at random
	# per call.
	PASTEL_COLORS = [
	    (255, 105, 180),  # hot / light pink
	    (180, 130, 255),  # light purple
	    (0, 215, 255),    # light sky blue / cyan
	    (255, 220, 50),   # light yellow
	    (255, 160, 50),   # light orange
	    (150, 255, 150),  # light mint green
	    (240, 240, 255),  # light white / silver
	]

	def process_image(img_rgb, model_name, text_prompt="", morph_cleanup=False):
	    """Run the model selected by ``model_name`` on ``img_rgb``.

	    Args:
	        img_rgb: Input image array, or ``None`` when nothing was uploaded.
	        model_name: Display name of the model (see the branches below).
	        text_prompt: Query for the zero-shot models; ignored by the others.
	        morph_cleanup: Forwarded to the models that support morphological
	            cleanup.

	    Returns:
	        ``(overlay, bw_mask, stats)``. For a missing image the result is
	        ``(None, None, message)``; for an unknown model or any exception it
	        is ``(img_rgb, None, message)``.
	    """
	    if img_rgb is None:
	        return None, None, "Please upload an image."

	    # Pick a random color for this specific inference run.
	    run_color = random.choice(PASTEL_COLORS)

	    sam_strategies = {
	        "SAM + YOLO (Strategy 1: Bbox + 5 Points)": 1,
	        "SAM + YOLO (Strategy 2: Mask + 5 Points)": 2,
	        "SAM + YOLO (Strategy 3: Direct Mask Prompting)": 3,
	    }

	    try:
	        if model_name == "YOLOv8x-seg (Custom Window)":
	            return run_yolo_generic(img_rgb, "best.pt", target_classes=[0, 1], color=run_color, morph_cleanup=morph_cleanup)
	        if model_name == "YOLOv8x-seg":
	            return run_yolo_generic(img_rgb, "best.pt", target_classes=[0, 1], color=(255, 215, 0), morph_cleanup=morph_cleanup)
	        if model_name == "YOLO11x-seg":
	            # Prefer weights uploaded next to the app, then the local
	            # training-run layout, then the generic YOLO weights.
	            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	            candidates = [
	                "yolo11_best.pt",
	                os.path.join(base_dir, "runs", "segment", "runs", "car_mirror_seg", "yolo11x_seg_1024", "weights", "best.pt"),
	            ]
	            y11_weights = next((p for p in candidates if os.path.exists(p)), "best.pt")
	            return run_yolo_generic(img_rgb, y11_weights, target_classes=[0, 1], color=(0, 255, 120), morph_cleanup=morph_cleanup)
	        if model_name in sam_strategies:
	            return run_sam_strategy(img_rgb, "best.pt", target_classes=[0, 1], color=run_color, strategy=sam_strategies[model_name], morph_cleanup=morph_cleanup)
	        if model_name == "Mask R-CNN":
	            # Prefer weights uploaded next to the app as "maskrcnn_best.pt",
	            # then the local training-run layout, then a relative path.
	            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	            candidates = [
	                "maskrcnn_best.pt",
	                os.path.join(base_dir, "Mask_RCNN", "runs", "woven-sweep-5", "best.pt"),
	            ]
	            mrcnn_weights = next((p for p in candidates if os.path.exists(p)), "Mask_RCNN/runs/woven-sweep-5/best.pt")
	            return run_mask_rcnn(img_rgb, mrcnn_weights)
	        if model_name == "Grounding DINO (Zero-Shot Detection)":
	            return run_grounding_dino(img_rgb, text_prompt)
	        if model_name == "Grounded SAM (Zero-Shot Segmentation)":
	            return run_grounded_sam(img_rgb, text_prompt)
	        if model_name == "Intelliarts Car Parts (Detectron2)":
	            return run_intelliarts_car_parts(img_rgb)
	        if model_name == "SegFormer":
	            return run_segformer(img_rgb, morph_cleanup=morph_cleanup)
	        if model_name == "BiRefNet":
	            return run_birefnet(img_rgb)
	        return img_rgb, None, "Model not recognized."
	    except Exception as e:
	        return img_rgb, None, f"Error: {str(e)}"

	# ═══════════════════════════════════════════════════════════════════════════════
	# Gradio UI
	# ═══════════════════════════════════════════════════════════════════════════════
	theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")


	def _result_panel(heading, label, divider=True):
	    """Create one model's result section inside the active Gradio context.

	    Emits an optional horizontal rule, the heading, a row holding the overlay
	    and binary-mask images, and a stats textbox. Returns
	    ``(overlay_image, mask_image, stats_textbox)``.

	    NOTE(review): the source this was reconstructed from had lost its
	    indentation; the stats textbox is placed below the image row - confirm
	    against the deployed layout.
	    """
	    if divider:
	        gr.Markdown("---")
	    gr.Markdown(heading)
	    with gr.Row():
	        overlay = gr.Image(label=f"{label} Overlay", interactive=False)
	        mask = gr.Image(label=f"{label} Binary Mask", interactive=False, image_mode="L")
	    stats = gr.Textbox(label=f"{label} Stats", interactive=False)
	    return overlay, mask, stats


	with gr.Blocks(theme=theme, title="Car Window Segmentation") as demo:
	    gr.Markdown("""
	    # Car Window Segmentation
	    Compare your custom trained YOLOv8 model against state-of-the-art Zero-Shot models!
	    """)

	    # ── TAB 3: Comprehensive Evaluation ──
	    with gr.Tab("Comprehensive Evaluation"):
	        gr.Markdown("### Comprehensive Evaluation: Results from All Trained and Pretrained Models")
	        # "\\." keeps a literal backslash in the Markdown (so 10-12 are not
	        # parsed as a list) without relying on an invalid Python escape.
	        gr.Markdown("""The following models will run and display their results below:

	        Custom Trained Models:

	        1. SegFormer
	        2. SegFormer + Morphological
	        3. YOLO11x-seg
	        4. YOLOv8x-seg
	        5. Mask R-CNN
	        6. BiRefNet
	        7. SAM + YOLO (Strategy 1: Bbox + 5 Points)
	        8. SAM + YOLO (Strategy 2: Mask + 5 Points)
	        9. SAM + YOLO (Strategy 3: Direct Mask Prompting)

	        Pretrained Zero-Shot Models:

	        10\\. Grounding DINO

	        11\\. Grounded SAM

	        12\\. Intelliarts Car Parts

	        Our Findings: SegFormer and YOLO11x deliver the best performance with significantly sharper edge precision.
	        """)

	        with gr.Row():
	            input_image_seq = gr.Image(type="numpy", label="Upload Window Image")
	        with gr.Row():
	            submit_btn_seq = gr.Button("Run All Models", variant="primary", size="lg")
	            stop_btn_seq = gr.Button("🛑 Stop Processing", variant="stop", size="lg")

	        if mirror_examples:
	            gr.Markdown("### Or click any example image below to load it:")
	            compare_gallery = gr.Gallery(value=mirror_examples, columns=10, height=120, object_fit="cover", allow_preview=False, show_label=False)

	            def load_compare_img(evt: gr.SelectData):
	                """Return the path of the gallery item that was clicked."""
	                return mirror_examples[evt.index]

	            compare_gallery.select(fn=load_compare_img, inputs=None, outputs=input_image_seq)

	        gr.Markdown("---")
	        gr.Markdown("## 🚀 Custom Trained Models")

	        seq_segf_img, seq_segf_bw, seq_segf_stats = _result_panel(
	            "### 1️⃣ SegFormer (Transformer)", "SegFormer", divider=False)
	        seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats = _result_panel(
	            "### 2️⃣ SegFormer + Morphological Cleanup (Holes Filled + Sharp Borders)", "SegFormer + Morph")
	        seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats = _result_panel(
	            "### 3️⃣ YOLO11x-seg", "YOLO11x")
	        seq_yolo_img, seq_yolo_bw, seq_yolo_stats = _result_panel(
	            "### 4️⃣ YOLOv8x-seg", "YOLO")
	        seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats = _result_panel(
	            "### 5️⃣ Mask R-CNN (ResNet50-FPN)", "Mask R-CNN")
	        seq_biref_img, seq_biref_bw, seq_biref_stats = _result_panel(
	            "### 6️⃣ BiRefNet (Boundary-Aware Model)", "BiRefNet")
	        seq_sam1_img, seq_sam1_bw, seq_sam1_stats = _result_panel(
	            "### 7️⃣ SAM + YOLO (Strategy 1: Bbox + 5 Points)", "SAM+YOLO Strat 1")
	        seq_sam2_img, seq_sam2_bw, seq_sam2_stats = _result_panel(
	            "### 8️⃣ SAM + YOLO (Strategy 2: Mask + 5 Points)", "SAM+YOLO Strat 2")
	        seq_sam3_img, seq_sam3_bw, seq_sam3_stats = _result_panel(
	            "### 9️⃣ SAM + YOLO (Strategy 3: Direct Mask Prompting)", "SAM+YOLO Strat 3")

	        gr.Markdown("---")
	        gr.Markdown("## 🌍 Pretrained Zero-Shot Models")

	        seq_dino_img, seq_dino_bw, seq_dino_stats = _result_panel(
	            "### 🔟 Grounding DINO (Zero-Shot Detection)", "Grounding DINO", divider=False)
	        seq_gsam_img, seq_gsam_bw, seq_gsam_stats = _result_panel(
	            "### 1️⃣1️⃣ Grounded SAM (Zero-Shot Segmentation)", "Grounded SAM")
	        seq_intell_img, seq_intell_bw, seq_intell_stats = _result_panel(
	            "### 1️⃣2️⃣ Intelliarts Car Parts (Detectron2)", "Intelliarts Car Parts")

	        # One callable per result panel, in display order. Each takes the
	        # image and returns (overlay, bw_mask, stats).
	        _ZERO_SHOT_PROMPT = "car window. car glass. windshield."
	        _MODEL_STEPS = [
	            lambda img: run_segformer(img, morph_cleanup=False),
	            lambda img: run_segformer(img, morph_cleanup=True),
	            lambda img: process_image(img, "YOLO11x-seg", "", False),
	            lambda img: process_image(img, "YOLOv8x-seg", "", False),
	            lambda img: process_image(img, "Mask R-CNN", "", False),
	            lambda img: run_birefnet(img),
	            lambda img: process_image(img, "SAM + YOLO (Strategy 1: Bbox + 5 Points)", "", False),
	            lambda img: process_image(img, "SAM + YOLO (Strategy 2: Mask + 5 Points)", "", False),
	            lambda img: process_image(img, "SAM + YOLO (Strategy 3: Direct Mask Prompting)", "", False),
	            lambda img: process_image(img, "Grounding DINO (Zero-Shot Detection)", _ZERO_SHOT_PROMPT, False),
	            lambda img: process_image(img, "Grounded SAM (Zero-Shot Segmentation)", _ZERO_SHOT_PROMPT, False),
	            lambda img: process_image(img, "Intelliarts Car Parts (Detectron2)", "", False),
	        ]

	        def run_all_models(img):
	            """Run every model in display order, streaming results to the UI.

	            Generator: each yield is a tuple with three values per model
	            (overlay, binary mask, stats) matching the ``outputs`` list of
	            the click handler. The first yield marks every stats box as
	            pending; one further yield follows each finished model.
	            """
	            n_outputs = 3 * len(_MODEL_STEPS)
	            if img is None:
	                yield tuple([None] * n_outputs)
	                return

	            # Step 0: show "Processing..." in ALL stats textboxes immediately.
	            PENDING = "⏳ Processing..."
	            results = [None] * n_outputs
	            for stats_idx in range(2, n_outputs, 3):
	                results[stats_idx] = PENDING
	            yield tuple(results)

	            for step_idx, step in enumerate(_MODEL_STEPS):
	                overlay, bw_mask, stats = step(img)
	                base = 3 * step_idx
	                results[base], results[base + 1], results[base + 2] = overlay, bw_mask, stats
	                yield tuple(results)

	        run_event = submit_btn_seq.click(
	            fn=run_all_models,
	            inputs=[input_image_seq],
	            outputs=[seq_segf_img, seq_segf_bw, seq_segf_stats,
	                     seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats,
	                     seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats,
	                     seq_yolo_img, seq_yolo_bw, seq_yolo_stats,
	                     seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats,
	                     seq_biref_img, seq_biref_bw, seq_biref_stats,
	                     seq_sam1_img, seq_sam1_bw, seq_sam1_stats,
	                     seq_sam2_img, seq_sam2_bw, seq_sam2_stats,
	                     seq_sam3_img, seq_sam3_bw, seq_sam3_stats,
	                     seq_dino_img, seq_dino_bw, seq_dino_stats,
	                     seq_gsam_img, seq_gsam_bw, seq_gsam_stats,
	                     seq_intell_img, seq_intell_bw, seq_intell_stats]
	        )

	        # The stop button cancels the running generator event.
	        stop_btn_seq.click(fn=None, inputs=None, outputs=None, cancels=[run_event])

	if __name__ == "__main__":
	    demo.launch()