import gradio as gr
import numpy as np
import cv2
import time
import torch
import warnings
import os
import zipfile
from PIL import Image
import random
warnings.filterwarnings("ignore")
# ───────────────────────────────────────────────────────────────────────────────
# STEP 1: Extract any .zip files in current directory
# ───────────────────────────────────────────────────────────────────────────────
# Startup banner, then unpack every .zip archive sitting in the working directory.
print("=" * 60)
print(f"[STARTUP] Working dir: {os.getcwd()}")
for archive_name in os.listdir("."):
    # Only files whose name ends in ".zip" are considered.
    if not archive_name.endswith(".zip"):
        continue
    try:
        with zipfile.ZipFile(archive_name, 'r') as archive:
            archive.extractall(".")
        print(f"[ZIP] Extracted {archive_name} OK!")
    except Exception as err:
        # Best effort: a broken archive is reported and startup continues.
        print(f"[ZIP] ERROR: {err}")
# ───────────────────────────────────────────────────────────────────────────────
# STEP 2: Copy images to root
# ───────────────────────────────────────────────────────────────────────────────
def prepare_clean_examples(src_folder, prefix, limit=10):
    """Copy up to ``limit`` image files found under ``src_folder`` to flat names.

    The folder is walked recursively. Every file whose name ends in
    .jpg/.jpeg/.png/.bmp/.webp (case-insensitive) is copied to
    ``f"{prefix}_{n}.jpg"``, where ``n`` counts successful copies from 0.
    The bytes are copied unchanged; only the destination name ends in ``.jpg``.

    Args:
        src_folder: Directory to search. A missing directory yields ``[]``.
        prefix: Destination name prefix (may include a directory part).
        limit: Maximum number of files to copy. ``limit <= 0`` copies nothing.

    Returns:
        List of destination names, in the order the files were copied.
    """
    import shutil  # imported once per call instead of once per file

    results = []
    if limit <= 0 or not os.path.exists(src_folder):
        return results

    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    for root, dirs, files in os.walk(src_folder):
        # Sorting in place makes os.walk descend sub-directories in a stable order.
        dirs.sort()
        for fname in sorted(files):
            if not fname.lower().endswith(image_exts):
                continue
            src_path = os.path.join(root, fname)
            dst_name = f"{prefix}_{len(results)}.jpg"
            try:
                shutil.copy2(src_path, dst_name)
            except OSError as e:
                # Best effort: report the failed file and keep going.
                print(f"Error copying {src_path}: {e}")
                continue
            results.append(dst_name)
            if len(results) >= limit:
                return results
    return results
# Build the example gallery from the first candidate folder that exists;
# fall back to a single bundled image when nothing could be prepared.
mirror_examples = []
_example_dirs = ("test car windows", "test_car_windows", "test car windows segmentation")
_first_existing = next((d for d in _example_dirs if os.path.exists(d)), None)
if _first_existing is not None:
    mirror_examples = prepare_clean_examples(_first_existing, "mirror", limit=15)
if os.path.exists("car.jpeg") and not mirror_examples:
    mirror_examples = ["car.jpeg"]
# ───────────────────────────────────────────────────────────────────────────────
# Global Settings
# ───────────────────────────────────────────────────────────────────────────────
# Torch device string: "cuda" when a CUDA device is available, otherwise "cpu".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Confidence threshold passed as `conf=` to the YOLO model calls in this file.
CONF = 0.45
def apply_mask_overlay(img_rgb, mask_bool, color=(0, 215, 255), alpha=0.4):
    """Highlight the masked region of an image and darken everything else.

    Args:
        img_rgb: (H, W, 3) image array; it is not modified.
        mask_bool: (H, W) mask. Any array is accepted and non-zero entries
            are treated as "inside the mask".
        color: Tint and outline colour, in the channel order of ``img_rgb``.
        alpha: Weight of the tint when blended with the original pixels.

    Returns:
        A new (H, W, 3) array: tinted and outlined pixels inside the mask,
        half-brightness pixels outside.
    """
    # Coerce to a real boolean mask. Some callers pass a 0/1 uint8 array;
    # indexing with that would be *integer* fancy indexing (selecting image
    # rows 0 and 1) instead of masking, so the region would never be tinted.
    mask_bool = np.asarray(mask_bool).astype(bool)

    # 1. Darken the background (50% brightness, no blur)
    dark_bg = cv2.addWeighted(img_rgb, 0.5, np.zeros_like(img_rgb), 0.5, 0)

    # 2. For the mask area, keep original brightness and tint it
    tinted_sharp = img_rgb.copy()
    tinted_sharp[mask_bool] = color
    tinted_sharp = cv2.addWeighted(tinted_sharp, alpha, img_rgb, 1 - alpha, 0)

    # 3. Find the outer boundary of the mask and draw it on the tinted image
    mask_img = (mask_bool * 255).astype(np.uint8)
    contours, _ = cv2.findContours(mask_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(tinted_sharp, contours, -1, color, 2, cv2.LINE_AA)

    # 4. Combine: dark background outside, tinted object + boundary inside
    blended = np.where(mask_bool[:, :, None], tinted_sharp, dark_bg)
    return blended
def draw_boxes(img_rgb, boxes, labels, color=(0, 215, 255)):
    """Draw a thin rectangle plus HUD-style corner brackets for each box.

    Boxes are paired with ``labels`` via ``zip``, so only as many boxes as
    there are labels get drawn. The label text itself is not rendered: text
    labels were removed so they do not obstruct the segmentation masks.

    Returns a copy of ``img_rgb`` with the drawings applied.
    """
    canvas = img_rgb.copy()
    bracket_thickness = 3
    for bbox, _label in zip(boxes, labels):
        left, top, right, bottom = map(int, bbox)

        # Faint inner bounding box line
        cv2.rectangle(canvas, (left, top), (right, bottom), color, 1)

        # Bracket arm length: 15% of the shorter box side
        arm = int(min(right - left, bottom - top) * 0.15)

        # (corner x, corner y, horizontal direction, vertical direction)
        corners = (
            (left, top, 1, 1),        # Top-Left
            (right, top, -1, 1),      # Top-Right
            (left, bottom, 1, -1),    # Bottom-Left
            (right, bottom, -1, -1),  # Bottom-Right
        )
        for cx, cy, dx, dy in corners:
            cv2.line(canvas, (cx, cy), (cx + dx * arm, cy), color, bracket_thickness, cv2.LINE_AA)
            cv2.line(canvas, (cx, cy), (cx, cy + dy * arm), color, bracket_thickness, cv2.LINE_AA)
    return canvas
# ───────────────────────────────────────────────────────────────────────────────
# Morphological post-processing helper
# ───────────────────────────────────────────────────────────────────────────────
def apply_morphology(mask_uint8, close_k=15, open_k=7):
    """Run a morphological closing followed by an opening on a binary mask.

    Closing (elliptical kernel of size ``close_k``) fills holes; opening
    (elliptical kernel of size ``open_k``) removes tiny blobs.
    """
    def _ellipse(size):
        return cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))

    holes_filled = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, _ellipse(close_k))
    return cv2.morphologyEx(holes_filled, cv2.MORPH_OPEN, _ellipse(open_k))
# ───────────────────────────────────────────────────────────────────────────────
# Model Functions
# ───────────────────────────────────────────────────────────────────────────────
def run_yolo_generic(img_rgb, model_path, target_classes, color, morph_cleanup=False):
    """Segment ``img_rgb`` with an Ultralytics YOLO segmentation checkpoint.

    Args:
        img_rgb: (H, W, 3) image array.
        model_path: Path of the YOLO weights to load.
        target_classes: Class ids to keep; other detections are ignored.
        color: Overlay colour handed to the drawing helpers.
        morph_cleanup: When true, each instance mask is cleaned with
            ``apply_morphology`` before being merged.

    Returns:
        ``(overlay image, 0/255 uint8 mask, stats string)``. The reported
        time covers loading the model and running it.
    """
    from ultralytics import YOLO
    t0 = time.time()
    model = YOLO(model_path)
    # retina_masks=True asks for masks at the original image resolution
    results = model(img_rgb, conf=CONF, verbose=False, retina_masks=True)
    elapsed = time.time() - t0
    result = results[0]

    h, w = img_rgb.shape[:2]
    combined_mask = np.zeros((h, w), dtype=np.uint8)
    boxes, labels = [], []
    if result.masks is not None:
        for mask, box, cls, conf in zip(
            result.masks.data, result.boxes.xyxy,
            result.boxes.cls, result.boxes.conf
        ):
            if int(cls) not in target_classes:
                continue
            # With retina_masks=True the mask is expected to be (h, w) already;
            # it is only cast to uint8 here, not resized.
            mask_np = mask.cpu().numpy().astype(np.uint8)
            # Optional per-instance morphological cleanup before combining
            if morph_cleanup:
                mask_np = apply_morphology(mask_np)
            combined_mask |= mask_np
            boxes.append(box.cpu().tolist())
            labels.append(f"glass {conf:.2f}")

    # We purposely do NOT apply morphology on the final combined_mask here,
    # otherwise it will bridge the gaps (pillars) between separate windows!
    combined_mask_bool = combined_mask > 0
    # The literal was split over two lines by a mis-decoded check-mark emoji.
    morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
    out = apply_mask_overlay(img_rgb, combined_mask_bool, color=color)
    out = draw_boxes(out, boxes, labels, color=color)
    bw_mask = (combined_mask * 255).astype(np.uint8)
    return out, bw_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s{morph_note}"
def run_sam_strategy(img_rgb, yolo_model_path, target_classes, color, strategy, morph_cleanup=False):
    """Refine YOLO detections with SAM (ViT-B) using one of three prompt strategies.

    Strategies:
        1: box prompt plus 5 positive points (box centre and four points
           5 px inside the box corners).
        2: box prompt plus 5 positive points taken from the YOLO mask
           (its centroid and points 60% of the way towards its extreme
           top/bottom/left/right pixels).
        3: box prompt plus the YOLO mask resized to 256x256 as ``mask_input``;
           the outer contours of the SAM result are then filled.
        any other value: an empty mask is used for every detection.

    Args:
        img_rgb: (H, W, 3) image array.
        yolo_model_path: Path of the YOLO segmentation weights.
        target_classes: Class ids to keep.
        color: Overlay colour.
        strategy: Prompt strategy selector (see above).
        morph_cleanup: When true, ``apply_morphology`` is applied per instance.

    Returns:
        ``(overlay image, 0/255 uint8 mask, stats string)``, or
        ``(img_rgb, None, error string)`` when an import fails.
    """
    def get_mid(x_1, y_1, x_2, y_2, f=0.6):
        # Point a fraction ``f`` of the way from (x_1, y_1) to (x_2, y_2).
        return int(x_1 + (x_2 - x_1) * f), int(y_1 + (y_2 - y_1) * f)

    try:
        from segment_anything import sam_model_registry, SamPredictor
        import urllib.request

        CKPT = "sam_vit_b_01ec64.pth"
        URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
        # Download the SAM checkpoint on first use.
        if not os.path.exists(CKPT):
            urllib.request.urlretrieve(URL, CKPT)

        t0 = time.time()
        sam = sam_model_registry["vit_b"](checkpoint=CKPT).to(DEVICE)
        predictor = SamPredictor(sam)
        predictor.set_image(img_rgb)

        from ultralytics import YOLO as _YOLO
        yolo_res = _YOLO(yolo_model_path)(img_rgb, conf=CONF, verbose=False, retina_masks=True)[0]

        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels = [], []

        if yolo_res.boxes is not None and yolo_res.masks is not None:
            for box, mask_data, cls, conf in zip(yolo_res.boxes.xyxy, yolo_res.masks.data, yolo_res.boxes.cls, yolo_res.boxes.conf):
                if int(cls) not in target_classes:
                    continue
                box_np = box.cpu().numpy()
                yolo_mask = mask_data.cpu().numpy() > 0.5

                if strategy == 1:
                    # Strategy 1: Bbox + 5 Points
                    x1, y1, x2, y2 = map(int, box_np)
                    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
                    pts = [[cx, cy], [x1 + 5, y1 + 5], [x2 - 5, y1 + 5], [x1 + 5, y2 - 5], [x2 - 5, y2 - 5]]
                    pts_np = np.array(pts)
                    labels_np = np.ones(len(pts))
                    masks_sam, _, _ = predictor.predict(box=box_np, point_coords=pts_np, point_labels=labels_np, multimask_output=False)
                    sam_mask = masks_sam[0]
                elif strategy == 2:
                    # Strategy 2: Mask + 5 Points
                    y_coords, x_coords = np.where(yolo_mask)
                    if len(x_coords) == 0:
                        continue
                    cx, cy = int(np.mean(x_coords)), int(np.mean(y_coords))
                    idx_top, idx_bot = np.argmin(y_coords), np.argmax(y_coords)
                    idx_lft, idx_rgt = np.argmin(x_coords), np.argmax(x_coords)

                    pts = []
                    # Use the centroid when it lies on the mask, otherwise a mask pixel.
                    if yolo_mask[cy, cx]:
                        pts.append([cx, cy])
                    else:
                        pts.append([x_coords[len(x_coords) // 2], y_coords[len(y_coords) // 2]])
                    for idx in [idx_top, idx_bot, idx_lft, idx_rgt]:
                        px, py = get_mid(cx, cy, x_coords[idx], y_coords[idx])
                        # Points that fall off the mask are replaced by the first point.
                        if 0 <= py < h and 0 <= px < w and yolo_mask[py, px]:
                            pts.append([px, py])
                        else:
                            pts.append(pts[0])
                    pts_np = np.array(pts)
                    labels_np = np.ones(len(pts))
                    masks_sam, _, _ = predictor.predict(box=box_np, point_coords=pts_np, point_labels=labels_np, multimask_output=False)
                    sam_mask = masks_sam[0]
                elif strategy == 3:
                    # Strategy 3: Direct Mask Prompting
                    yolo_mask_resized = cv2.resize((yolo_mask).astype(np.float32), (256, 256), interpolation=cv2.INTER_NEAREST)
                    mask_input = np.zeros((1, 256, 256), dtype=np.float32)
                    # Encode the binary mask as +30 inside / -30 outside.
                    mask_input[0] = np.where(yolo_mask_resized > 0.5, 30.0, -30.0)
                    masks_sam, _, _ = predictor.predict(box=box_np, mask_input=mask_input, multimask_output=False)
                    raw_mask = (masks_sam[0].astype(np.uint8) * 255)
                    # Fill the outer contours so interior holes disappear.
                    contours, _ = cv2.findContours(raw_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    filled_mask = np.zeros_like(raw_mask)
                    cv2.drawContours(filled_mask, contours, -1, 255, cv2.FILLED)
                    sam_mask = (filled_mask > 0)
                else:
                    sam_mask = np.zeros((h, w), dtype=bool)

                sam_mask_uint = sam_mask.astype(np.uint8)
                if morph_cleanup:
                    sam_mask_uint = apply_morphology(sam_mask_uint)
                combined_mask |= sam_mask_uint.astype(bool)
                boxes_list.append(box_np.tolist())
                labels.append(f"glass {conf:.2f}")

        elapsed = time.time() - t0
        # The literal was split over two lines by a mis-decoded check-mark emoji.
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        out = apply_mask_overlay(img_rgb, combined_mask, color=color)
        out = draw_boxes(out, boxes_list, labels, color=color)
        return out, (combined_mask * 255).astype(np.uint8), f"Found: {len(boxes_list)} | Strategy: {strategy} | Inference: {elapsed:.2f}s{morph_note}"
    except ImportError:
        return img_rgb, None, "Error: segment-anything not installed"
def run_mask_rcnn(img_rgb, weights_path):
    """Run a two-class torchvision Mask R-CNN (ResNet50-FPN v2) on ``img_rgb``.

    The checkpoint at ``weights_path`` may be a bare state dict or a dict
    holding it under ``"model_state_dict"``. Detections scoring above 0.45
    are merged into one mask.

    Returns ``(overlay image, 0/255 uint8 mask, stats string)``, or
    ``(img_rgb, None, error string)`` when anything fails.
    """
    started = time.time()
    try:
        from torchvision.models.detection import maskrcnn_resnet50_fpn_v2
        from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
        from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
        import torchvision.transforms.v2 as T

        # Build the architecture and swap both heads for 2-class predictors.
        net = maskrcnn_resnet50_fpn_v2(weights=None)
        box_in = net.roi_heads.box_predictor.cls_score.in_features
        net.roi_heads.box_predictor = FastRCNNPredictor(box_in, 2)
        mask_in = net.roi_heads.mask_predictor.conv5_mask.in_channels
        net.roi_heads.mask_predictor = MaskRCNNPredictor(mask_in, 256, 2)

        # NOTE(review): weights_only=False unpickles arbitrary objects;
        # only load checkpoints from a trusted source.
        state = torch.load(weights_path, map_location=DEVICE, weights_only=False)
        is_wrapped = isinstance(state, dict) and "model_state_dict" in state
        net.load_state_dict(state["model_state_dict"] if is_wrapped else state)
        net.to(DEVICE)
        net.eval()

        tensor_in = T.ToTensor()(Image.fromarray(img_rgb)).to(DEVICE)
        with torch.no_grad():
            prediction = net([tensor_in])[0]

        height, width = img_rgb.shape[:2]
        merged = np.zeros((height, width), dtype=bool)
        kept_boxes, kept_labels = [], []
        detections = zip(prediction['scores'], prediction['masks'], prediction['boxes'], prediction['labels'])
        for score, mask, box, _cls in detections:
            if not (score > 0.45):
                continue
            merged |= (mask[0].cpu().numpy() > 0.5)
            kept_boxes.append(box.cpu().numpy().tolist())
            kept_labels.append(f"glass {score:.2f}")

        elapsed = time.time() - started
        orange = (255, 165, 0)
        overlay = apply_mask_overlay(img_rgb, merged, color=orange)
        overlay = draw_boxes(overlay, kept_boxes, kept_labels, color=orange)
        binary = (merged * 255).astype(np.uint8)
        return overlay, binary, f"Found: {len(kept_boxes)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Mask R-CNN Error: {e}"
def run_grounding_dino(img_rgb, text_prompt):
    """Detect objects described by ``text_prompt`` with Grounding DINO (tiny).

    This model produces boxes only, so the returned mask is all zeros.

    Returns ``(image with boxes, zero uint8 mask, stats string)``, or
    ``(img_rgb, None, error string)`` when anything fails.
    """
    try:
        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

        started = time.time()
        checkpoint_id = "IDEA-Research/grounding-dino-tiny"
        processor = AutoProcessor.from_pretrained(checkpoint_id)
        detector = AutoModelForZeroShotObjectDetection.from_pretrained(checkpoint_id).to(DEVICE)

        encoded = processor(images=img_rgb, text=text_prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            raw = detector(**encoded)

        height, width = img_rgb.shape[:2]
        detections = processor.post_process_grounded_object_detection(
            raw, encoded.input_ids, text_threshold=0.25, target_sizes=[(height, width)]
        )[0]
        boxes = detections["boxes"].cpu().numpy().tolist()
        scores = detections["scores"].cpu().numpy().tolist()
        names = detections["labels"]
        elapsed = time.time() - started

        empty_mask = np.zeros((height, width), dtype=np.uint8)  # DINO is boxes only
        captions = [f"{name} {score:.2f}" for name, score in zip(names, scores)]
        annotated = draw_boxes(img_rgb.copy(), boxes, captions, color=(255, 100, 50))
        return annotated, empty_mask, f"Found: {len(boxes)} | Inference Time: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounding DINO Error: {e}\n(Need transformers>=4.35)"
def run_grounded_sam(img_rgb, text_prompt):
    """Detect with Grounding DINO (tiny), then segment each box with SAM (ViT-B).

    Returns ``(overlay image, 0/255 uint8 mask, stats string)``, or
    ``(img_rgb, None, error string)`` when anything fails.
    """
    try:
        from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
        from segment_anything import sam_model_registry, SamPredictor
        import urllib.request

        started = time.time()

        # 1. DINO Detection
        dino_id = "IDEA-Research/grounding-dino-tiny"
        processor = AutoProcessor.from_pretrained(dino_id)
        detector = AutoModelForZeroShotObjectDetection.from_pretrained(dino_id).to(DEVICE)
        encoded = processor(images=img_rgb, text=text_prompt, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            raw = detector(**encoded)
        height, width = img_rgb.shape[:2]
        detections = processor.post_process_grounded_object_detection(
            raw, encoded.input_ids, text_threshold=0.25, target_sizes=[(height, width)]
        )[0]
        boxes = detections["boxes"].cpu().numpy()
        scores = detections["scores"].cpu().numpy()
        names = detections["labels"]

        # 2. SAM Segmentation (checkpoint is downloaded on first use)
        checkpoint_file = "sam_vit_b_01ec64.pth"
        checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
        if not os.path.exists(checkpoint_file):
            urllib.request.urlretrieve(checkpoint_url, checkpoint_file)
        predictor = SamPredictor(sam_model_registry["vit_b"](checkpoint=checkpoint_file).to(DEVICE))
        predictor.set_image(img_rgb)

        merged = np.zeros((height, width), dtype=bool)
        captions = []
        # Zipping over zero boxes simply yields nothing.
        for box, score, name in zip(boxes, scores, names):
            sam_masks, _, _ = predictor.predict(box=box, multimask_output=False)
            merged |= sam_masks[0]
            captions.append(f"{name} {score:.2f}")

        elapsed = time.time() - started
        pink = (255, 80, 160)
        overlay = apply_mask_overlay(img_rgb, merged, color=pink)
        overlay = draw_boxes(overlay, boxes.tolist(), captions, color=pink)
        return overlay, (merged * 255).astype(np.uint8), f"Found: {len(boxes)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Grounded SAM Error: {e}"
def run_intelliarts_car_parts(img_rgb):
    """Segment car glass and mirrors with the Intelliarts Detectron2 Mask R-CNN.

    detectron2 is installed with pip on first use when it cannot be imported,
    and the weights are downloaded when missing. Only classes 2, 8, 14 and 15
    (back_glass, front_glass, left_mirror, right_mirror) are kept.

    Args:
        img_rgb: (H, W, 3) image array in RGB channel order.

    Returns:
        ``(overlay image, 0/255 uint8 mask, stats string)``, or
        ``(img_rgb, None, error string)`` when anything fails. The reported
        time includes any install or download done by this call.
    """
    t0 = time.time()
    try:
        import detectron2  # noqa: F401 - availability probe only
    except ImportError:
        import subprocess
        import sys
        print("Installing detectron2... this may take a few minutes!")
        # Argument list + current interpreter: no shell parsing, and the package
        # lands in the environment this process is actually running in.
        subprocess.run(
            [sys.executable, "-m", "pip", "install",
             "git+https://github.com/facebookresearch/detectron2.git",
             "--no-build-isolation"],
            check=False,
        )
    try:
        from detectron2 import model_zoo
        from detectron2.engine import DefaultPredictor
        from detectron2.config import get_cfg
        import urllib.request

        model_url = "https://huggingface.co/spaces/intelliarts/Car_parts_detection/resolve/main/model_final.pth"
        model_path = "intelliarts_model_final.pth"
        if not os.path.exists(model_path):
            print("Downloading Intelliarts Car Parts weights...")
            urllib.request.urlretrieve(model_url, model_path)

        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.45
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 19
        cfg.MODEL.WEIGHTS = model_path
        cfg.MODEL.DEVICE = DEVICE
        predictor = DefaultPredictor(cfg)

        # DefaultPredictor documents its input as a BGR image (it converts to
        # RGB itself when the model wants RGB), so flip the channels first.
        img_bgr = np.ascontiguousarray(img_rgb[:, :, ::-1])
        outputs = predictor(img_bgr)
        instances = outputs["instances"].to("cpu")

        # Classes: 2: back_glass, 8: front_glass, 14: left_mirror, 15: right_mirror
        target_classes = [2, 8, 14, 15]
        h, w = img_rgb.shape[:2]
        combined_mask = np.zeros((h, w), dtype=bool)
        boxes_list, labels_list = [], []

        classes = instances.pred_classes.numpy()
        scores = instances.scores.numpy()
        boxes = instances.pred_boxes.tensor.numpy()
        masks = instances.pred_masks.numpy()
        class_names = ['_background_', 'back_bumper', 'back_glass', 'back_left_door', 'back_left_light', 'back_right_door', 'back_right_light', 'front_bumper', 'front_glass', 'front_left_door', 'front_left_light', 'front_right_door', 'front_right_light', 'hood', 'left_mirror', 'right_mirror', 'tailgate', 'trunk', 'wheel']

        for c, score, box, mask in zip(classes, scores, boxes, masks):
            if c not in target_classes:
                continue
            combined_mask |= mask
            boxes_list.append(box.tolist())
            labels_list.append(f"{class_names[c]} {score:.2f}")

        elapsed = time.time() - t0
        out = apply_mask_overlay(img_rgb, combined_mask, color=(50, 150, 255))
        out = draw_boxes(out, boxes_list, labels_list, color=(50, 150, 255))
        bw_mask = (combined_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: {len(boxes_list)} | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"Intelliarts Detectron2 Error: {e}"
# ───────────────────────────────────────────────────────────────────────────────
# SegFormer Function
# ───────────────────────────────────────────────────────────────────────────────
def run_segformer(img_rgb, morph_cleanup=False):
    """Run the fine-tuned SegFormer model and overlay class 1 on ``img_rgb``.

    The model directory is the first candidate path that contains a
    ``config.json``; otherwise ``"best_segformer_dice_model"`` is handed to
    ``from_pretrained`` as is.

    Args:
        img_rgb: (H, W, 3) image array.
        morph_cleanup: When true, the predicted mask is passed through
            ``apply_morphology`` (closing 15, opening 7).

    Returns:
        ``(overlay image, 0/255 uint8 mask, stats string)``, or
        ``(img_rgb, None, error string)`` when anything fails.
    """
    try:
        from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
        import torch.nn.functional as F

        t0 = time.time()
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        # Paths to try (works for both local PC and Hugging Face Cloud deployment)
        paths_to_try = [
            os.path.join(base_dir, "SegFormer_Model", "best_segformer_dice_model"),  # Local PC
            "best_segformer_dice_model",  # Hugging Face Root
            os.path.join(os.path.dirname(__file__), "best_segformer_dice_model"),  # Next to app.py
        ]
        # If files were uploaded directly to the root (no folder)
        if os.path.exists("config.json"):
            paths_to_try.append(".")
        if os.path.exists(os.path.join(os.path.dirname(__file__), "config.json")):
            paths_to_try.append(os.path.dirname(__file__))

        model_path = None
        for p in paths_to_try:
            # For SegFormer, the path must contain config.json
            if os.path.exists(p) and os.path.exists(os.path.join(p, "config.json")):
                model_path = p
                break
        # Fallback
        if model_path is None:
            model_path = "best_segformer_dice_model"

        processor = SegformerImageProcessor.from_pretrained(model_path)
        model = SegformerForSemanticSegmentation.from_pretrained(model_path).to(DEVICE)

        inputs = processor(images=Image.fromarray(img_rgb), return_tensors="pt")
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)

        # Upsample the logits to the input resolution before thresholding.
        h, w = img_rgb.shape[:2]
        logits = F.interpolate(outputs.logits, size=(h, w), mode="bilinear", align_corners=False)[0]
        probs = F.softmax(logits, dim=0)
        pred_mask = (probs[1] > 0.5).cpu().numpy().astype(np.uint8)

        # Apply morphological cleanup if requested
        if morph_cleanup:
            pred_mask = apply_morphology(pred_mask, close_k=15, open_k=7)

        elapsed = time.time() - t0
        # The literal was split over two lines by a mis-decoded check-mark emoji.
        morph_note = " | Morphology: ON ✅" if morph_cleanup else ""
        # Pass a boolean mask: a 0/1 uint8 array used as an index would select
        # image rows 0 and 1 instead of masking the predicted region.
        out = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 50, 50))
        bw_mask = (pred_mask * 255).astype(np.uint8)
        return out, bw_mask, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s{morph_note}"
    except Exception as e:
        return img_rgb, None, f"SegFormer Error: {e}"
# ───────────────────────────────────────────────────────────────────────────────
# BiRefNet Function
# ───────────────────────────────────────────────────────────────────────────────
def run_birefnet(img_rgb):
    """Run the BiRefNet segmentation model and overlay the result in red.

    A local model directory is used when one of the candidate paths holds both
    ``config.json`` and ``model.safetensors``; otherwise the Hugging Face repo
    id ``Ayesha-Majeed/birefnet_car_window`` is loaded.

    Returns ``(overlay image, 0/255 uint8 mask, stats string)``, or
    ``(img_rgb, None, error string)`` when anything fails.
    """
    try:
        from transformers import AutoModelForImageSegmentation
        from torchvision import transforms
        import torch.nn.functional as F
        from PIL import Image

        started = time.time()
        here = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(here)

        # Paths to try (works for local PC and Hugging Face Cloud deployment)
        candidates = [
            os.path.join(project_root, "BiRefNet_Model", "best_model-20260624T051601Z-3-001", "best_model"),  # Local PC
            "birefnet_model",  # Hugging Face Root / Root dir
            os.path.join(here, "birefnet_model"),  # Next to app.py
            "best_birefnet_model"  # Extra fallback
        ]

        def _is_model_dir(path):
            return (
                os.path.exists(path)
                and os.path.exists(os.path.join(path, "config.json"))
                and os.path.exists(os.path.join(path, "model.safetensors"))
            )

        # Final fallback: download directly from the Hugging Face model repo.
        model_path = next((p for p in candidates if _is_model_dir(p)), None)
        if model_path is None:
            model_path = "Ayesha-Majeed/birefnet_car_window"

        model = AutoModelForImageSegmentation.from_pretrained(model_path, trust_remote_code=True).to(DEVICE)
        model.eval()

        preprocess = transforms.Compose([
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
        batch = preprocess(Image.fromarray(img_rgb)).unsqueeze(0).to(DEVICE)

        def _forward():
            # When the model returns a list/tuple, its last element is used.
            preds = model(batch)
            return preds[-1] if isinstance(preds, (list, tuple)) else preds

        with torch.no_grad():
            if DEVICE == "cuda":
                with torch.amp.autocast("cuda"):
                    final_pred = _forward()
            else:
                final_pred = _forward()

        height, width = img_rgb.shape[:2]
        final_pred = F.interpolate(final_pred, size=(height, width), mode="bilinear", align_corners=False)
        pred_mask = (torch.sigmoid(final_pred) > 0.5).squeeze().cpu().numpy().astype(np.uint8)

        elapsed = time.time() - started
        overlay = apply_mask_overlay(img_rgb, pred_mask > 0, color=(255, 0, 0))  # Red
        binary = (pred_mask * 255).astype(np.uint8)
        return overlay, binary, f"Found: 1 (Semantic) | Inference: {elapsed:.2f}s"
    except Exception as e:
        return img_rgb, None, f"BiRefNet Error: {e}"
# ───────────────────────────────────────────────────────────────────────────────
# Gradio Process Function
# ───────────────────────────────────────────────────────────────────────────────
# A beautiful palette of pastel and neon colors for dynamic visualizations.
# Each entry is a 3-tuple used as an overlay/box colour; process_image picks
# one at random per call.
PASTEL_COLORS = [
    (255, 105, 180), # Hot/Light Pink
    (180, 130, 255), # Light Purple
    (0, 215, 255), # Light Sky Blue / Cyan
    (255, 220, 50), # Light Yellow
    (255, 160, 50), # Light Orange
    (150, 255, 150), # Light Mint Green
    (240, 240, 255), # Light White / Silver
]
def process_image(img_rgb, model_name, text_prompt="", morph_cleanup=False):
    """Dispatch one image to the model selected by ``model_name``.

    Args:
        img_rgb: Input image array, or ``None`` when nothing was uploaded.
        model_name: Display name of the model to run.
        text_prompt: Text query, used only by the zero-shot models.
        morph_cleanup: Forwarded to the models that support morphological
            cleanup (YOLO, SAM + YOLO, SegFormer).

    Returns:
        ``(overlay image, binary mask or None, status string)``. Unknown
        model names and unexpected errors are reported in the status string
        and the input image is returned unchanged.
    """
    if img_rgb is None:
        return None, None, "Please upload an image."

    # Pick a random color for this specific inference run
    run_color = random.choice(PASTEL_COLORS)

    # Name -> strategy id for the three SAM + YOLO variants.
    sam_strategies = {
        "SAM + YOLO (Strategy 1: Bbox + 5 Points)": 1,
        "SAM + YOLO (Strategy 2: Mask + 5 Points)": 2,
        "SAM + YOLO (Strategy 3: Direct Mask Prompting)": 3,
    }

    try:
        # Parent of the directory that holds this file (local project layout).
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        if model_name == "YOLOv8x-seg (Custom Window)":
            return run_yolo_generic(img_rgb, "best.pt", target_classes=[0, 1], color=run_color, morph_cleanup=morph_cleanup)
        elif model_name == "YOLOv8x-seg":
            return run_yolo_generic(img_rgb, "best.pt", target_classes=[0, 1], color=(255, 215, 0), morph_cleanup=morph_cleanup)
        elif model_name == "YOLO11x-seg":
            # Prefer weights in the working directory, then the local training
            # run, and finally fall back to the YOLOv8 weights.
            if os.path.exists("yolo11_best.pt"):
                y11_weights = "yolo11_best.pt"
            else:
                y11_weights = os.path.join(base_dir, "runs", "segment", "runs", "car_mirror_seg", "yolo11x_seg_1024", "weights", "best.pt")
                if not os.path.exists(y11_weights):
                    y11_weights = "best.pt"  # Fallback
            return run_yolo_generic(img_rgb, y11_weights, target_classes=[0, 1], color=(0, 255, 120), morph_cleanup=morph_cleanup)
        elif model_name in sam_strategies:
            return run_sam_strategy(img_rgb, "best.pt", target_classes=[0, 1], color=run_color, strategy=sam_strategies[model_name], morph_cleanup=morph_cleanup)
        elif model_name == "Mask R-CNN":
            # First check whether the weights were placed in the working
            # directory as "maskrcnn_best.pt".
            if os.path.exists("maskrcnn_best.pt"):
                mrcnn_weights = "maskrcnn_best.pt"
            else:
                mrcnn_weights = os.path.join(base_dir, "Mask_RCNN", "runs", "woven-sweep-5", "best.pt")
                if not os.path.exists(mrcnn_weights):
                    mrcnn_weights = "Mask_RCNN/runs/woven-sweep-5/best.pt"
            return run_mask_rcnn(img_rgb, mrcnn_weights)
        elif model_name == "Grounding DINO (Zero-Shot Detection)":
            return run_grounding_dino(img_rgb, text_prompt)
        elif model_name == "Grounded SAM (Zero-Shot Segmentation)":
            return run_grounded_sam(img_rgb, text_prompt)
        elif model_name == "Intelliarts Car Parts (Detectron2)":
            return run_intelliarts_car_parts(img_rgb)
        elif model_name == "SegFormer":
            return run_segformer(img_rgb, morph_cleanup=morph_cleanup)
        elif model_name == "BiRefNet":
            # run_birefnet previously had no branch in this dispatcher.
            return run_birefnet(img_rgb)
        else:
            return img_rgb, None, "Model not recognized."
    except Exception as e:
        return img_rgb, None, f"Error: {str(e)}"
# ───────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ───────────────────────────────────────────────────────────────────────────────
def _build_result_section(heading, label):
    """Render one model's result section and return its three output components.

    Must be called inside an active ``gr.Blocks`` context. Emits a Markdown
    heading, a row holding the overlay image and the binary-mask image, and a
    stats textbox.

    Args:
        heading: Markdown text shown above the section.
        label: Prefix used for the three component labels
            (``"<label> Overlay"``, ``"<label> Binary Mask"``, ``"<label> Stats"``).

    Returns:
        Tuple ``(overlay_image, mask_image, stats_textbox)``.
    """
    gr.Markdown(heading)
    with gr.Row():
        overlay = gr.Image(label=f"{label} Overlay", interactive=False)
        mask = gr.Image(label=f"{label} Binary Mask", interactive=False, image_mode="L")
    # NOTE(review): the stats box is placed below the image row; confirm this
    # matches the intended layout (it could also live inside the row).
    stats = gr.Textbox(label=f"{label} Stats", interactive=False)
    return overlay, mask, stats


theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
with gr.Blocks(theme=theme, title="Car Window Segmentation") as demo:
    gr.Markdown("""
# Car Window Segmentation
Compare your custom trained YOLOv8 model against state-of-the-art Zero-Shot models!
""")
    # ── TAB 3: Comprehensive Evaluation ──
    with gr.Tab("Comprehensive Evaluation"):
        gr.Markdown("### Comprehensive Evaluation: Results from All Trained and Pretrained Models")
        # "\\." keeps the literal backslash that stops Markdown from treating
        # "10." etc. as list markers, without an invalid "\." escape sequence.
        gr.Markdown("""**The following models will run and display their results below:**
**Custom Trained Models:**
1. SegFormer
2. SegFormer + Morphological
3. YOLO11x-seg
4. YOLOv8x-seg
5. Mask R-CNN
6. BiRefNet
7. SAM + YOLO (Strategy 1: Bbox + 5 Points)
8. SAM + YOLO (Strategy 2: Mask + 5 Points)
9. SAM + YOLO (Strategy 3: Direct Mask Prompting)
**Pretrained Zero-Shot Models:**
10\\. Grounding DINO
11\\. Grounded SAM
12\\. Intelliarts Car Parts
**Our Findings:** SegFormer and YOLO11x deliver the best performance with significantly sharper edge precision.
""")
        with gr.Row():
            input_image_seq = gr.Image(type="numpy", label="Upload Window Image")
        with gr.Row():
            submit_btn_seq = gr.Button("Run All Models", variant="primary", size="lg")
            stop_btn_seq = gr.Button("🛑 Stop Processing", variant="stop", size="lg")

        # The example gallery is only shown when example images are available.
        if mirror_examples:
            gr.Markdown("### Or click any example image below to load it:")
            compare_gallery = gr.Gallery(
                value=mirror_examples,
                columns=10,
                height=120,
                object_fit="cover",
                allow_preview=False,
                show_label=False,
            )

            def load_compare_img(evt: gr.SelectData):
                """Return the example at the clicked gallery index."""
                return mirror_examples[evt.index]

            compare_gallery.select(fn=load_compare_img, inputs=None, outputs=input_image_seq)

        gr.Markdown("---")
        # NOTE(review): the leading glyph of the two "##" headers below looks
        # mis-encoded; restore the intended emoji from the original file.
        gr.Markdown("## π Custom Trained Models")
        seq_segf_img, seq_segf_bw, seq_segf_stats = _build_result_section(
            "### 1️⃣ SegFormer (Transformer)", "SegFormer")
        gr.Markdown("---")
        seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats = _build_result_section(
            "### 2️⃣ SegFormer + Morphological Cleanup (Holes Filled + Sharp Borders)",
            "SegFormer + Morph")
        gr.Markdown("---")
        seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats = _build_result_section(
            "### 3️⃣ YOLO11x-seg", "YOLO11x")
        gr.Markdown("---")
        seq_yolo_img, seq_yolo_bw, seq_yolo_stats = _build_result_section(
            "### 4️⃣ YOLOv8x-seg", "YOLO")
        gr.Markdown("---")
        seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats = _build_result_section(
            "### 5️⃣ Mask R-CNN (ResNet50-FPN)", "Mask R-CNN")
        gr.Markdown("---")
        seq_biref_img, seq_biref_bw, seq_biref_stats = _build_result_section(
            "### 6️⃣ BiRefNet (Boundary-Aware Model)", "BiRefNet")
        gr.Markdown("---")
        seq_sam1_img, seq_sam1_bw, seq_sam1_stats = _build_result_section(
            "### 7️⃣ SAM + YOLO (Strategy 1: Bbox + 5 Points)", "SAM+YOLO Strat 1")
        gr.Markdown("---")
        seq_sam2_img, seq_sam2_bw, seq_sam2_stats = _build_result_section(
            "### 8️⃣ SAM + YOLO (Strategy 2: Mask + 5 Points)", "SAM+YOLO Strat 2")
        gr.Markdown("---")
        seq_sam3_img, seq_sam3_bw, seq_sam3_stats = _build_result_section(
            "### 9️⃣ SAM + YOLO (Strategy 3: Direct Mask Prompting)", "SAM+YOLO Strat 3")
        gr.Markdown("---")
        gr.Markdown("## π Pretrained Zero-Shot Models")
        seq_dino_img, seq_dino_bw, seq_dino_stats = _build_result_section(
            "### 🔟 Grounding DINO (Zero-Shot Detection)", "Grounding DINO")
        gr.Markdown("---")
        seq_gsam_img, seq_gsam_bw, seq_gsam_stats = _build_result_section(
            "### 1️⃣1️⃣ Grounded SAM (Zero-Shot Segmentation)", "Grounded SAM")
        gr.Markdown("---")
        seq_intell_img, seq_intell_bw, seq_intell_stats = _build_result_section(
            "### 1️⃣2️⃣ Intelliarts Car Parts (Detectron2)", "Intelliarts Car Parts")

        _PENDING = "⏳ Processing..."
        _ZERO_SHOT_PROMPT = "car window. car glass. windshield."
        # One runner per result section, in the same order as the `outputs`
        # list wired to the submit button below (three outputs per runner).
        _MODEL_RUNNERS = [
            lambda img: run_segformer(img, morph_cleanup=False),
            lambda img: run_segformer(img, morph_cleanup=True),
            lambda img: process_image(img, "YOLO11x-seg", "", False),
            lambda img: process_image(img, "YOLOv8x-seg", "", False),
            lambda img: process_image(img, "Mask R-CNN", "", False),
            lambda img: run_birefnet(img),
            lambda img: process_image(img, "SAM + YOLO (Strategy 1: Bbox + 5 Points)", "", False),
            lambda img: process_image(img, "SAM + YOLO (Strategy 2: Mask + 5 Points)", "", False),
            lambda img: process_image(img, "SAM + YOLO (Strategy 3: Direct Mask Prompting)", "", False),
            lambda img: process_image(img, "Grounding DINO (Zero-Shot Detection)", _ZERO_SHOT_PROMPT, False),
            lambda img: process_image(img, "Grounded SAM (Zero-Shot Segmentation)", _ZERO_SHOT_PROMPT, False),
            lambda img: process_image(img, "Intelliarts Car Parts (Detectron2)", "", False),
        ]

        def run_all_models(img):
            """Run every model on ``img``, yielding the UI state after each one.

            Each yielded tuple has three entries per model, in runner order:
            ``(overlay, mask, stats)``. The first yield marks every stats slot
            as pending; each later yield fills in one more model's results.

            Args:
                img: Uploaded image, or ``None`` when nothing was uploaded.

            Yields:
                Tuple of ``3 * len(_MODEL_RUNNERS)`` values. If ``img`` is
                ``None`` a single all-``None`` tuple is yielded.
            """
            slot_count = 3 * len(_MODEL_RUNNERS)
            if img is None:
                yield tuple([None] * slot_count)
                return

            # ── Step 0: Show "Processing..." in ALL textboxes immediately ──
            results = [None] * slot_count
            results[2::3] = [_PENDING] * len(_MODEL_RUNNERS)
            yield tuple(results)

            for position, runner in enumerate(_MODEL_RUNNERS):
                try:
                    overlay, mask, stats = runner(img)
                except Exception as e:
                    # A failing model must not abort the whole run and leave
                    # later sections stuck on "Processing...". Same
                    # (image, None, "Error: ...") shape as the model
                    # dispatcher's own except-branch earlier in this file.
                    overlay, mask, stats = img, None, f"Error: {str(e)}"
                first_slot = 3 * position
                results[first_slot:first_slot + 3] = [overlay, mask, stats]
                yield tuple(results)

        run_event = submit_btn_seq.click(
            fn=run_all_models,
            inputs=[input_image_seq],
            outputs=[
                seq_segf_img, seq_segf_bw, seq_segf_stats,
                seq_segf_morph_img, seq_segf_morph_bw, seq_segf_morph_stats,
                seq_yolo11_img, seq_yolo11_bw, seq_yolo11_stats,
                seq_yolo_img, seq_yolo_bw, seq_yolo_stats,
                seq_mrcnn_img, seq_mrcnn_bw, seq_mrcnn_stats,
                seq_biref_img, seq_biref_bw, seq_biref_stats,
                seq_sam1_img, seq_sam1_bw, seq_sam1_stats,
                seq_sam2_img, seq_sam2_bw, seq_sam2_stats,
                seq_sam3_img, seq_sam3_bw, seq_sam3_stats,
                seq_dino_img, seq_dino_bw, seq_dino_stats,
                seq_gsam_img, seq_gsam_bw, seq_gsam_stats,
                seq_intell_img, seq_intell_bw, seq_intell_stats,
            ],
        )
        # The stop button runs no function; it only cancels the in-flight run.
        stop_btn_seq.click(fn=None, inputs=None, outputs=None, cancels=[run_event])
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script.
    demo.launch()
|