"""Gradio demo: measure an object's projected area (cm²) with YOLO-seg + ArUco.

Pipeline: detect an ArUco marker of known physical size, rectify the photo so
that 1 cm maps to a fixed number of pixels, run YOLO instance segmentation on
the rectified image, and convert the selected mask's pixel count to cm².
"""
import os
import traceback

import numpy as np
import gradio as gr
from PIL import Image

# Close previous demos (helps in notebooks)
gr.close_all()
os.environ["GRADIO_DEBUG"] = "1"

# -----------------------------
# OpenCV (headless-safe) + patch for Ultralytics import
# -----------------------------
import cv2

# Ultralytics may reference cv2.imshow during import; headless OpenCV may not have it.
if not hasattr(cv2, "imshow"):
    def _noop(*args, **kwargs):
        """Stand-in for GUI functions missing from headless OpenCV builds."""
        return None

    cv2.imshow = _noop
    cv2.waitKey = _noop
    cv2.destroyAllWindows = _noop

# -----------------------------
# Ultralytics YOLO
# -----------------------------
from ultralytics import YOLO

DEFAULT_MODEL = "yolo26n-seg.pt"  # segmentation weights use the -seg suffix

# Cache models so they don't reload every click
_MODEL_CACHE = {}


def get_model(model_name: str):
    """Return a cached YOLO model for *model_name*, loading it on first use."""
    name = model_name.strip()
    if name not in _MODEL_CACHE:
        _MODEL_CACHE[name] = YOLO(name)
    return _MODEL_CACHE[name]


# -----------------------------
# ArUco helpers (new + old OpenCV APIs)
# -----------------------------
def get_aruco_dictionary(dict_name: str):
    """Return the predefined ArUco dictionary named *dict_name*.

    Raises:
        RuntimeError: if this OpenCV build has no ``cv2.aruco`` module.
        ValueError: if ``cv2.aruco`` has no attribute called *dict_name*.
    """
    if not hasattr(cv2, "aruco"):
        raise RuntimeError("cv2.aruco missing. Install opencv-contrib-python-headless.")
    aruco = cv2.aruco
    if not hasattr(aruco, dict_name):
        raise ValueError(f"Unknown ArUco dictionary: {dict_name}")
    return aruco.getPredefinedDictionary(getattr(aruco, dict_name))


def detect_markers(gray_img: np.ndarray, dictionary):
    """Detect ArUco markers using new API if available, else old API.

    Returns:
        ``(corners_list, ids, rejected)`` exactly as produced by OpenCV.

    Raises:
        RuntimeError: if neither detection API exists in ``cv2.aruco``.
    """
    aruco = cv2.aruco

    # New API (class-based detector)
    if hasattr(aruco, "ArucoDetector") and hasattr(aruco, "DetectorParameters"):
        detector = aruco.ArucoDetector(dictionary, aruco.DetectorParameters())
        return detector.detectMarkers(gray_img)

    # Old API (module-level function)
    if hasattr(aruco, "detectMarkers"):
        params = (
            aruco.DetectorParameters_create()
            if hasattr(aruco, "DetectorParameters_create")
            else None
        )
        return aruco.detectMarkers(gray_img, dictionary, parameters=params)

    raise RuntimeError("No compatible ArUco detection API found.")


def order_corners_4pts(pts):
    """Order 4 points: top-left, top-right, bottom-right, bottom-left.

    The points are sorted by angle around their centroid (clockwise on screen,
    since image y grows downward) and the sequence is rotated so that it
    starts at the corner with the smallest ``x + y``.

    Unlike the classic min/max of ``x + y`` and ``y - x`` heuristic, this
    always returns four distinct corners, even for a marker rotated by ~45°
    where that heuristic picks the same point twice and the resulting
    homography is degenerate.

    Raises:
        ValueError: if *pts* does not contain exactly four 2-D points.
    """
    pts = np.asarray(pts, dtype=np.float32).reshape(-1, 2)
    if pts.shape[0] != 4:
        raise ValueError(f"Expected 4 points, got {pts.shape[0]}.")

    center = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
    ring = pts[np.argsort(angles, kind="stable")]

    # Rotate the cyclic order so the top-left-most corner comes first.
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0).astype(np.float32)


def choose_marker(corners_list, ids, marker_id: int | None):
    """Use marker_id if provided; else choose largest marker.

    Args:
        corners_list: per-marker corner arrays as returned by detection.
        ids: detected marker ids (array-like, flattened internally).
        marker_id: id to use; ``None`` or a negative value means "auto".

    Returns:
        ``(ordered_corners, used_id, all_detected_ids)``.

    Raises:
        ValueError: if a non-negative *marker_id* was requested but not detected.
    """
    ids_list = ids.flatten().tolist()

    def ordered(idx):
        return order_corners_4pts(corners_list[idx][0].astype(np.float32))

    if marker_id is not None and marker_id >= 0:
        if marker_id not in ids_list:
            raise ValueError(f"Detected marker IDs: {ids_list}, but marker_id={marker_id} not found.")
        i = ids_list.index(marker_id)
        return ordered(i), ids_list[i], ids_list

    # Auto mode: the marker with the largest mean edge length wins.
    best_i, best_score = 0, -1.0
    for i in range(len(ids_list)):
        c = ordered(i)
        edges = np.linalg.norm(c - np.roll(c, -1, axis=0), axis=1)
        score = float(np.mean(edges))
        if score > best_score:
            best_score = score
            best_i = i

    return ordered(best_i), ids_list[best_i], ids_list


def rectify_using_marker(rgb_img: np.ndarray, marker_corners_src: np.ndarray, marker_side_cm: float, px_per_cm: int):
    """
    Rectify (flatten) using marker corners.
    In rectified image: 1 cm = px_per_cm pixels.

    Returns:
        ``(rectified, H_total, marker_rect)``: the warped image, the 3x3
        homography including the canvas translation, and the marker corners
        in rectified coordinates.
    """
    H_img, W_img = rgb_img.shape[:2]

    src = order_corners_4pts(marker_corners_src)
    side_px = float(marker_side_cm * px_per_cm)
    dst = np.array(
        [[0, 0], [side_px, 0], [side_px, side_px], [0, side_px]],
        dtype=np.float32,
    )
    H = cv2.getPerspectiveTransform(src, dst)

    # big canvas to avoid cropping objects
    img_corners = np.array(
        [[0, 0], [W_img, 0], [W_img, H_img], [0, H_img]],
        dtype=np.float32,
    ).reshape(-1, 1, 2)
    warped_corners = cv2.perspectiveTransform(img_corners, H).reshape(-1, 2)

    min_xy = warped_corners.min(axis=0)
    max_xy = warped_corners.max(axis=0)

    # Shift so that nothing lands at negative coordinates.
    tx = -min_xy[0] if min_xy[0] < 0 else 0.0
    ty = -min_xy[1] if min_xy[1] < 0 else 0.0

    T = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]], dtype=np.float32)
    H_total = T @ H

    # NOTE(review): the canvas size is unbounded; a marker seen at a steep
    # angle can presumably make this very large. Consider capping it.
    out_w = int(np.ceil(max_xy[0] + tx))
    out_h = int(np.ceil(max_xy[1] + ty))
    out_w = max(out_w, int(side_px) + 80)
    out_h = max(out_h, int(side_px) + 80)

    rectified = cv2.warpPerspective(rgb_img, H_total, (out_w, out_h), flags=cv2.INTER_LINEAR)
    marker_rect = cv2.perspectiveTransform(src.reshape(-1, 1, 2), H_total).reshape(-1, 2)
    return rectified, H_total, marker_rect


# -----------------------------
# Mask + drawing helpers
# -----------------------------
def build_mask_from_xy(polys_xy, h, w):
    """
    Build a full-size boolean mask from polygon(s) in pixel coordinates.
    Ultralytics masks.xy provides polygon outlines (pixels).

    Polygons that are ``None`` or have fewer than 3 points are skipped;
    coordinates are clipped to the ``h`` x ``w`` canvas before filling.
    """
    m = np.zeros((h, w), dtype=np.uint8)
    for poly in polys_xy:
        if poly is None or len(poly) < 3:
            continue
        pts = np.asarray(poly, dtype=np.float32)
        pts = np.clip(pts, [0, 0], [w - 1, h - 1]).astype(np.int32).reshape(-1, 1, 2)
        cv2.fillPoly(m, [pts], 255)
    return m.astype(bool)


def overlay_mask(img_rgb: np.ndarray, mask_bool: np.ndarray, color_rgb=(255, 0, 0), alpha=0.35):
    """Return a copy of *img_rgb* with *color_rgb* alpha-blended where the mask is True."""
    out = img_rgb.copy()
    color = np.array(color_rgb, dtype=np.uint8).reshape(1, 1, 3)
    blended = out[mask_bool].astype(np.float32) * (1 - alpha) + color.astype(np.float32) * alpha
    out[mask_bool] = blended.astype(np.uint8)
    return out


def draw_closed_poly(img_rgb: np.ndarray, pts_xy: np.ndarray, color_rgb=(0, 102, 255), thickness=6):
    """Return a copy of *img_rgb* with the closed polygon *pts_xy* drawn on it.

    The image is RGB, so the colour is passed to OpenCV unchanged: OpenCV
    writes the tuple's values into channels 0, 1, 2 in order. Swapping to BGR
    here would draw the default blue outline as orange.
    """
    out = img_rgb.copy()
    pts = pts_xy.astype(np.int32).reshape(-1, 1, 2)
    color = tuple(int(c) for c in color_rgb)
    cv2.polylines(out, [pts], isClosed=True, color=color, thickness=thickness)
    return out


def make_side_by_side(left_rgb: np.ndarray, right_rgb: np.ndarray, max_h=900):
    """Create a nice side-by-side image for confidence: left=marker detection, right=rectified+mask.

    Both images are resized to a common height (the taller of the two, capped
    at *max_h*) and joined with a 12 px white gap.
    """
    def resize_to_h(img, h):
        H, W = img.shape[:2]
        scale = h / float(H)
        # At least 1 px wide so cv2.resize never receives a zero dimension.
        new_w = max(1, int(round(W * scale)))
        return cv2.resize(img, (new_w, h), interpolation=cv2.INTER_AREA)

    h = min(max_h, max(left_rgb.shape[0], right_rgb.shape[0]))
    L = resize_to_h(left_rgb, h)
    R = resize_to_h(right_rgb, h)
    gap = np.ones((h, 12, 3), dtype=np.uint8) * 255
    return np.concatenate([L, gap, R], axis=1)


# -----------------------------
# Class filter parsing
# -----------------------------
def parse_class_filter(text: str):
    """
    User can type:
    - "" (empty) -> allow ANY class
    - "cup" -> only cup
    - "cup, bottle" -> cup OR bottle

    Returns a list of lower-cased, stripped names (empty list = no filter).
    """
    t = (text or "").strip().lower()
    if not t:
        return []
    return [p.strip() for p in t.split(",") if p.strip()]


def class_name_from_id(mdl, cid: int):
    """Return the model's class name for *cid*, or the id as a string if unknown."""
    return mdl.names.get(int(cid), str(int(cid)))


def class_id_from_name(mdl, name: str):
    """Return the class id whose name matches *name* case-insensitively, else None."""
    # mdl.names is {id: "name"}
    wanted = name.lower()
    for k, v in mdl.names.items():
        if str(v).lower() == wanted:
            return int(k)
    return None


# -----------------------------
# Core measurement function
# -----------------------------
def measure_object_area(
    image_pil,
    model_name: str,
    marker_side_cm: float,
    px_per_cm: int,
    aruco_dict_name: str,
    marker_id: int,
    conf: float,
    iou: float,
    retina_masks: bool,
    class_filter_text: str,
    selection_mode: str,
):
    """Measure the projected area of a segmented object in cm².

    Args:
        image_pil: uploaded PIL image (``None`` if nothing was uploaded).
        model_name: YOLO segmentation weights to load.
        marker_side_cm: real printed side length of the ArUco marker.
        px_per_cm: scale of the rectified image.
        aruco_dict_name: name of a ``cv2.aruco`` predefined dictionary.
        marker_id: marker to use; negative or ``None`` picks the largest.
        conf, iou, retina_masks: forwarded to ``model.predict``.
        class_filter_text: comma-separated class names; empty = any class.
        selection_mode: ``"largest"`` measures the biggest instance; any
            other value measures the union of all matching instances.

    Returns:
        ``(image, text)`` for the two Gradio outputs.

    Raises:
        gr.Error: for missing/invalid user inputs.
    """
    if image_pil is None:
        raise gr.Error("Please upload an image first.")
    # A cleared Number field arrives as None; treat it like an invalid value.
    if marker_side_cm is None or marker_side_cm <= 0:
        raise gr.Error("marker_side_cm must be > 0. Measure the printed marker with a ruler (e.g., 4.7 cm).")
    if px_per_cm is None or int(px_per_cm) <= 0:
        raise gr.Error("px_per_cm must be > 0.")

    # Use one integer scale everywhere so the area conversion matches the
    # scale the image was actually rectified at.
    px_per_cm = int(px_per_cm)
    marker_side_cm = float(marker_side_cm)
    wanted_marker = None if marker_id is None or marker_id < 0 else int(marker_id)

    rgb = np.array(image_pil.convert("RGB"))
    mdl = get_model(model_name)

    # 1) Detect ArUco on original image
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    dictionary = get_aruco_dictionary(aruco_dict_name)
    corners_list, ids, _ = detect_markers(gray, dictionary)

    if ids is None or len(corners_list) == 0:
        return rgb, (
            "❌ ArUco NOT detected.\n\n"
            "Tips:\n"
            "- Ensure marker is fully visible\n"
            "- Avoid blur and glare\n"
            "- Confirm dictionary matches your printed marker\n"
        )

    chosen_corners, chosen_id, detected_ids = choose_marker(corners_list, ids, wanted_marker)

    # Visual proof on original
    vis_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    vis_bgr = cv2.aruco.drawDetectedMarkers(vis_bgr, corners_list, ids)
    vis_orig = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)

    # 2) Rectify original image (not the drawn one)
    rectified, _, marker_rect = rectify_using_marker(rgb, chosen_corners, marker_side_cm, px_per_cm)
    H, W = rectified.shape[:2]

    # Base output (always show marker)
    rect_out = draw_closed_poly(rectified, marker_rect, color_rgb=(0, 102, 255), thickness=6)

    # 3) Run YOLO segmentation
    pred_kwargs = dict(conf=float(conf), iou=float(iou), verbose=False, retina_masks=bool(retina_masks))
    results = mdl.predict(rectified, **pred_kwargs)
    r0 = results[0]

    if r0.masks is None or r0.boxes is None or len(r0.boxes) == 0:
        side = make_side_by_side(vis_orig, rect_out)
        txt = (
            "✅ ArUco detected and rectified (blue outline shows the marker used).\n"
            "❌ No segmentation masks found.\n\n"
            "Try:\n"
            "- Better lighting\n"
            "- Move object closer\n"
            "- Lower confidence a bit\n\n"
            f"Detected marker IDs: {detected_ids}\nUsed marker ID: {chosen_id}\n"
        )
        return side, txt

    # Ultralytics: masks.xy returns polygons in pixel coords
    polys_all = r0.masks.xy
    cls = r0.boxes.cls
    confs = r0.boxes.conf

    cls_np = cls.cpu().numpy() if hasattr(cls, "cpu") else np.array(cls)
    conf_np = confs.cpu().numpy() if hasattr(confs, "cpu") else np.array(confs)

    # Filter by class names if user requested
    wanted_names = parse_class_filter(class_filter_text)  # empty -> allow any
    wanted_ids = set()
    if wanted_names:
        for nm in wanted_names:
            cid = class_id_from_name(mdl, nm)
            if cid is not None:
                wanted_ids.add(cid)

        if not wanted_ids:
            return make_side_by_side(vis_orig, rect_out), (
                "❌ Your class name(s) were not found in this model.\n\n"
                "Tip: YOLO26-seg is pretrained on COCO (80 categories).\n"
                "Try a COCO name like: person, bottle, cup, book, cell phone, chair...\n\n"
                "If you want *any object*, leave the class filter empty."
            )

    # Build per-instance masks & areas
    instances = []
    # zip stops at the shortest sequence, so detections without a polygon are skipped.
    for i, (raw_cid, poly) in enumerate(zip(cls_np, polys_all)):
        cid = int(raw_cid)
        if wanted_ids and cid not in wanted_ids:
            continue

        polys = poly if isinstance(poly, (list, tuple)) else [poly]
        m = build_mask_from_xy(polys, H, W)
        area_px = int(np.count_nonzero(m))
        if area_px == 0:
            continue

        instances.append({
            "i": i,
            "class_id": cid,
            "class_name": class_name_from_id(mdl, cid),
            "conf": float(conf_np[i]),
            "mask": m,
            "area_px": area_px,
        })

    if not instances:
        side = make_side_by_side(vis_orig, rect_out)
        txt = (
            "✅ ArUco detected + rectified.\n"
            "❌ No masks left after filtering.\n\n"
            "If you typed a class filter, try leaving it blank to measure the largest object of ANY class."
        )
        return side, txt

    # Choose which mask(s) to measure
    if selection_mode == "largest":
        best = max(instances, key=lambda d: d["area_px"])
        mask_final = best["mask"]
        chosen_label = f"largest instance: {best['class_name']} (conf={best['conf']:.2f})"
        area_px = best["area_px"]
    else:
        # Union of all selected instances
        mask_final = np.zeros((H, W), dtype=bool)
        for d in instances:
            mask_final |= d["mask"]
        area_px = int(np.count_nonzero(mask_final))
        chosen_label = "union of all matching instances"

    # Convert to cm² (projected area on the paper plane)
    px_per_cm2 = float(px_per_cm * px_per_cm)
    area_cm2 = area_px / px_per_cm2

    # Overlay
    rect_out = overlay_mask(rect_out, mask_final, color_rgb=(255, 0, 0), alpha=0.35)
    label = f"Area: {area_cm2:.2f} cm²"
    # rect_out is RGB, so (0, 0, 255) renders as blue text.
    cv2.putText(rect_out, label, (15, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3, cv2.LINE_AA)

    # Side-by-side output
    side = make_side_by_side(vis_orig, rect_out)

    # Make a readable table of top instances by area
    instances_sorted = sorted(instances, key=lambda d: d["area_px"], reverse=True)[:10]
    lines = ["Top detected instances (by pixel area):"]
    for d in instances_sorted:
        a_cm2 = d["area_px"] / px_per_cm2
        lines.append(f" - {d['class_name']:<12} conf={d['conf']:.2f} area={a_cm2:.2f} cm²")

    class_note = "ANY class (no filter)" if not wanted_names else f"Filter: {', '.join(wanted_names)}"

    txt = (
        "✅ Done!\n\n"
        f"Measured: {chosen_label}\n"
        f"{class_note}\n\n"
        f"Projected area: {area_cm2:.2f} cm²\n\n"
        + "\n".join(lines) +
        "\n\nMarker:\n"
        f"- Detected IDs: {detected_ids}\n"
        f"- Used ID: {chosen_id}\n"
        f"- Marker side used: {float(marker_side_cm):.2f} cm\n"
        f"- Rectified scale: {int(px_per_cm)} px/cm\n"
        f"Model: {model_name}\n\n"
        "Note: This is a 2D projected area on the paper plane (not true 3D surface area).\n"
    )

    return side, txt


# -----------------------------
# Safe wrapper: always show traceback in Results box
# -----------------------------
def safe_measure(*args):
    """Call ``measure_object_area`` and turn any exception into result text.

    ``gr.Error`` becomes a short message; anything else is reported with its
    full traceback so it is visible in the Results box.
    """
    try:
        return measure_object_area(*args)
    except gr.Error as e:
        return None, f"❌ {str(e)}"
    except Exception:
        return None, "❌ Full error traceback:\n\n" + traceback.format_exc()


# -----------------------------
# Gradio UI
# -----------------------------
# Kept at column 0 so the Markdown renders as headings/lists regardless of
# how the component treats leading indentation.
_INTRO_MARKDOWN = """
# Measure ANY object projected area (cm²) using YOLO26 + ArUco

**What you get**
- Left image: original photo with detected ArUco marker(s) + IDs
- Right image: rectified (flattened) view with the chosen marker (blue) and measured object mask (red)

**How to use**
1) Put object + printed ArUco marker on the same flat paper
2) Upload photo
3) Enter the **real printed marker side** (measure with a ruler, e.g. 4.7 cm if printing shrank it)
4) (Optional) Type class filter (COCO name). Leave blank = “largest object of any class”
5) Click **Measure**
"""

with gr.Blocks(title="Measure ANY Object Area (cm²) using YOLO26 + ArUco") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    inp = gr.Image(type="pil", label="Upload photo (object + ArUco marker)")

    with gr.Accordion("Settings", open=True):
        model_name = gr.Textbox(value=DEFAULT_MODEL, label="Model weights (e.g. yolo26n-seg.pt)")
        marker_side_cm = gr.Number(value=4.7, label="Printed marker side (cm) — measure with ruler")
        px_per_cm = gr.Slider(60, 200, value=120, step=5, label="Rectified resolution (px per cm)")

        aruco_dict = gr.Dropdown(
            choices=["DICT_4X4_50", "DICT_5X5_100", "DICT_6X6_250"],
            value="DICT_4X4_50",
            label="ArUco dictionary (must match what you printed)"
        )
        marker_id = gr.Number(value=-1, precision=0, label="Marker ID (-1 = auto pick largest)")

        class_filter_text = gr.Textbox(
            value="",
            label="Class filter (optional, COCO name). Examples: 'bottle' or 'cup, bottle'. Leave blank = ANY class"
        )

        selection_mode = gr.Radio(
            choices=["largest", "union"],
            value="largest",
            label="If multiple matches: measure largest instance OR union of all"
        )

        with gr.Row():
            conf = gr.Slider(0.05, 0.80, value=0.25, step=0.01, label="YOLO confidence")
            iou = gr.Slider(0.10, 0.90, value=0.70, step=0.01, label="YOLO IoU")

        retina_masks = gr.Checkbox(value=True, label="retina_masks (often improves mask alignment)")

    btn = gr.Button("Measure object area", variant="primary")

    out_img = gr.Image(type="numpy", label="Side-by-side output (left original marker detection, right rectified measurement)")
    out_txt = gr.Textbox(label="Results (and full errors if something crashes)", lines=20)

    btn.click(
        fn=safe_measure,
        inputs=[inp, model_name, marker_side_cm, px_per_cm, aruco_dict, marker_id,
                conf, iou, retina_masks, class_filter_text, selection_mode],
        outputs=[out_img, out_txt]
    )

# show_error helps surface errors when debugging
demo.launch(share=True, debug=True, show_error=True)