# ArUco_YoLo26 — app.py (Hugging Face Space by lyimo, commit 17df435)
import os
import traceback
import numpy as np
import gradio as gr
from PIL import Image
# Close any previously running Gradio demos (helps when re-running in notebooks).
gr.close_all()
# Turn on verbose Gradio logging so Space issues are easier to diagnose.
os.environ["GRADIO_DEBUG"] = "1"
# -----------------------------
# OpenCV (headless-safe) + patch for Ultralytics import
# -----------------------------
import cv2

# Ultralytics may reference cv2.imshow during import; headless OpenCV builds
# ship without the GUI functions, so install no-op stand-ins first.
if not hasattr(cv2, "imshow"):
    def _noop(*args, **kwargs):
        return None

    for _gui_fn in ("imshow", "waitKey", "destroyAllWindows"):
        setattr(cv2, _gui_fn, _noop)
# -----------------------------
# Ultralytics YOLO
# -----------------------------
from ultralytics import YOLO

# YOLO26 segmentation checkpoints carry the "-seg" suffix.
DEFAULT_MODEL = "yolo26n-seg.pt"

# Loaded models, keyed by (stripped) weights name, so clicks don't reload.
_MODEL_CACHE = {}


def get_model(model_name: str):
    """Return a cached YOLO model for *model_name*, loading it on first use."""
    key = model_name.strip()
    try:
        model = _MODEL_CACHE[key]
    except KeyError:
        model = _MODEL_CACHE[key] = YOLO(key)
    return model
# -----------------------------
# ArUco helpers (new + old OpenCV APIs)
# -----------------------------
def get_aruco_dictionary(dict_name: str):
    """Resolve a dictionary name (e.g. "DICT_4X4_50") to a cv2.aruco dictionary.

    Raises RuntimeError when the contrib aruco module is missing, and
    ValueError when *dict_name* is not a known dictionary constant.
    """
    if not hasattr(cv2, "aruco"):
        raise RuntimeError("cv2.aruco missing. Install opencv-contrib-python-headless.")
    aruco = cv2.aruco
    dict_id = getattr(aruco, dict_name, None)
    if dict_id is None:
        raise ValueError(f"Unknown ArUco dictionary: {dict_name}")
    return aruco.getPredefinedDictionary(dict_id)
def detect_markers(gray_img: np.ndarray, dictionary):
    """Detect ArUco markers, preferring the modern object-based API.

    Returns (corners_list, ids, rejected) exactly as OpenCV provides them.
    Raises RuntimeError when neither ArUco API is available.
    """
    aruco = cv2.aruco

    # OpenCV >= 4.7 style: ArucoDetector object.
    if hasattr(aruco, "ArucoDetector") and hasattr(aruco, "DetectorParameters"):
        detector = aruco.ArucoDetector(dictionary, aruco.DetectorParameters())
        return detector.detectMarkers(gray_img)

    # Legacy function-based API.
    if hasattr(aruco, "detectMarkers"):
        if hasattr(aruco, "DetectorParameters_create"):
            params = aruco.DetectorParameters_create()
        else:
            params = None
        return aruco.detectMarkers(gray_img, dictionary, parameters=params)

    raise RuntimeError("No compatible ArUco detection API found.")
def order_corners_4pts(pts):
    """Order 4 points as top-left, top-right, bottom-right, bottom-left.

    Uses the classic sum/difference heuristic: the top-left corner minimizes
    x + y, the bottom-right maximizes it; the top-right minimizes y - x and
    the bottom-left maximizes it.
    """
    quad = np.asarray(pts, dtype=np.float32)
    sums = quad[:, 0] + quad[:, 1]
    diffs = quad[:, 1] - quad[:, 0]
    return np.array(
        [
            quad[np.argmin(sums)],   # top-left
            quad[np.argmin(diffs)],  # top-right
            quad[np.argmax(sums)],   # bottom-right
            quad[np.argmax(diffs)],  # bottom-left
        ],
        dtype=np.float32,
    )
def choose_marker(corners_list, ids, marker_id: int | None):
    """Pick one detected marker.

    If *marker_id* is given (>= 0) it must be among the detected IDs;
    otherwise the marker with the largest mean edge length wins.
    Returns (ordered_corners, chosen_id, all_detected_ids).
    """
    ids_list = ids.flatten().tolist()

    if marker_id is not None and marker_id >= 0:
        if marker_id not in ids_list:
            raise ValueError(f"Detected marker IDs: {ids_list}, but marker_id={marker_id} not found.")
        idx = ids_list.index(marker_id)
        quad = corners_list[idx][0].astype(np.float32)
        return order_corners_4pts(quad), ids_list[idx], ids_list

    # Auto mode: score each candidate by its mean edge length, keep the first
    # maximum (ties resolved toward the lower index, as before).
    best_i, best_score = 0, -1.0
    for idx in range(len(ids_list)):
        quad = order_corners_4pts(corners_list[idx][0].astype(np.float32))
        edge_lengths = [np.linalg.norm(quad[k] - quad[(k + 1) % 4]) for k in range(4)]
        mean_edge = float(np.mean(edge_lengths))
        if mean_edge > best_score:
            best_score, best_i = mean_edge, idx

    quad = corners_list[best_i][0].astype(np.float32)
    return order_corners_4pts(quad), ids_list[best_i], ids_list
def rectify_using_marker(rgb_img: np.ndarray, marker_corners_src: np.ndarray,
                         marker_side_cm: float, px_per_cm: int):
    """Warp the image onto the marker's plane so that 1 cm == px_per_cm pixels.

    Returns (rectified_image, total_homography, marker_corners_in_rectified).
    """
    img_h, img_w = rgb_img.shape[:2]

    # Homography mapping the marker quad to an axis-aligned square whose side
    # equals the marker's physical size at the requested pixel density.
    src = order_corners_4pts(marker_corners_src)
    side_px = float(marker_side_cm * px_per_cm)
    dst = np.array(
        [[0, 0], [side_px, 0], [side_px, side_px], [0, side_px]],
        dtype=np.float32,
    )
    H = cv2.getPerspectiveTransform(src, dst)

    # Warp the image's own corners to see how far content lands outside the
    # destination square, then translate so nothing gets cropped.
    frame = np.array(
        [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]],
        dtype=np.float32,
    ).reshape(-1, 1, 2)
    warped = cv2.perspectiveTransform(frame, H).reshape(-1, 2)
    min_xy = warped.min(axis=0)
    max_xy = warped.max(axis=0)
    tx = -min_xy[0] if min_xy[0] < 0 else 0.0
    ty = -min_xy[1] if min_xy[1] < 0 else 0.0
    T = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]], dtype=np.float32)
    H_total = T @ H

    # Canvas covers all warped content, with a floor slightly above the marker.
    out_w = max(int(np.ceil(max_xy[0] + tx)), int(side_px) + 80)
    out_h = max(int(np.ceil(max_xy[1] + ty)), int(side_px) + 80)

    rectified = cv2.warpPerspective(rgb_img, H_total, (out_w, out_h), flags=cv2.INTER_LINEAR)
    marker_rect = cv2.perspectiveTransform(src.reshape(-1, 1, 2), H_total).reshape(-1, 2)
    return rectified, H_total, marker_rect
# -----------------------------
# Mask + drawing helpers
# -----------------------------
def build_mask_from_xy(polys_xy, h, w):
    """Rasterize polygon outlines (pixel coordinates, e.g. Ultralytics
    masks.xy) into an (h, w) boolean mask."""
    canvas = np.zeros((h, w), dtype=np.uint8)
    for poly in polys_xy:
        # Skip missing or degenerate polygons (need at least a triangle).
        if poly is None or len(poly) < 3:
            continue
        pts = np.clip(np.asarray(poly, dtype=np.float32), [0, 0], [w - 1, h - 1])
        cv2.fillPoly(canvas, [pts.astype(np.int32).reshape(-1, 1, 2)], 255)
    return canvas.astype(bool)
def overlay_mask(img_rgb: np.ndarray, mask_bool: np.ndarray, color_rgb=(255, 0, 0), alpha=0.35):
    """Return a copy of *img_rgb* with *color_rgb* alpha-blended over the
    pixels selected by *mask_bool*; the input image is left untouched."""
    blended = img_rgb.copy()
    tint = np.array(color_rgb, dtype=np.uint8).reshape(1, 1, 3)
    mixed = blended[mask_bool].astype(np.float32) * (1 - alpha) + tint.astype(np.float32) * alpha
    blended[mask_bool] = mixed.astype(np.uint8)
    return blended
def draw_closed_poly(img_rgb: np.ndarray, pts_xy: np.ndarray, color_rgb=(0, 102, 255), thickness=6):
    """Return a copy of the image with *pts_xy* drawn as a closed polygon.

    cv2 draws in BGR order, so the RGB color tuple is reversed for the call.
    """
    red, green, blue = (int(c) for c in color_rgb)
    canvas = img_rgb.copy()
    cv2.polylines(
        canvas,
        [pts_xy.astype(np.int32).reshape(-1, 1, 2)],
        isClosed=True,
        color=(blue, green, red),
        thickness=thickness,
    )
    return canvas
def make_side_by_side(left_rgb: np.ndarray, right_rgb: np.ndarray, max_h=900):
    """Stack two RGB images horizontally with a 12-px white gap.

    Both images are rescaled to a common height: the taller of the two
    inputs, capped at *max_h*.
    """
    target_h = min(max_h, max(left_rgb.shape[0], right_rgb.shape[0]))

    def scaled(img):
        h, w = img.shape[:2]
        new_w = int(round(w * (target_h / float(h))))
        return cv2.resize(img, (new_w, target_h), interpolation=cv2.INTER_AREA)

    gap = np.full((target_h, 12, 3), 255, dtype=np.uint8)
    return np.concatenate([scaled(left_rgb), gap, scaled(right_rgb)], axis=1)
# -----------------------------
# Class filter parsing
# -----------------------------
def parse_class_filter(text: str):
    """Parse a comma-separated class filter into lowercase names.

    Empty / None / whitespace-only input means "allow ANY class" -> [].
    Example: "Cup, Bottle" -> ["cup", "bottle"].
    """
    if not text:
        return []
    tokens = (piece.strip() for piece in text.lower().split(","))
    return [token for token in tokens if token]
def class_name_from_id(mdl, cid: int):
    """Look up the class name for *cid* in mdl.names ({id: name});
    fall back to the numeric id rendered as a string."""
    key = int(cid)
    return mdl.names.get(key, str(key))
def class_id_from_name(mdl, name: str):
    """Case-insensitive reverse lookup in mdl.names ({id: name}).

    Returns the integer class id, or None when the name is unknown.
    """
    target = name.lower()
    for cid, cname in mdl.names.items():
        if str(cname).lower() == target:
            return int(cid)
    return None
# -----------------------------
# Core measurement function
# -----------------------------
def measure_object_area(
    image_pil,
    model_name: str,
    marker_side_cm: float,
    px_per_cm: int,
    aruco_dict_name: str,
    marker_id: int,
    conf: float,
    iou: float,
    retina_masks: bool,
    class_filter_text: str,
    selection_mode: str,
):
    """Measure an object's projected area in cm² from a single photo.

    Pipeline:
      1. Detect an ArUco marker of known physical size in the photo.
      2. Rectify (flatten) the image onto the paper plane so that
         1 cm == px_per_cm pixels.
      3. Run YOLO segmentation on the rectified image.
      4. Convert the chosen mask's pixel count into cm²
         (area_px / px_per_cm²).

    Returns (image, text): the raw input plus an error message when the
    marker is not found, otherwise a side-by-side (marker detection |
    rectified measurement) plus a results report.
    Raises gr.Error for invalid user input.
    """
    if image_pil is None:
        raise gr.Error("Please upload an image first.")
    if marker_side_cm <= 0:
        raise gr.Error("marker_side_cm must be > 0. Measure the printed marker with a ruler (e.g., 4.7 cm).")

    rgb = np.array(image_pil.convert("RGB"))
    mdl = get_model(model_name)

    # 1) Detect ArUco on the original image.
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    dictionary = get_aruco_dictionary(aruco_dict_name)
    corners_list, ids, _ = detect_markers(gray, dictionary)
    if ids is None or len(corners_list) == 0:
        return rgb, (
            "❌ ArUco NOT detected.\n\n"
            "Tips:\n"
            "- Ensure marker is fully visible\n"
            "- Avoid blur and glare\n"
            "- Confirm dictionary matches your printed marker\n"
        )

    # marker_id < 0 means "auto-pick the largest detected marker".
    chosen_corners, chosen_id, detected_ids = choose_marker(
        corners_list, ids, None if marker_id < 0 else int(marker_id)
    )

    # Visual proof on the original: draw all detected markers + their IDs.
    aruco = cv2.aruco
    vis_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    vis_bgr = aruco.drawDetectedMarkers(vis_bgr, corners_list, ids)
    vis_orig = cv2.cvtColor(vis_bgr, cv2.COLOR_BGR2RGB)

    # 2) Rectify the original image (not the annotated copy).
    rectified, _, marker_rect = rectify_using_marker(rgb, chosen_corners, float(marker_side_cm), int(px_per_cm))
    H, W = rectified.shape[:2]

    # Base output: always outline the marker that set the scale (blue).
    rect_out = draw_closed_poly(rectified, marker_rect, color_rgb=(0, 102, 255), thickness=6)

    # 3) Run YOLO segmentation on the rectified image.
    # retina_masks=True requests masks aligned with the inference image size.
    pred_kwargs = dict(conf=float(conf), iou=float(iou), verbose=False, retina_masks=bool(retina_masks))
    results = mdl.predict(rectified, **pred_kwargs)
    r0 = results[0]
    if r0.masks is None or r0.boxes is None or len(r0.boxes) == 0:
        side = make_side_by_side(vis_orig, rect_out)
        txt = (
            "✅ ArUco detected and rectified (blue outline shows the marker used).\n"
            "❌ No segmentation masks found.\n\n"
            "Try:\n"
            "- Better lighting\n"
            "- Move object closer\n"
            "- Lower confidence a bit\n\n"
            f"Detected marker IDs: {detected_ids}\nUsed marker ID: {chosen_id}\n"
        )
        return side, txt

    # Ultralytics masks.xy: per-instance polygon outlines in pixel coords.
    polys_all = r0.masks.xy
    cls = r0.boxes.cls
    confs = r0.boxes.conf
    cls_np = cls.cpu().numpy() if hasattr(cls, "cpu") else np.array(cls)
    conf_np = confs.cpu().numpy() if hasattr(confs, "cpu") else np.array(confs)

    # Translate the user's class-name filter into class ids (empty -> any).
    wanted_names = parse_class_filter(class_filter_text)
    wanted_ids = []
    if wanted_names:
        for nm in wanted_names:
            cid = class_id_from_name(mdl, nm)
            if cid is not None:
                wanted_ids.append(cid)
        if not wanted_ids:
            # None of the requested names exist in this model's vocabulary.
            # FIX: removed a stray ChatGPT citation artifact
            # (":contentReference[oaicite:8]{index=8}") that was shown to
            # users inside this message, and dropped the unused `available`
            # local that was computed here but never referenced.
            return make_side_by_side(vis_orig, rect_out), (
                "❌ Your class name(s) were not found in this model.\n\n"
                "Tip: YOLO26-seg is pretrained on COCO (80 categories).\n"
                "Try a COCO name like: person, bottle, cup, book, cell phone, chair...\n\n"
                "If you want *any object*, leave the class filter empty."
            )

    # Build per-instance masks & pixel areas for every surviving detection.
    instances = []
    for i in range(len(cls_np)):
        cid = int(cls_np[i])
        if wanted_ids and cid not in wanted_ids:
            continue
        if i >= len(polys_all):
            continue  # defensive: boxes/masks length mismatch
        poly = polys_all[i]
        polys = poly if isinstance(poly, (list, tuple)) else [poly]
        m = build_mask_from_xy(polys, H, W)
        area_px = int(np.count_nonzero(m))
        if area_px == 0:
            continue  # degenerate polygon rasterized to nothing
        instances.append({
            "i": i,
            "class_id": cid,
            "class_name": class_name_from_id(mdl, cid),
            "conf": float(conf_np[i]),
            "mask": m,
            "area_px": area_px
        })

    if not instances:
        side = make_side_by_side(vis_orig, rect_out)
        txt = (
            "✅ ArUco detected + rectified.\n"
            "❌ No masks left after filtering.\n\n"
            "If you typed a class filter, try leaving it blank to measure the largest object of ANY class."
        )
        return side, txt

    # Choose which mask(s) to measure: single largest instance, or the union.
    if selection_mode == "largest":
        best = max(instances, key=lambda d: d["area_px"])
        mask_final = best["mask"]
        chosen_label = f"largest instance: {best['class_name']} (conf={best['conf']:.2f})"
        area_px = best["area_px"]
    else:
        mask_final = np.zeros((H, W), dtype=bool)
        for d in instances:
            mask_final |= d["mask"]
        area_px = int(np.count_nonzero(mask_final))
        chosen_label = "union of all matching instances"

    # Convert to cm² (2D projected area on the paper plane).
    area_cm2 = area_px / float(px_per_cm * px_per_cm)

    # Overlay the measured mask in red and stamp the area label.
    rect_out = overlay_mask(rect_out, mask_final, color_rgb=(255, 0, 0), alpha=0.35)
    label = f"Area: {area_cm2:.2f} cm²"
    cv2.putText(rect_out, label, (15, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3, cv2.LINE_AA)

    # Side-by-side output: marker detection (left) | measurement (right).
    side = make_side_by_side(vis_orig, rect_out)

    # Readable table of the top 10 instances by pixel area.
    instances_sorted = sorted(instances, key=lambda d: d["area_px"], reverse=True)[:10]
    lines = []
    lines.append("Top detected instances (by pixel area):")
    for d in instances_sorted:
        a_cm2 = d["area_px"] / float(px_per_cm * px_per_cm)
        lines.append(f" - {d['class_name']:<12} conf={d['conf']:.2f} area={a_cm2:.2f} cm²")

    class_note = "ANY class (no filter)" if not wanted_names else f"Filter: {', '.join(wanted_names)}"
    txt = (
        "✅ Done!\n\n"
        f"Measured: {chosen_label}\n"
        f"{class_note}\n\n"
        f"Projected area: {area_cm2:.2f} cm²\n\n"
        + "\n".join(lines) +
        "\n\nMarker:\n"
        f"- Detected IDs: {detected_ids}\n"
        f"- Used ID: {chosen_id}\n"
        f"- Marker side used: {float(marker_side_cm):.2f} cm\n"
        f"- Rectified scale: {int(px_per_cm)} px/cm\n"
        f"Model: {model_name}\n\n"
        "Note: This is a 2D projected area on the paper plane (not true 3D surface area).\n"
    )
    return side, txt
# -----------------------------
# Safe wrapper: always show traceback in Results box
# -----------------------------
def safe_measure(*args):
    """Run measure_object_area; surface any failure in the Results textbox
    instead of letting the Gradio call die silently."""
    try:
        return measure_object_area(*args)
    except gr.Error as err:
        # Expected user-input problems: show just the message.
        return None, f"❌ {err}"
    except Exception:
        # Anything else: dump the full traceback for debugging.
        return None, "❌ Full error traceback:\n\n" + traceback.format_exc()
# -----------------------------
# Gradio UI
# -----------------------------
# NOTE(review): original indentation was lost; the gr.Row placement inside
# the Settings accordion is reconstructed — confirm against the live Space.
with gr.Blocks(title="Measure ANY Object Area (cm²) using YOLO26 + ArUco") as demo:
    gr.Markdown(
        """
# Measure ANY object projected area (cm²) using YOLO26 + ArUco
**What you get**
- Left image: original photo with detected ArUco marker(s) + IDs
- Right image: rectified (flattened) view with the chosen marker (blue) and measured object mask (red)
**How to use**
1) Put object + printed ArUco marker on the same flat paper
2) Upload photo
3) Enter the **real printed marker side** (measure with a ruler, e.g. 4.7 cm if printing shrank it)
4) (Optional) Type class filter (COCO name). Leave blank = “largest object of any class”
5) Click **Measure**
"""
    )
    # Main input image.
    inp = gr.Image(type="pil", label="Upload photo (object + ArUco marker)")
    with gr.Accordion("Settings", open=True):
        model_name = gr.Textbox(value=DEFAULT_MODEL, label="Model weights (e.g. yolo26n-seg.pt)")
        marker_side_cm = gr.Number(value=4.7, label="Printed marker side (cm) — measure with ruler")
        px_per_cm = gr.Slider(60, 200, value=120, step=5, label="Rectified resolution (px per cm)")
        aruco_dict = gr.Dropdown(
            choices=["DICT_4X4_50", "DICT_5X5_100", "DICT_6X6_250"],
            value="DICT_4X4_50",
            label="ArUco dictionary (must match what you printed)"
        )
        marker_id = gr.Number(value=-1, precision=0, label="Marker ID (-1 = auto pick largest)")
        class_filter_text = gr.Textbox(
            value="",
            label="Class filter (optional, COCO name). Examples: 'bottle' or 'cup, bottle'. Leave blank = ANY class"
        )
        selection_mode = gr.Radio(
            choices=["largest", "union"],
            value="largest",
            label="If multiple matches: measure largest instance OR union of all"
        )
        # YOLO prediction knobs.
        with gr.Row():
            conf = gr.Slider(0.05, 0.80, value=0.25, step=0.01, label="YOLO confidence")
            iou = gr.Slider(0.10, 0.90, value=0.70, step=0.01, label="YOLO IoU")
            retina_masks = gr.Checkbox(value=True, label="retina_masks (often improves mask alignment)")
    btn = gr.Button("Measure object area", variant="primary")
    out_img = gr.Image(type="numpy", label="Side-by-side output (left original marker detection, right rectified measurement)")
    out_txt = gr.Textbox(label="Results (and full errors if something crashes)", lines=20)
    # Wire the button through the safe wrapper so errors land in out_txt.
    btn.click(
        fn=safe_measure,
        inputs=[inp, model_name, marker_side_cm, px_per_cm, aruco_dict, marker_id, conf, iou, retina_masks, class_filter_text, selection_mode],
        outputs=[out_img, out_txt]
    )
# show_error surfaces exceptions in the browser while debugging.
demo.launch(share=True, debug=True, show_error=True)