| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| from __future__ import annotations |
| import re |
| from typing import List, Tuple, Dict |
| import numpy as np |
| from PIL import Image |
|
|
| |
| |
| |
| |
| |
| CLASS_NAMES: List[str] = [ |
| "unlabeled", |
| "person","bicycle","car","motorcycle","airplane","bus","train","truck","boat", |
| "traffic light","fire hydrant","street sign","stop sign","parking meter","bench", |
| "bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe", |
| "hat","backpack","umbrella","shoe","eyeglasses","handbag","tie","suitcase", |
| "frisbee","skis","snowboard","ball","kite","baseball_bat","baseball_glove", |
| "skateboard","surfboard","tennis_racket","bottle","plate","wine_glass","cup", |
| "fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli", |
| "carrot","hot_dog","pizza","donut","cake","chair","couch","potted_plant","bed", |
| "mirror","dining_table","window","desk","toilet","door","tv","laptop","mouse", |
| "remote","keyboard","cell_phone","microwave","oven","toaster","sink","refrigerator", |
| "blender","book","clock","vase","scissors","teddy_bear","hair_dryer","toothbrush", |
| "hair_brush", |
|
|
| |
| "banner","blanket","branch","bridge","building-other","bush","cabinet","cage", |
| "cardboard","carpet","ceiling-other","ceiling-tile","cloth","clothes","clouds", |
| "counter","cupboard","curtain","desk-stuff","dirt","door-stuff","fence", |
| "floor-marble","floor-other","floor-stone","floor-tile","floor-wood","flower", |
| "fog","food-other","fruit","furniture-other","grass","gravel","ground-other", |
| "hill","house","leaves","light","mat","metal","mirror-stuff","moss","mountain", |
| "mud","napkin","net","paper","pavement","pillow","plant-other","plastic", |
| "platform","playingfield","railing","railroad","river","road","rock","roof","rug", |
| "salad","sand","sea","shelf","sky-other","skyscraper","snow","solid-other", |
| "stairs","stone","straw","structural-other","table","tent","textile-other", |
| "towel","tree","vegetable","wall-brick","wall-concrete","wall-other","wall-panel", |
| "wall-stone","wall-tile","wall-wood","water","waterdrops","window_blind", |
| "window","wood", |
| ] |
|
|
| |
| CLASS_NAMES = [re.sub(r"\s+", "_", n.strip().lower()) for n in CLASS_NAMES] |
|
|
| |
| |
| |
| ADE_151_CLASS_NAMES: List[str] = [ |
| "unlabeled","wall","building","blue_sky","floor","tree","ceiling","road","bed","window", |
| "grass","cabinet","sidewalk","person","ground","door","table","mountain","flora","curtain", |
| "chair","car","water","painting","sofa","shelf","house","sea","mirror","rug", |
| "field","armchair","seat","fence","desk","rock","wardrobe","lamp","bathtub","rail", |
| "cushion","pedestal","box","pillar","signboard","dresser","counter","sand","sink","skyscraper", |
| "fireplace","refrigerator","grandstand","path","stairs","runway","display","snooker","pillow","screen_door", |
| "stairway","river","bridge","bookcase","blind","tea_table","commode","flower","book","hill", |
| "bench","countertop","stove","palm_tree","kitchen","computer","swivel_chair","boat","bar","console", |
| "hovel","bus","towel","light","truck","tower","chandelier","sunshade","streetlight","booth", |
| "television","aeroplane","dirt","apparel","pole","land","bannister","escalator","ottoman","bottle", |
| "sideboard","poster","stage","van","ship","fountain","conveyer_belt","canopy","washer","plaything", |
| "swimming_pool","stool","barrel","basket","waterfall","tent","bag","motorcycle","cradle","oven", |
| "ball","food","stair","tank","marque","microwave","flowerpot","animal","bicycle","lake", |
| "dishwasher","projector","blanket","sculpture","exhaust","sconce","vase","traffic_light","tray","ashcan", |
| "fan","pier","screen","plate","monitor","notice_board","shower","radiator","glass","clock","flag", |
| ] |
| ADE_151_CLASS_NAMES = [n.strip().lower() for n in ADE_151_CLASS_NAMES] |
|
|
| |
| |
| |
| |
| NAMED_COLORS: Dict[str, Tuple[int, int, int]] = { |
| |
| "unlabeled": (0, 0, 0), |
|
|
| |
| "person": (220, 20, 60), |
| "human": (220, 20, 60), |
| "horse": (90, 60, 30), |
| "dog": (184, 134, 11), |
| "cat": (255, 160, 122), |
| "bird": (30, 144, 255), |
| "sheep": (245, 222, 179), |
| "cow": (139, 69, 19), |
| "elephant": (128, 128, 128), |
| "bear": (92, 64, 51), |
| "zebra": (200, 200, 200), |
| "giraffe": (218, 165, 32), |
|
|
| "bicycle": (60, 180, 75), |
| "car": (0, 90, 190), |
| "motorcycle": (255, 80, 80), |
| "airplane": (120, 120, 255), |
| "aeroplane": (120, 120, 255), |
| "bus": (255, 140, 0), |
| "train": (70, 130, 180), |
| "truck": (200, 120, 0), |
| "boat": (0, 120, 170), |
| "van": (80, 140, 220), |
| "ship": (30, 100, 160), |
|
|
| |
| "water": (64, 164, 223), |
| "river": (64, 164, 223), |
| "lake": (64, 164, 223), |
| "sea": (0, 105, 148), |
| "waterfall": (120, 170, 230), |
| "swimming_pool": (100, 200, 230), |
|
|
| "sky": (135, 206, 235), |
| "blue_sky": (135, 206, 235), |
| "clouds": (220, 230, 240), |
|
|
| "tree": (34, 139, 34), |
| "palm_tree": (44, 159, 44), |
| "flora": (52, 168, 83), |
| "flower": (233, 84, 150), |
| "grass": (76, 187, 23), |
| "leaves": (76, 187, 23), |
| "moss": (107, 142, 35), |
| "hill": (88, 120, 80), |
| "mountain": (96, 108, 118), |
|
|
| "sand": (194, 178, 128), |
| "ground": (120, 72, 48), |
| "land": (120, 72, 48), |
| "dirt": (115, 74, 53), |
| "mud": (110, 74, 57), |
| "rock": (101, 110, 120), |
| "stone": (112, 128, 144), |
|
|
| |
| "road": (128, 128, 128), |
| "sidewalk": (170, 170, 170), |
| "pavement": (150, 150, 150), |
| "path": (150, 150, 150), |
| "playingfield": (100, 180, 100), |
| "runway": (160, 160, 160), |
| "stairs": (145, 145, 145), |
| "stair": (145, 145, 145), |
| "stairway": (145, 145, 145), |
| "railroad": (100, 100, 100), |
| "bridge": (120, 120, 140), |
| "pier": (120, 120, 140), |
|
|
| |
| "building": (160, 160, 160), |
| "building-other": (160, 160, 160), |
| "house": (170, 160, 160), |
| "skyscraper": (120, 130, 140), |
| "roof": (150, 120, 100), |
| "wall": (180, 180, 180), |
| "wall-brick": (178, 34, 34), |
| "wall-concrete": (190, 190, 190), |
| "wall-other": (170, 170, 170), |
| "wall-panel": (160, 160, 160), |
| "wall-stone": (135, 135, 135), |
| "wall-tile": (200, 200, 200), |
| "wall-wood": (181, 101, 29), |
| "ceiling": (210, 210, 210), |
| "ceiling-other": (210, 210, 210), |
| "ceiling-tile": (220, 220, 220), |
| "door": (150, 120, 90), |
| "door-stuff": (150, 120, 90), |
| "window": (175, 215, 230), |
| "window_blind": (170, 210, 230), |
| "mirror": (210, 220, 230), |
| "mirror-stuff": (210, 220, 230), |
| "light": (255, 230, 140), |
| "streetlight": (240, 210, 120), |
| "tower": (140, 140, 160), |
|
|
| "fence": (189, 183, 107), |
| "railing": (170, 170, 150), |
| "pillar": (180, 180, 170), |
| "signboard": (255, 200, 80), |
| "poster": (255, 200, 140), |
| "traffic_light": (50, 205, 50), |
|
|
| |
| "chair": (205, 133, 63), |
| "armchair": (200, 120, 80), |
| "seat": (205, 133, 63), |
| "bench": (160, 120, 70), |
| "sofa": (160, 82, 45), |
| "stool": (175, 125, 80), |
| "table": (181, 101, 29), |
| "dining_table": (181, 101, 29), |
| "desk": (170, 100, 40), |
| "desk-stuff": (170, 100, 40), |
| "bed": (180, 130, 100), |
| "cabinet": (145, 110, 70), |
| "cupboard": (145, 110, 70), |
| "wardrobe": (130, 90, 60), |
| "dresser": (135, 95, 65), |
| "sideboard": (135, 95, 65), |
| "shelf": (140, 105, 65), |
|
|
| "carpet": (150, 80, 60), |
| "rug": (150, 80, 60), |
| "curtain": (200, 180, 160), |
| "pillow": (230, 200, 170), |
| "cushion": (230, 200, 170), |
| "blanket": (200, 170, 150), |
| "towel": (220, 220, 200), |
|
|
| "kitchen": (170, 170, 160), |
| "counter": (150, 140, 130), |
| "countertop": (160, 150, 140), |
| "sink": (200, 210, 220), |
| "stove": (140, 140, 140), |
| "oven": (140, 140, 150), |
| "microwave": (155, 160, 170), |
| "dishwasher": (190, 200, 210), |
| "washer": (190, 200, 210), |
| "refrigerator": (200, 220, 235), |
|
|
| |
| "television": (70, 100, 160), |
| "tv": (70, 100, 160), |
| "monitor": (70, 100, 160), |
| "screen": (70, 100, 160), |
| "screen_door": (170, 210, 230), |
| "projector": (100, 120, 160), |
| "laptop": (70, 100, 160), |
| "keyboard": (70, 90, 120), |
| "mouse": (80, 80, 90), |
| "remote": (60, 60, 70), |
| "cell_phone": (100, 120, 140), |
|
|
| |
| "vase": (186, 85, 211), |
| "flowerpot": (170, 100, 60), |
| "lamp": (255, 230, 140), |
| "chandelier": (255, 220, 120), |
| "sconce": (255, 225, 140), |
|
|
| |
| "paper": (240, 240, 220), |
| "plastic": (200, 200, 220), |
| "metal": (180, 180, 190), |
| "cloth": (220, 200, 190), |
| "textile-other": (220, 200, 190), |
| "glass": (200, 220, 240), |
| "wood": (181, 101, 29), |
|
|
| |
| "banana": (255, 225, 53), |
| "apple": (220, 30, 30), |
| "sandwich": (222, 184, 135), |
| "orange": (255, 165, 0), |
| "broccoli": (67, 160, 71), |
| "carrot": (255, 127, 80), |
| "pizza": (255, 180, 100), |
| "donut": (210, 180, 140), |
| "cake": (255, 218, 185), |
| "hot_dog": (204, 102, 0), |
| "salad": (143, 188, 143), |
| "fruit": (255, 160, 122), |
| "vegetable": (85, 139, 47), |
| "food-other": (200, 160, 120), |
| "food": (200, 160, 120), |
|
|
| |
| "bottle": (135, 206, 250), |
| "plate": (245, 245, 245), |
| "wine_glass": (230, 230, 250), |
| "cup": (250, 250, 250), |
| "fork": (192, 192, 192), |
| "knife": (192, 192, 192), |
| "spoon": (192, 192, 192), |
| "bowl": (255, 239, 213), |
| "bag": (170, 120, 70), |
| "box": (170, 120, 70), |
| "barrel": (165, 105, 58), |
| "basket": (170, 120, 70), |
| "tray": (210, 210, 210), |
|
|
| |
| "banner": (255, 215, 0), |
| "flag": (220, 20, 60), |
|
|
| |
| "booth": (160, 160, 160), |
| "display": (100, 120, 160), |
| "notice_board": (210, 180, 140), |
| "signboard": (255, 200, 80), |
| } |
|
|
| |
| |
| |
| |
| _ALIASES: Dict[str, str] = { |
| "human": "person", "humans": "person", "man": "person", "men": "person", |
| "woman": "person", "women": "person", "people": "person", |
|
|
| "tv": "television", "tv_monitor": "television", "monitor_tv": "television", |
|
|
| "cell phone": "cell_phone", "cellphone": "cell_phone", "mobile": "cell_phone", "phone": "cell_phone", |
| "teddy bear": "teddy_bear", "wine glass": "wine_glass", "baseball bat": "baseball_bat", |
| "baseball glove": "baseball_glove", "tennis racket": "tennis_racket", |
|
|
| "blue sky": "blue_sky", "traffic light": "traffic_light", "water fall": "waterfall", |
| "window blind": "window_blind", "street light": "streetlight", |
|
|
| |
| "aeroplane": "airplane", |
| } |
|
|
| def _normalize_token(s: str) -> str: |
| s = s.strip().lower() |
| s = re.sub(r"[_\-]+", " ", s) |
| s = re.sub(r"\s+", " ", s) |
| s = _ALIASES.get(s, s) |
| s = s.replace(" ", "_") |
| return s |
|
|
| def _resolve_prompt_item_to_names(item: str) -> List[str]: |
| """ |
| Turn one prompt item into one or more canonical names. |
| Splits ONLY on 'and' as a WORD, or on &, /, + (with optional spaces). |
| Critically, it won't split inside words like 'sand'. |
| """ |
| norm = item.strip() |
| parts = re.split(r"\s*(?:\band\b|&|/|\+)\s*", norm, flags=re.IGNORECASE) |
| out: List[str] = [] |
| for p in parts: |
| tok = _normalize_token(p) |
| if not tok: |
| continue |
| if tok in ("background", "unlabeled"): |
| tok = "unlabeled" |
| out.append(tok) |
| return out if out else ["unlabeled"] |
|
|
| |
| |
| |
| def _infer_color_from_name(name: str) -> Tuple[int, int, int]: |
| """Heuristic fallback: choose a sensible color by keyword.""" |
| n = name.lower().replace("_", " ") |
| def c(r,g,b): return (r, g, b) |
|
|
| |
| if "blue sky" in n or ("sky" in n and "blue" in n): return c(135,206,235) |
| if "sky" in n: return c(135,206,235) |
| if any(k in n for k in ["sea","ocean"]): return c(0,105,148) |
| if any(k in n for k in ["river","lake","waterfall","pool"]): return c(64,164,223) |
| if "water" in n: return c(64,164,223) |
|
|
| |
| if any(k in n for k in ["tree","palm","flora","grass","plant","field","hill","land"]): return c(52,168,83) |
| if any(k in n for k in ["sand","beach","desert"]): return c(194,178,128) |
| if any(k in n for k in ["ground","dirt","soil","mud"]): return c(120,72,48) |
| if any(k in n for k in ["rock","mountain","stone","skyscraper"]): return c(120,130,140) |
|
|
| |
| if any(k in n for k in ["road","street","sidewalk","path","runway","stairs","stair"]): return c(150,150,150) |
| if "railroad" in n: return c(100,100,100) |
|
|
| |
| if any(k in n for k in ["person","people","human"]): return c(220,20,60) |
| if any(k in n for k in ["car","truck","van","bus"]): return c(0,90,190) |
| if any(k in n for k in ["bicycle","bike","motorcycle"]): return c(60,180,75) |
| if any(k in n for k in ["boat","ship","ferry"]): return c(0,120,170) |
| if any(k in n for k in ["aeroplane","airplane","aircraft"]): return c(120,120,255) |
|
|
| |
| if any(k in n for k in ["building","house","wall","ceiling","door","window","bridge","tower"]): return c(170,170,170) |
|
|
| |
| if any(k in n for k in ["sofa","chair","stool","bench","table","desk","bed","cabinet","wardrobe","dresser","shelf"]): return c(181,101,29) |
|
|
| |
| if any(k in n for k in ["television","monitor","computer","screen","projector","tv"]): return c(70,100,160) |
| if any(k in n for k in ["lamp","light","chandelier","sconce","streetlight"]): return c(255,230,140) |
|
|
| |
| if "mirror" in n or "glass" in n: return c(200, 220, 240) |
|
|
| |
| if any(k in n for k in ["flower","vase","sculpture","poster","painting","flag"]): return c(186,85,211) |
|
|
| |
| if any(k in n for k in ["bag","bottle","barrel","basket","box"]): return c(170,120,70) |
|
|
| |
| if any(k in n for k in ["kitchen","sink","stove","oven","microwave","dishwasher","washer","refrigerator","counter","countertop"]): |
| return c(175,185,195) |
|
|
| |
| return c(128, 128, 128) |
|
|
| def _color_for_name(name: str) -> Tuple[int, int, int]: |
| key = _normalize_token(name) |
| if key in NAMED_COLORS: |
| return NAMED_COLORS[key] |
| |
| alias_back = _ALIASES.get(name.lower(), None) |
| if alias_back and alias_back in NAMED_COLORS: |
| return NAMED_COLORS[alias_back] |
| return _infer_color_from_name(key) |
|
|
| |
| |
| |
| def _build_lut_for_names(names: List[str]) -> np.ndarray: |
| lut = np.zeros((len(names), 3), dtype=np.uint8) |
| for i, n in enumerate(names): |
| lut[i] = _color_for_name(n) |
| return lut |
|
|
| _COCO_LUT: np.ndarray | None = None |
| _ADE_LUT: np.ndarray | None = None |
|
|
| def _palette_for_dataset(dataset: str) -> np.ndarray: |
| """Return [N,3] palette for dataset: 'coco' or 'ade'.""" |
| global _COCO_LUT, _ADE_LUT |
| if dataset == "ade": |
| if _ADE_LUT is None: |
| _ADE_LUT = _build_lut_for_names(ADE_151_CLASS_NAMES) |
| return _ADE_LUT |
| |
| if _COCO_LUT is None: |
| _COCO_LUT = _build_lut_for_names(CLASS_NAMES) |
| return _COCO_LUT |
|
|
| def _palette_for_prompt_classes(classes: List[str]) -> np.ndarray: |
| """ |
| Build a per-request palette given a prompt class list. |
| Index 0 is treated as 'unlabeled' (background) if present. |
| Supports entries like 'human and horse' -> average of person + horse. |
| """ |
| n = len(classes) |
| pal = np.zeros((n, 3), dtype=np.uint8) |
| for idx, raw in enumerate(classes): |
| if idx == 0: |
| pal[idx] = np.array(NAMED_COLORS.get("unlabeled", (0, 0, 0)), dtype=np.uint8) |
| continue |
|
|
| names = _resolve_prompt_item_to_names(raw) |
| |
| canon_names = [ _ALIASES.get(n.replace("_"," "), n).replace(" ", "_") for n in names ] |
| |
| cols = [ np.array(_color_for_name(n), dtype=np.float32) for n in canon_names ] |
| if len(cols) == 0: |
| rgb = np.array((128,128,128), dtype=np.float32) |
| else: |
| rgb = np.mean(cols, axis=0) |
| pal[idx] = np.clip(rgb, 0, 255).astype(np.uint8) |
| return pal |
|
|
| |
| def _display_name_for_prompt_item(item: str) -> str: |
| names = _resolve_prompt_item_to_names(item) |
| if not names: |
| return "unlabeled" |
| disp = [] |
| for n in names: |
| if n in ("background", "unlabeled"): |
| disp.append("unlabeled") |
| else: |
| |
| nn = _ALIASES.get(n.replace("_", " "), n).replace(" ", "_") |
| disp.append(nn) |
| return "+".join(disp) |
|
|
| |
| |
| |
| def colorize_mask(mask_tensor, classes: List[str] | None = None, dataset: str | None = None) -> Image.Image: |
| """ |
| Colorize a [H,W] mask. |
| - If `classes` is provided (prompt mode), use prompt palette: |
| index 0 is background (unlabeled), others per item or averaged |
| - Else, choose dataset palette: 'ade' (151) or default 'coco' (183) |
| """ |
| mask = np.array(mask_tensor, dtype=np.int32) |
| h, w = mask.shape |
|
|
| if classes is not None: |
| pal = _palette_for_prompt_classes(classes) |
| else: |
| pal = _palette_for_dataset("ade" if dataset == "ade" else "coco") |
|
|
| color = np.zeros((h, w, 3), dtype=np.uint8) |
| valid = (mask >= 0) & (mask < pal.shape[0]) |
| color[valid] = pal[mask[valid]] |
| return Image.fromarray(color, mode="RGB") |
|
|
|
|
| def overlay_mask(image: Image.Image, color: Image.Image, alpha: float = 0.5) -> Image.Image: |
| if color.size != image.size: |
| color = color.resize(image.size, resample=Image.NEAREST) |
| return Image.blend(image.convert("RGB"), color.convert("RGB"), alpha) |
|
|
|
|
| def build_legend_from_mask(mask_tensor, classes: List[str] | None = None, dataset: str | None = None): |
| """ |
| Build a compact legend for the classes PRESENT in the mask. |
| Returns a list of entries: {'index': int, 'name': str, 'color': [r,g,b]} |
| - In prompt mode, names are prompt-derived (with '+' for multi-terms) |
| - In dataset mode, names come from the dataset class list (COCO or ADE) |
| """ |
| mask = np.array(mask_tensor, dtype=np.int64) |
| present = np.unique(mask[(mask >= 0)]) |
|
|
| legend: List[Dict] = [] |
| if classes is not None: |
| pal = _palette_for_prompt_classes(classes) |
| for idx in present: |
| if 0 <= idx < pal.shape[0]: |
| raw_item = classes[int(idx)] if int(idx) < len(classes) else "unlabeled" |
| try: |
| name = _display_name_for_prompt_item(raw_item) |
| except Exception: |
| name = str(raw_item) |
| col = pal[int(idx)] |
| legend.append({ |
| "index": int(idx), |
| "name": name, |
| "color": [int(col[0]), int(col[1]), int(col[2])], |
| }) |
| else: |
| if dataset == "ade": |
| names = ADE_151_CLASS_NAMES |
| pal = _palette_for_dataset("ade") |
| else: |
| names = CLASS_NAMES |
| pal = _palette_for_dataset("coco") |
|
|
| for idx in present: |
| if 0 <= idx < len(names): |
| col = pal[int(idx)] |
| legend.append({ |
| "index": int(idx), |
| "name": names[int(idx)], |
| "color": [int(col[0]), int(col[1]), int(col[2])], |
| }) |
|
|
| legend.sort(key=lambda e: (0 if e["index"] == 0 else 1, e["index"])) |
| return legend |
|
|