# Source: zoo3d/exts/cropformer_runner.py
# Last commit: "init" (4eeefd1), drozdgk
import os
import sys
import glob
import numpy as np
import cv2
from typing import List, Optional
"""
Lightweight runner for CropFormer/Mask2Former inference without spawning a process.
Keeps a global singleton VisualizationDemo so the model is initialized only once.
"""
# Path of the CropFormer project directory. It stays None until
# make_cropformer_dir() is called, which sets it and adds it to sys.path.
_CROPF_DIR = None
def make_cropformer_dir(MK_PATH: str) -> str:
    """
    Register the CropFormer project directories on ``sys.path``.

    The project directory is derived from ``MK_PATH`` and remembered in the
    module-level ``_CROPF_DIR``. Calling this function repeatedly with the
    same ``MK_PATH`` does not add duplicate ``sys.path`` entries.

    Args:
        MK_PATH: Directory that contains ``third_party/detectron2``.

    Returns:
        The CropFormer project directory built from ``MK_PATH``.
    """
    global _CROPF_DIR
    _CROPF_DIR = os.path.join(MK_PATH, "third_party/detectron2/projects/CropFormer")
    # Each entry is guarded on its own so neither can be duplicated or skipped.
    # Inserting the project directory first and "demo_cropformer" second keeps
    # the original lookup order ("demo_cropformer" ends up in front).
    for entry in (_CROPF_DIR, os.path.join(_CROPF_DIR, "demo_cropformer")):
        if entry not in sys.path:
            sys.path.insert(0, entry)
    return _CROPF_DIR
# Globals (singleton)
# Cached VisualizationDemo instance; None until _ensure_demo() builds one.
_DEMO = None
# Key describing the configuration that _DEMO was built from, as produced by
# _build_key(); None while no model has been built.
_CFG_KEY = None # (config_file_abs, tuple(opts))
def _build_key(config_file: str, opts: Optional[List[str]]) -> tuple:
    """
    Build the hashable cache key identifying a model configuration.

    The key pairs the absolute path of ``config_file`` with the override
    options frozen into a tuple; ``None`` or an empty ``opts`` both map to
    an empty tuple.
    """
    resolved_config = os.path.abspath(config_file)
    frozen_opts = () if not opts else tuple(opts)
    return (resolved_config, frozen_opts)
def preload_cropformer_model(config_file: str, opts: Optional[List[str]] = None) -> bool:
    """
    Warm up the shared model so later inference calls can reuse it.

    Delegates to ``_ensure_demo``, which builds the model for the given
    config/options or reuses the cached one. Always returns True when the
    call completes; any exception from model construction propagates.
    """
    _ensure_demo(config_file=config_file, opts=opts)
    initialized = True
    return initialized
def _ensure_demo(config_file: str, opts: Optional[List[str]]):
    """
    Return the shared VisualizationDemo for the requested config/options.

    The cached instance is reused when its key matches the requested one;
    otherwise a new config is assembled, a new VisualizationDemo is built
    from it, and both the instance and its key replace the cached values.
    """
    global _DEMO, _CFG_KEY

    requested_key = _build_key(config_file, opts)
    cache_hit = _DEMO is not None and _CFG_KEY == requested_key

    if not cache_hit:
        # Imported here rather than at module level so that importing this
        # module stays cheap.
        from detectron2.config import get_cfg
        from detectron2.projects.deeplab import add_deeplab_config
        from mask2former import add_maskformer2_config
        from predictor import VisualizationDemo

        cfg = get_cfg()
        for register_defaults in (add_deeplab_config, add_maskformer2_config):
            register_defaults(cfg)
        cfg.merge_from_file(config_file)
        if opts:
            cfg.merge_from_list(opts)
        cfg.freeze()

        _DEMO = VisualizationDemo(cfg)
        _CFG_KEY = requested_key

    return _DEMO
def _write_mask_png(output_dir: str, image_path: str, mask_image) -> None:
    """Write ``mask_image`` to ``output_dir`` as ``<image stem>.png``; raise OSError on failure."""
    # splitext removes only the final extension, so a dotted stem such as
    # "frame.0001.jpg" maps to "frame.0001.png" instead of colliding on "frame.png".
    stem = os.path.splitext(os.path.basename(image_path))[0]
    target = os.path.join(output_dir, stem + ".png")
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(target, mask_image):
        raise OSError(f"Failed to write mask image: {target}")


def _compose_mask_image(selected_masks, selected_scores, min_mask_pixels: int):
    """Merge per-instance masks into one uint8 label image (0 = background, ids from 1)."""
    import torch

    _, mask_h, mask_w = selected_masks.shape
    mask_image = np.zeros((mask_h, mask_w), dtype=np.uint8)
    # Ascending score order: masks painted later overwrite earlier ones, so
    # where masks overlap the higher-scoring instance keeps the pixels.
    _, ranks = torch.sort(selected_scores)
    # NOTE(review): labels are stored as uint8, so more than 255 kept masks in
    # one image cannot be represented - confirm this never happens in practice.
    mask_id = 1
    for index in ranks:
        mask = selected_masks[index]
        if torch.sum(mask) < min_mask_pixels:
            # Ignore small masks; they do not consume a label id.
            continue
        mask_image[(mask == 1).cpu().numpy()] = mask_id
        mask_id += 1
    return mask_image


def run_cropformer_mask_predict(
    config_file: str,
    root: str,
    image_path_pattern: str,
    dataset: str,
    seq_name_list: str,
    confidence_threshold: float = 0.5,
    opts: Optional[List[str]] = None,
    min_mask_pixels: int = 400,
) -> None:
    """
    Run CropFormer/Mask2Former demo (mask_predict) logic directly from Python.

    For every sequence, each image matching ``image_path_pattern`` is run
    through the shared model and one label PNG per image is written into
    ``{root}/{seq}/output/mask`` (``{root}/{seq}/{seq}/output/mask`` when
    ``dataset`` is ``"matterport3d"``). An all-zero mask is written when no
    instance passes the confidence threshold.

    Args:
        config_file: Model config file passed to ``_ensure_demo``.
        root: Directory containing one sub-directory per sequence.
        image_path_pattern: Glob pattern, relative to the sequence directory.
        dataset: Dataset name; only ``"matterport3d"`` changes the output path.
        seq_name_list: One or more sequence names joined by ``+``.
        confidence_threshold: Minimum score for an instance to be kept.
        opts: Optional config overrides passed to ``_ensure_demo``.
        min_mask_pixels: Masks whose pixel sum is below this value are dropped.

    Raises:
        OSError: If a mask image cannot be written.
    """
    from detectron2.data.detection_utils import read_image

    demo = _ensure_demo(config_file, opts)

    # Support multiple sequences joined by '+'
    for seq_name in seq_name_list.split("+"):
        seq_dir = os.path.join(root, seq_name)
        image_list = sorted(glob.glob(os.path.join(seq_dir, image_path_pattern)))
        if dataset == "matterport3d":
            output_dir = os.path.join(seq_dir, seq_name, "output/mask")
        else:
            output_dir = os.path.join(seq_dir, "output/mask")
        os.makedirs(output_dir, exist_ok=True)

        for path in image_list:
            # Read BGR image as in original demo
            img = read_image(path, format="BGR")
            instances = demo.run_on_image(img)["instances"]

            # Select by threshold
            keep = instances.scores >= confidence_threshold
            selected_scores = instances.scores[keep]
            selected_masks = instances.pred_masks[keep]

            if selected_masks.numel() == 0:
                # Still write an empty mask to keep pipeline consistent
                h, w = img.shape[:2]
                mask_image = np.zeros((h, w), dtype=np.uint8)
            else:
                mask_image = _compose_mask_image(
                    selected_masks, selected_scores, min_mask_pixels
                )
            _write_mask_png(output_dir, path, mask_image)