# unwritten2 / TensorRTBBoxDetector.py
# saliacoel's picture
# Upload TensorRTBBoxDetector.py
# 202cca2 verified
import os
import logging
from typing import List, Tuple
import torch
import numpy as np
from ultralytics import YOLO
# Impact Pack (for SEG and SEGS helpers)
import impact.core as core
from impact.core import SEG
# Optional: TensorRT sanity check
try:
import tensorrt as trt # type: ignore
except Exception:
trt = None
# Local helpers
try:
# If this folder is a package (has __init__.py), use relative import
from .utils_salia import (
NODE_DIR,
IMGSZ,
list_local_pt_files,
tensor_to_pil,
make_crop_region,
crop_image,
crop_ndarray2,
dilate_mask,
)
except ImportError:
# Fallback: direct import if utils_salia is on sys.path
from utils_salia import (
NODE_DIR,
IMGSZ,
list_local_pt_files,
tensor_to_pil,
make_crop_region,
crop_image,
crop_ndarray2,
dilate_mask,
)
logger = logging.getLogger(__name__)
# -------------------------------------------------------------------------
# YOLO TensorRT-based BBOX_DETECTOR implementation
# -------------------------------------------------------------------------
class TRTYOLOBBoxDetector:
    """
    Bounding-box detector that runs a YOLO model through the Ultralytics
    wrapper and packages each detection as an Impact Pack SEG.

    Written to be used as an Impact Pack BBOX_DETECTOR (e.g. by FaceDetailer),
    which relies on these methods:
        - setAux(x)
        - detect(image, threshold, dilation, crop_factor, drop_size=1, detailer_hook=None)
        - detect_combined(image, threshold, dilation)
    """

    def __init__(self, yolo_model: YOLO, device: str = "0"):
        """
        Remember the model and the device string passed to it on every call.

        Args:
            yolo_model: Ultralytics YOLO object used for inference.
            device: "0", "cuda" and "cuda:0" are all stored as "0"; any other
                value is stored as str(device).
        """
        self.bbox_model = yolo_model
        self.device = "0" if device in ("0", "cuda", "cuda:0") else str(device)
        # Written by setAux(); nothing in this class reads it back.
        self.aux = None

    def setAux(self, x):
        """Store x on self.aux (API-compatibility hook; otherwise unused here)."""
        self.aux = x

    def detect(
        self,
        image: torch.Tensor,
        threshold: float,
        dilation: int,
        crop_factor: float,
        drop_size: int = 1,
        detailer_hook=None,
    ) -> Tuple[Tuple[int, int], List[SEG]]:
        """
        Run the model on one image and return the detections as SEGS.

        Args:
            image: 4-dim IMAGE tensor laid out as [B, H, W, C]. When B != 1 a
                warning is logged and only image[:1] is used.
            threshold: confidence threshold forwarded to the model as `conf`.
            dilation: forwarded to dilate_mask() when non-zero.
            crop_factor: forwarded to make_crop_region() for every kept box.
            drop_size: boxes whose width or height is <= this value are skipped.
            detailer_hook: optional object; its `post_crop_region` and
                `post_detection` methods are called when they exist.

        Returns:
            ((H, W), [SEG, ...]). When the model yields no results or no
            boxes, ((H, W), []) is returned immediately and `post_detection`
            is not invoked.

        Raises:
            ValueError: if `image` does not have exactly 4 dimensions.
        """
        if image.dim() != 4:
            raise ValueError(
                "[TRTYOLOBBoxDetector] Expected IMAGE tensor with 4 dims [B, H, W, C]."
            )

        if image.shape[0] != 1:
            logger.warning(
                "[TRTYOLOBBoxDetector] Batch > 1 detected, using only the first image for detection."
            )
            image = image[:1]

        height = int(image.shape[1])
        width = int(image.shape[2])
        shape = (height, width)

        # Inference goes through the Ultralytics call interface; only the
        # confidence threshold, device and verbosity are set here.
        results = self.bbox_model(
            tensor_to_pil(image),
            conf=threshold,
            device=self.device,
            verbose=False,
        )
        if len(results) == 0:
            return (shape, [])

        first = results[0]
        boxes = first.boxes
        if boxes is None or boxes.xyxy is None or boxes.xyxy.shape[0] == 0:
            return (shape, [])

        corners = boxes.xyxy.cpu().numpy()  # rows of (x1, y1, x2, y2)
        scores = boxes.conf.cpu().numpy()
        class_ids = boxes.cls.cpu().numpy().astype(int)
        names = first.names  # class index -> class name lookup

        segments: List[SEG] = []
        for idx, (x1, y1, x2, y2) in enumerate(corners):
            confidence = float(scores[idx])
            class_id = int(class_ids[idx])
            label = names.get(class_id, str(class_id))

            span_x = x2 - x1
            span_y = y2 - y1
            if span_x <= drop_size or span_y <= drop_size:
                continue

            # Integer pixel bounds, limited to the image rectangle.
            left = max(int(np.floor(x1)), 0)
            top = max(int(np.floor(y1)), 0)
            right = min(int(np.ceil(x2)), width)
            bottom = min(int(np.ceil(y2)), height)
            if not (right > left and bottom > top):
                continue

            # Full-frame uint8 mask: 255 inside the box, 0 elsewhere.
            mask = np.zeros(shape, dtype=np.uint8)
            mask[top:bottom, left:right] = 255
            if dilation != 0:
                mask = dilate_mask(mask, dilation)

            # The bbox keeps the model's (unclamped) coordinates as floats,
            # ordered (x1, y1, x2, y2).
            item_bbox = [float(v) for v in (x1, y1, x2, y2)]

            crop_region = make_crop_region(width, height, item_bbox, crop_factor)
            if detailer_hook is not None and hasattr(detailer_hook, "post_crop_region"):
                crop_region = detailer_hook.post_crop_region(
                    width, height, item_bbox, crop_region
                )

            segments.append(
                SEG(
                    crop_image(image, crop_region),
                    crop_ndarray2(mask, crop_region),
                    confidence,
                    crop_region,
                    item_bbox,
                    label,
                    None,  # control_net_wrapper
                )
            )

        segs = (shape, segments)
        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
            segs = detailer_hook.post_detection(segs)
        return segs

    def detect_combined(
        self,
        image: torch.Tensor,
        threshold: float,
        dilation: int,
    ) -> torch.Tensor:
        """
        Detect with crop_factor=1.0, drop_size=1 and no hook, then hand the
        resulting SEGS to core.segs_to_combined_mask() and return its result.
        """
        return core.segs_to_combined_mask(
            self.detect(
                image=image,
                threshold=threshold,
                dilation=dilation,
                crop_factor=1.0,
                drop_size=1,
                detailer_hook=None,
            )
        )
# -------------------------------------------------------------------------
# NODE 1: TRTYOLOEngineBuilder
# - Builds TensorRT engine from a .pt file sitting next to this .py
# imgsz = IMGSZ (H, W) from utils_salia
# batch = 1
# half = True (FP16)
# device = "0"
# overwrite (exist_ok) = True
# -------------------------------------------------------------------------
class TRTYOLOEngineBuilder:
    """
    ComfyUI node that exports a TensorRT engine from a YOLO .pt file located
    in NODE_DIR and returns the path of the resulting engine file.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the single required input: a choice among local .pt files."""
        # Fall back to a one-entry placeholder list when no .pt file is found.
        choices = list_local_pt_files() or ["face.pt"]
        tooltip = (
            "Select a YOLO .pt file that lives in the SAME folder as this node file.\n"
            "Example: 'face.pt' next to TensorRTBBoxDetector.py"
        )
        return {
            "required": {
                "pt_model_name": (
                    choices,
                    {"default": choices[0], "tooltip": tooltip},
                ),
            }
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("engine_path",)
    FUNCTION = "build"
    CATEGORY = "ImpactPack/TensorRT"

    def _check_tensorrt_available(self):
        """
        Preflight check: make sure the TensorRT Python module was imported and
        that a trt.Builder can be constructed.

        Raises:
            RuntimeError: if the module is missing or constructing the builder
                raises; the original exception is chained as the cause.
        """
        if trt is None:
            raise RuntimeError(
                "[TRTYOLOEngineBuilder] TensorRT Python package is not available. "
                "Install it via pip (cu12 build) or use an image with TensorRT preinstalled."
            )

        try:
            # The logger stays bound to a local so it outlives the builder.
            trt_logger = trt.Logger(trt.Logger.ERROR)
            probe = trt.Builder(trt_logger)
            del probe
        except Exception as exc:
            raise RuntimeError(
                "[TRTYOLOEngineBuilder] TensorRT failed to initialize. "
                "Check that your CUDA driver, CUDA runtime, and TensorRT versions match. "
                f"Original error: {exc}"
            ) from exc

    def build(self, pt_model_name: str):
        """
        Export `pt_model_name` (resolved against NODE_DIR) to a TensorRT engine.

        Args:
            pt_model_name: file name of the .pt model, joined onto NODE_DIR.

        Returns:
            One-element tuple holding the engine file path as a string.

        Raises:
            FileNotFoundError: if the .pt file is missing, or if no engine
                file exists at the reported path after the export.
            RuntimeError: propagated from _check_tensorrt_available().
        """
        pt_path = os.path.join(NODE_DIR, pt_model_name)
        if not os.path.isfile(pt_path):
            raise FileNotFoundError(
                f"[TRTYOLOEngineBuilder] .pt model not found next to this node: {pt_path}"
            )

        # Fail early with a readable message before Ultralytics starts exporting.
        self._check_tensorrt_available()

        logger.info(
            f"[TRTYOLOEngineBuilder] Exporting TensorRT engine from '{pt_path}' "
            f"with imgsz={IMGSZ} (H,W), batch=1, half=True, device='0', exist_ok=True"
        )

        # Arguments common to both export attempts.
        export_args = dict(format="engine", imgsz=IMGSZ, half=True, device="0")
        try:
            exported = YOLO(pt_path).export(**export_args, exist_ok=True)
        except TypeError:
            # Retry without exist_ok in case this Ultralytics version rejects it.
            exported = YOLO(pt_path).export(**export_args)

        engine_path = str(exported)

        # A relative result is re-anchored at NODE_DIR, but only when a file
        # actually exists there; otherwise the path is left unchanged.
        if not os.path.isabs(engine_path):
            candidate = os.path.join(NODE_DIR, engine_path)
            if os.path.isfile(candidate):
                engine_path = candidate

        if not os.path.isfile(engine_path):
            raise FileNotFoundError(
                f"[TRTYOLOEngineBuilder] Export completed but engine file not found at: {engine_path}"
            )

        logger.info(f"[TRTYOLOEngineBuilder] Export done. Engine path: {engine_path}")
        return (engine_path,)
# -------------------------------------------------------------------------
# NODE 2: TRTYOLOBBoxDetectorProvider
# - Loads the TensorRT engine and wraps it as BBOX_DETECTOR
# - engine_path can be:
# * Absolute path
# * Relative to this Python file's folder
# -------------------------------------------------------------------------
class TRTYOLOBBoxDetectorProvider:
    """
    ComfyUI node that loads a TensorRT .engine file with Ultralytics YOLO and
    wraps it in a TRTYOLOBBoxDetector.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the single required input: the engine path as a string."""
        return {
            "required": {
                "engine_path": (
                    "STRING",
                    {
                        "default": "",
                        "multiline": False,
                        "tooltip": (
                            "Path to .engine file.\n"
                            "Can be an absolute path OR a path RELATIVE to this node's folder.\n"
                            "Typically, you connect this from TRTYOLOEngineBuilder."
                        ),
                    },
                ),
            }
        }

    RETURN_TYPES = ("BBOX_DETECTOR",)
    RETURN_NAMES = ("bbox_detector",)
    FUNCTION = "load"
    CATEGORY = "ImpactPack/TensorRT"

    def load(self, engine_path: str):
        """
        Load the engine at `engine_path` and return it wrapped as a detector.

        Args:
            engine_path: absolute path, or a path relative to NODE_DIR.
                Surrounding whitespace is ignored.

        Returns:
            One-element tuple holding a TRTYOLOBBoxDetector built with
            device="0".

        Raises:
            ValueError: if `engine_path` is None, empty or whitespace-only.
            FileNotFoundError: if no file exists at the resolved path.
        """
        # Strip BEFORE the emptiness check: otherwise a whitespace-only value
        # passes the check, turns into "" and resolves to the node folder
        # itself, producing a misleading "file not found" error.
        engine_path = (engine_path or "").strip()
        if not engine_path:
            raise ValueError(
                "[TRTYOLOBBoxDetectorProvider] engine_path is empty. "
                "Connect the output from TRTYOLOEngineBuilder or type a path."
            )

        # Relative paths are resolved against this node's folder.
        if not os.path.isabs(engine_path):
            engine_path = os.path.join(NODE_DIR, engine_path)

        if not os.path.isfile(engine_path):
            raise FileNotFoundError(
                f"[TRTYOLOBBoxDetectorProvider] Engine file not found: {engine_path}"
            )

        logger.info(
            f"[TRTYOLOBBoxDetectorProvider] Loading YOLO TensorRT engine from '{engine_path}' on device '0'"
        )

        # The engine file is handed straight to the Ultralytics YOLO wrapper.
        yolo_model = YOLO(engine_path)
        detector = TRTYOLOBBoxDetector(yolo_model, device="0")
        return (detector,)
# -------------------------------------------------------------------------
# ComfyUI registration
# -------------------------------------------------------------------------
# Node identifier -> implementing class.
NODE_CLASS_MAPPINGS = {
    "TRTYOLOEngineBuilder": TRTYOLOEngineBuilder,
    "TRTYOLOBBoxDetectorProvider": TRTYOLOBBoxDetectorProvider,
}

# Node identifier -> human-readable display name.
NODE_DISPLAY_NAME_MAPPINGS = {
    "TRTYOLOEngineBuilder": "TensorRT YOLO Engine Builder (1344x768, local .pt)",
    "TRTYOLOBBoxDetectorProvider": "TensorRT YOLO BBox Detector",
}