saliacoel commited on
Commit
202cca2
·
verified ·
1 Parent(s): d7c9f29

Upload TensorRTBBoxDetector.py

Browse files
Files changed (1) hide show
  1. TensorRTBBoxDetector.py +412 -0
TensorRTBBoxDetector.py ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from typing import List, Tuple
4
+
5
+ import torch
6
+ import numpy as np
7
+ from ultralytics import YOLO
8
+
9
+ # Impact Pack (for SEG and SEGS helpers)
10
+ import impact.core as core
11
+ from impact.core import SEG
12
+
13
+ # Optional: TensorRT sanity check
14
+ try:
15
+ import tensorrt as trt # type: ignore
16
+ except Exception:
17
+ trt = None
18
+
19
+ # Local helpers
20
+ try:
21
+ # If this folder is a package (has __init__.py), use relative import
22
+ from .utils_salia import (
23
+ NODE_DIR,
24
+ IMGSZ,
25
+ list_local_pt_files,
26
+ tensor_to_pil,
27
+ make_crop_region,
28
+ crop_image,
29
+ crop_ndarray2,
30
+ dilate_mask,
31
+ )
32
+ except ImportError:
33
+ # Fallback: direct import if utils_salia is on sys.path
34
+ from utils_salia import (
35
+ NODE_DIR,
36
+ IMGSZ,
37
+ list_local_pt_files,
38
+ tensor_to_pil,
39
+ make_crop_region,
40
+ crop_image,
41
+ crop_ndarray2,
42
+ dilate_mask,
43
+ )
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
+ # -------------------------------------------------------------------------
49
+ # YOLO TensorRT-based BBOX_DETECTOR implementation
50
+ # -------------------------------------------------------------------------
51
+
52
+
53
class TRTYOLOBBoxDetector:
    """
    BBOX_DETECTOR interface compatible with Impact Pack / FaceDetailer.

    Wraps an Ultralytics YOLO model (typically loaded from a TensorRT
    .engine file) and converts its bbox predictions into Impact Pack
    SEGS so FaceDetailer and friends can consume them.

    Methods required:
        - setAux(x)
        - detect(image, threshold, dilation, crop_factor, drop_size=1, detailer_hook=None)
        - detect_combined(image, threshold, dilation)
    """

    def __init__(self, yolo_model: YOLO, device: str = "0"):
        # Ultralytics picks the TensorRT backend automatically when the
        # model was constructed from an .engine file.
        self.bbox_model = yolo_model
        # Ultralytics accepts "0" or "cuda:0"; normalize a bit:
        if device in ("0", "cuda", "cuda:0"):
            self.device = "0"
        else:
            self.device = str(device)

        # FaceDetailer calls setAux('face'); we keep it for compatibility.
        self.aux = None

    def setAux(self, x):
        # Kept for API compatibility. You could use this
        # later to filter by specific labels/classes.
        self.aux = x

    def detect(
        self,
        image: torch.Tensor,
        threshold: float,
        dilation: int,
        crop_factor: float,
        drop_size: int = 1,
        detailer_hook=None,
    ) -> Tuple[Tuple[int, int], List[SEG]]:
        """
        Main detection method used by FaceDetailer.

        Args:
            image: ComfyUI IMAGE tensor [B, H, W, C] in 0..1
            threshold: confidence threshold
            dilation: mask dilation in pixels
            crop_factor: expansion factor for bbox when computing crop_region
            drop_size: minimum bbox width/height to keep
            detailer_hook: optional hook with post_crop_region / post_detection

        Returns:
            SEGS tuple: ( (H, W), [SEG, SEG, ...] )
        """

        if image.dim() != 4:
            raise ValueError(
                "[TRTYOLOBBoxDetector] Expected IMAGE tensor with 4 dims [B, H, W, C]."
            )

        # Only the first image of a batch is used for detection.
        if image.shape[0] != 1:
            logger.warning(
                "[TRTYOLOBBoxDetector] Batch > 1 detected, using only the first image for detection."
            )
            image = image[:1]

        h, w = int(image.shape[1]), int(image.shape[2])
        shape = (h, w)

        # -----------------------------------------------------------------
        # Run YOLO TensorRT via Ultralytics wrapper
        # -----------------------------------------------------------------
        pil_img = tensor_to_pil(image)  # should return a single PIL image for B=1

        # Ultralytics chooses TensorRT backend automatically when you pass an .engine
        # model to YOLO(). Here we only set device & threshold.
        pred_list = self.bbox_model(
            pil_img,
            conf=threshold,
            device=self.device,
            verbose=False,
        )

        # No results object at all -> empty SEGS with the image shape.
        if len(pred_list) == 0:
            return (shape, [])

        pred = pred_list[0]
        boxes = pred.boxes

        if boxes is None or boxes.xyxy is None or boxes.xyxy.shape[0] == 0:
            return (shape, [])

        xyxy = boxes.xyxy.cpu().numpy()  # [N, 4] (x1, y1, x2, y2)
        confs = boxes.conf.cpu().numpy()
        clses = boxes.cls.cpu().numpy().astype(int)
        names = pred.names  # dict: class_index -> class_name

        seg_items: List[SEG] = []

        for i in range(xyxy.shape[0]):
            x1, y1, x2, y2 = xyxy[i]
            score = float(confs[i])
            cls_id = int(clses[i])
            label = names.get(cls_id, str(cls_id))

            # Drop tiny detections: only boxes strictly larger than
            # drop_size in BOTH dimensions are kept.
            box_w = x2 - x1
            box_h = y2 - y1
            if box_w <= drop_size or box_h <= drop_size:
                continue

            # Clamp bbox to image bounds
            x1_i = max(int(np.floor(x1)), 0)
            y1_i = max(int(np.floor(y1)), 0)
            x2_i = min(int(np.ceil(x2)), w)
            y2_i = min(int(np.ceil(y2)), h)
            if x2_i <= x1_i or y2_i <= y1_i:
                continue

            # Rectangular mask from bbox, uint8 0..255
            mask = np.zeros((h, w), dtype=np.uint8)
            mask[y1_i:y2_i, x1_i:x2_i] = 255

            # Optional dilation
            # NOTE(review): the crop_region below is computed from the
            # UNdilated bbox, so a large dilation may extend the mask
            # beyond the crop — confirm this matches Impact Pack behavior.
            if dilation != 0:
                mask = dilate_mask(mask, dilation)

            # Impact bbox order here is (x1, y1, x2, y2)
            item_bbox = [float(x1), float(y1), float(x2), float(y2)]

            # Compute crop region from bbox and crop_factor
            crop_region = make_crop_region(w, h, item_bbox, crop_factor)
            if detailer_hook is not None and hasattr(detailer_hook, "post_crop_region"):
                crop_region = detailer_hook.post_crop_region(
                    w, h, item_bbox, crop_region
                )

            # Crop image + mask
            cropped_image = crop_image(image, crop_region)  # torch [1, h', w', C]
            cropped_mask = crop_ndarray2(mask, crop_region)  # np.uint8 [h', w']

            # SEG field order: (cropped_image, cropped_mask, confidence,
            # crop_region, bbox, label, control_net_wrapper).
            seg = SEG(
                cropped_image,
                cropped_mask,
                score,
                crop_region,
                item_bbox,
                label,
                None,  # control_net_wrapper
            )
            seg_items.append(seg)

        segs = (shape, seg_items)

        # Allow the hook to post-process the final SEGS (e.g. filtering).
        if detailer_hook is not None and hasattr(detailer_hook, "post_detection"):
            segs = detailer_hook.post_detection(segs)

        return segs

    def detect_combined(
        self,
        image: torch.Tensor,
        threshold: float,
        dilation: int,
    ) -> torch.Tensor:
        """
        Optional helper API: returns a combined MASK of all detections.

        Runs detect() with crop_factor=1.0 and drop_size=1, then merges
        every SEG mask into a single mask via Impact Pack's helper.
        """
        segs = self.detect(
            image=image,
            threshold=threshold,
            dilation=dilation,
            crop_factor=1.0,
            drop_size=1,
            detailer_hook=None,
        )
        return core.segs_to_combined_mask(segs)
224
+
225
+
226
+ # -------------------------------------------------------------------------
227
+ # NODE 1: TRTYOLOEngineBuilder
228
+ # - Builds TensorRT engine from a .pt file sitting next to this .py
229
+ # imgsz = IMGSZ (H, W) from utils_salia
230
+ # batch = 1
231
+ # half = True (FP16)
232
+ # device = "0"
233
+ # overwrite (exist_ok) = True
234
+ # -------------------------------------------------------------------------
235
+
236
+
237
class TRTYOLOEngineBuilder:
    """
    ComfyUI node that exports a TensorRT engine from a YOLO .pt checkpoint
    located in the same folder as this node file.

    Export settings: imgsz=IMGSZ (H, W), batch=1, half=True (FP16),
    device="0", exist_ok=True when supported by Ultralytics.
    """

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("engine_path",)
    FUNCTION = "build"
    CATEGORY = "ImpactPack/TensorRT"

    @classmethod
    def INPUT_TYPES(cls):
        # Offer every .pt file next to this node; fall back to a
        # placeholder entry when none are present.
        choices = list_local_pt_files()
        if not choices:
            choices = ["face.pt"]

        widget_opts = {
            "default": choices[0],
            "tooltip": (
                "Select a YOLO .pt file that lives in the SAME folder as this node file.\n"
                "Example: 'face.pt' next to TensorRTBBoxDetector.py"
            ),
        }
        return {"required": {"pt_model_name": (choices, widget_opts)}}

    def _check_tensorrt_available(self):
        """
        Preflight check that fails with a readable message when TensorRT
        cannot initialize (instead of a raw pybind11::init() error deep
        inside Ultralytics).
        """
        if trt is None:
            raise RuntimeError(
                "[TRTYOLOEngineBuilder] TensorRT Python package is not available. "
                "Install it via pip (cu12 build) or use an image with TensorRT preinstalled."
            )
        try:
            # Constructing a Builder is where CUDA/driver mismatches surface.
            probe = trt.Builder(trt.Logger(trt.Logger.ERROR))
            del probe
        except Exception as e:
            raise RuntimeError(
                "[TRTYOLOEngineBuilder] TensorRT failed to initialize. "
                "Check that your CUDA driver, CUDA runtime, and TensorRT versions match. "
                f"Original error: {e}"
            ) from e

    def build(self, pt_model_name: str):
        """
        Export 'pt_model_name' (resolved next to this file) to a TensorRT
        engine and return the absolute engine path as a 1-tuple.

        Raises:
            FileNotFoundError: .pt missing, or export produced no engine file.
            RuntimeError: TensorRT unavailable or failed to initialize.
        """
        weights = os.path.join(NODE_DIR, pt_model_name)
        if not os.path.isfile(weights):
            raise FileNotFoundError(
                f"[TRTYOLOEngineBuilder] .pt model not found next to this node: {weights}"
            )

        # Surface TensorRT/driver problems before Ultralytics starts exporting.
        self._check_tensorrt_available()

        logger.info(
            f"[TRTYOLOEngineBuilder] Exporting TensorRT engine from '{weights}' "
            f"with imgsz={IMGSZ} (H,W), batch=1, half=True, device='0', exist_ok=True"
        )

        try:
            # Ultralytics API: export TensorRT engine directly from .pt
            result = YOLO(weights).export(
                format="engine",
                imgsz=IMGSZ,
                half=True,
                device="0",
                exist_ok=True,  # overwrite or reuse if same settings
            )
        except TypeError:
            # Older Ultralytics versions may not accept every kwarg above.
            result = YOLO(weights).export(
                format="engine",
                imgsz=IMGSZ,
                half=True,
                device="0",
            )

        engine = str(result)

        # A relative result is interpreted as relative to NODE_DIR.
        if not os.path.isabs(engine):
            local_candidate = os.path.join(NODE_DIR, engine)
            if os.path.isfile(local_candidate):
                engine = local_candidate

        if not os.path.isfile(engine):
            raise FileNotFoundError(
                f"[TRTYOLOEngineBuilder] Export completed but engine file not found at: {engine}"
            )

        logger.info(f"[TRTYOLOEngineBuilder] Export done. Engine path: {engine}")

        return (engine,)
335
+
336
+
337
+ # -------------------------------------------------------------------------
338
+ # NODE 2: TRTYOLOBBoxDetectorProvider
339
+ # - Loads the TensorRT engine and wraps it as BBOX_DETECTOR
340
+ # - engine_path can be:
341
+ # * Absolute path
342
+ # * Relative to this Python file's folder
343
+ # -------------------------------------------------------------------------
344
+
345
+
346
class TRTYOLOBBoxDetectorProvider:
    """
    ComfyUI node that loads a TensorRT YOLO engine and wraps it as a
    BBOX_DETECTOR (TRTYOLOBBoxDetector) for Impact Pack / FaceDetailer.

    engine_path can be:
        * an absolute path, or
        * a path relative to this Python file's folder.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "engine_path": (
                    "STRING",
                    {
                        "default": "",
                        "multiline": False,
                        "tooltip": (
                            "Path to .engine file.\n"
                            "Can be an absolute path OR a path RELATIVE to this node's folder.\n"
                            "Typically, you connect this from TRTYOLOEngineBuilder."
                        ),
                    },
                ),
            }
        }

    RETURN_TYPES = ("BBOX_DETECTOR",)
    RETURN_NAMES = ("bbox_detector",)
    FUNCTION = "load"
    CATEGORY = "ImpactPack/TensorRT"

    def load(self, engine_path: str):
        """
        Load the TensorRT engine at engine_path and return a
        (TRTYOLOBBoxDetector,) tuple pinned to device "0".

        Raises:
            ValueError: engine_path is empty or whitespace-only.
            FileNotFoundError: resolved path does not point to a file.
        """
        # FIX: strip BEFORE the emptiness check. Previously a
        # whitespace-only input passed the guard, was stripped to "",
        # and produced a confusing FileNotFoundError pointing at the
        # node folder instead of the clear "engine_path is empty" error.
        engine_path = engine_path.strip() if engine_path else ""
        if not engine_path:
            raise ValueError(
                "[TRTYOLOBBoxDetectorProvider] engine_path is empty. "
                "Connect the output from TRTYOLOEngineBuilder or type a path."
            )

        # If relative, treat as relative to this file's folder
        if not os.path.isabs(engine_path):
            engine_path = os.path.join(NODE_DIR, engine_path)

        if not os.path.isfile(engine_path):
            raise FileNotFoundError(
                f"[TRTYOLOBBoxDetectorProvider] Engine file not found: {engine_path}"
            )

        logger.info(
            f"[TRTYOLOBBoxDetectorProvider] Loading YOLO TensorRT engine from '{engine_path}' on device '0'"
        )

        # Ultralytics will detect it's a TensorRT engine and use TRT backend internally.
        yolo_model = YOLO(engine_path)
        detector = TRTYOLOBBoxDetector(yolo_model, device="0")

        return (detector,)
398
+
399
+
400
+ # -------------------------------------------------------------------------
401
+ # ComfyUI registration
402
+ # -------------------------------------------------------------------------
403
+
404
# Maps ComfyUI node identifiers to the classes that implement them.
NODE_CLASS_MAPPINGS = {
    "TRTYOLOEngineBuilder": TRTYOLOEngineBuilder,
    "TRTYOLOBBoxDetectorProvider": TRTYOLOBBoxDetectorProvider,
}

# Human-readable labels shown in the ComfyUI node picker.
NODE_DISPLAY_NAME_MAPPINGS = {
    "TRTYOLOEngineBuilder": "TensorRT YOLO Engine Builder (1344x768, local .pt)",
    "TRTYOLOBBoxDetectorProvider": "TensorRT YOLO BBox Detector",
}