saliacoel committed · verified
Commit f5925eb · 1 Parent(s): 4c0e632

Upload tensorrt_loader.py

Files changed (1)
  1. tensorrt_loader.py +172 -0
tensorrt_loader.py ADDED
@@ -0,0 +1,172 @@
+ # Put this file in the custom_nodes folder and put your TensorRT engine files in ComfyUI/models/tensorrt/ (you will have to create that directory).
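+ # Expected layout (illustrative paths; the .engine filename is just an example):
+ #   ComfyUI/custom_nodes/tensorrt_loader.py
+ #   ComfyUI/models/tensorrt/sdxl_base.engine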
+
+ import torch
+ import os
+
+ import comfy.model_base
+ import comfy.model_management
+ import comfy.model_patcher
+ import comfy.supported_models
+ import folder_paths
+
+ if "tensorrt" in folder_paths.folder_names_and_paths:
+     folder_paths.folder_names_and_paths["tensorrt"][0].append(
+         os.path.join(folder_paths.models_dir, "tensorrt"))
+     folder_paths.folder_names_and_paths["tensorrt"][1].add(".engine")
+ else:
+     folder_paths.folder_names_and_paths["tensorrt"] = (
+         [os.path.join(folder_paths.models_dir, "tensorrt")], {".engine"})
+
+ import tensorrt as trt
+
+ trt.init_libnvinfer_plugins(None, "")
+
+ logger = trt.Logger(trt.Logger.INFO)
+ runtime = trt.Runtime(logger)
+
+ # Is there a function that already exists for this?
+ def trt_datatype_to_torch(datatype):
+     if datatype == trt.float16:
+         return torch.float16
+     elif datatype == trt.float32:
+         return torch.float32
+     elif datatype == trt.int32:
+         return torch.int32
+     elif datatype == trt.bfloat16:
+         return torch.bfloat16
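+     # e.g. trt_datatype_to_torch(trt.float16) -> torch.float16; TensorRT dtypes
+     # not listed above fall through and return None.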
+
+ class TrTUnet:
+     def __init__(self, engine_path):
+         with open(engine_path, "rb") as f:
+             self.engine = runtime.deserialize_cuda_engine(f.read())
+         self.context = self.engine.create_execution_context()
+         self.dtype = torch.float16
+
+     def set_bindings_shape(self, inputs, split_batch):
+         for k in inputs:
+             shape = inputs[k].shape
+             shape = [shape[0] // split_batch] + list(shape[1:])
+             self.context.set_input_shape(k, shape)
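+         # e.g. with inputs {"x": torch.Size([2, 4, 128, 128])} and split_batch=2,
+         # the engine input shape for "x" becomes [1, 4, 128, 128].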
+
+     def __call__(self, x, timesteps, context, y=None, **kwargs):
+         # Ensure input dtypes match the engine precision (e.g. FP16);
+         # .to() is a no-op when the dtype already matches
+         x = x.to(dtype=self.dtype)
+         timesteps = timesteps.to(dtype=self.dtype)
+         context = context.to(dtype=self.dtype)
+         if y is not None:
+             y = y.to(dtype=self.dtype)
+
+         # Prepare the model inputs list
+         model_inputs = [x, timesteps, context]
+         if y is not None:
+             model_inputs.append(y)
+
+         # Identify input and output names using the TensorRT I/O mode
+         tensor_names = [self.engine.get_tensor_name(i) for i in range(self.engine.num_io_tensors)]
+         input_names = [n for n in tensor_names if self.engine.get_tensor_mode(n) == trt.TensorIOMode.INPUT]
+         output_names = [n for n in tensor_names if self.engine.get_tensor_mode(n) == trt.TensorIOMode.OUTPUT]
+
+         # Ensure we have a matching number of input names and provided tensors
+         if len(input_names) != len(model_inputs):
+             raise RuntimeError(f"Expected {len(input_names)} inputs for TensorRT engine, but got {len(model_inputs)}.")
+
+         # Set runtime shapes for dynamic dims and bind input memory
+         for name, tensor in zip(input_names, model_inputs):
+             self.context.set_input_shape(name, tuple(tensor.shape))
+             self.context.set_tensor_address(name, tensor.data_ptr())
+
+         # Infer shapes (ensures all dynamic dims are resolved)
+         missing = self.context.infer_shapes()
+         if missing:  # any tensor shapes still unspecified means something is wrong
+             raise RuntimeError(f"TensorRT shape inference failed, unresolved tensors: {missing}")
+
+         # Allocate outputs with the resolved shapes and the engine-reported dtype,
+         # on the same device as the inputs, then bind output memory
+         outputs = []
+         for name in output_names:
+             out_shape = [int(d) for d in self.context.get_tensor_shape(name)]
+             out_tensor = torch.empty(out_shape, device=x.device,
+                                      dtype=trt_datatype_to_torch(self.engine.get_tensor_dtype(name)))
+             self.context.set_tensor_address(name, out_tensor.data_ptr())
+             outputs.append(out_tensor)
+
+         # Execute on the stream PyTorch is currently using so tensor reads/writes stay ordered
+         self.context.execute_async_v3(stream_handle=torch.cuda.current_stream(x.device).cuda_stream)
+
+         # If there is only one output tensor, return it directly for convenience
+         return outputs[0] if len(outputs) == 1 else tuple(outputs)
+
+     def load_state_dict(self, sd, strict=False):
+         pass
+
+     def state_dict(self):
+         return {}
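+
+ # Minimal smoke-test sketch for TrTUnet on its own (assumptions: a built SDXL-base
+ # engine at the hypothetical path below, and dummy inputs shaped for one of its
+ # optimization profiles; adjust the path and shapes to your engine):
+ #   unet = TrTUnet("ComfyUI/models/tensorrt/sdxl_base.engine")
+ #   x = torch.zeros((2, 4, 128, 128), device="cuda", dtype=torch.float16)
+ #   t = torch.zeros((2,), device="cuda", dtype=torch.float16)
+ #   c = torch.zeros((2, 77, 2048), device="cuda", dtype=torch.float16)
+ #   y = torch.zeros((2, 2816), device="cuda", dtype=torch.float16)
+ #   eps = unet(x, t, c, y=y)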
+
+
+ class TensorRTLoader:
+     @classmethod
+     def INPUT_TYPES(s):
+         return {"required": {"unet_name": (folder_paths.get_filename_list("tensorrt"), ),
+                              "model_type": (["sdxl_base", "sdxl_refiner", "sd1.x", "sd2.x-768v", "svd", "sd3", "auraflow", "flux_dev", "flux_schnell"], ),
+                              }}
+     RETURN_TYPES = ("MODEL",)
+     FUNCTION = "load_unet"
+     CATEGORY = "TensorRT"
+
+     def load_unet(self, unet_name, model_type):
+         unet_path = folder_paths.get_full_path("tensorrt", unet_name)
+         if not os.path.isfile(unet_path):
+             raise FileNotFoundError(f"File {unet_path} does not exist")
+         unet = TrTUnet(unet_path)
+         if model_type == "sdxl_base":
+             conf = comfy.supported_models.SDXL({"adm_in_channels": 2816})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = comfy.model_base.SDXL(conf)
+         elif model_type == "sdxl_refiner":
+             conf = comfy.supported_models.SDXLRefiner(
+                 {"adm_in_channels": 2560})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = comfy.model_base.SDXLRefiner(conf)
+         elif model_type == "sd1.x":
+             conf = comfy.supported_models.SD15({})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = comfy.model_base.BaseModel(conf)
+         elif model_type == "sd2.x-768v":
+             conf = comfy.supported_models.SD20({})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = comfy.model_base.BaseModel(conf, model_type=comfy.model_base.ModelType.V_PREDICTION)
+         elif model_type == "svd":
+             conf = comfy.supported_models.SVD_img2vid({})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = conf.get_model({})
+         elif model_type == "sd3":
+             conf = comfy.supported_models.SD3({})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = conf.get_model({})
+         elif model_type == "auraflow":
+             conf = comfy.supported_models.AuraFlow({})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = conf.get_model({})
+         elif model_type == "flux_dev":
+             conf = comfy.supported_models.Flux({})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = conf.get_model({})
+             unet.dtype = torch.bfloat16  # TODO: autodetect
+         elif model_type == "flux_schnell":
+             conf = comfy.supported_models.FluxSchnell({})
+             conf.unet_config["disable_unet_model_creation"] = True
+             model = conf.get_model({})
+             unet.dtype = torch.bfloat16  # TODO: autodetect
+         model.diffusion_model = unet
+         model.memory_required = lambda *args, **kwargs: 0  # always pass inputs batched up as much as possible, our TRT code will handle batch splitting
+
+         return (comfy.model_patcher.ModelPatcher(model,
+                                                  load_device=comfy.model_management.get_torch_device(),
+                                                  offload_device=comfy.model_management.unet_offload_device()),)
+
+ NODE_CLASS_MAPPINGS = {
+     "TensorRTLoader": TensorRTLoader,
+ }
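
For reference, a minimal sketch of driving the loader outside the ComfyUI graph (assumes a ComfyUI environment where this module imports, and an engine file, hypothetically named sdxl_base.engine here, under models/tensorrt/):

    loader = TensorRTLoader()
    (patched_model,) = loader.load_unet("sdxl_base.engine", "sdxl_base")
    # patched_model is a comfy.model_patcher.ModelPatcher whose diffusion_model
    # is the TrTUnet wrapper defined above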