import copy
from collections import OrderedDict
from logging import error, warning

import numpy as np
import tensorrt as trt
import torch
from torch.cuda import nvtx
from tqdm import tqdm

from polygraphy import util
from polygraphy.backend.common import bytes_from_path
from polygraphy.backend.trt import (
    ModifyNetworkOutputs,
    Profile,
    engine_from_bytes,
    engine_from_network,
    network_from_onnx_path,
    save_engine,
)
from polygraphy.logger import G_LOGGER

# Keep TensorRT and Polygraphy logging quiet; only errors are reported.
TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
G_LOGGER.module_severity = G_LOGGER.ERROR


# Mapping between NumPy dtypes and the corresponding torch dtypes, used when
# allocating binding tensors from TensorRT dtype metadata.
numpy_to_torch_dtype_dict = {
    np.bool_: torch.bool,  # np.bool was removed in NumPy 1.24; np.bool_ works on all versions
    np.uint8: torch.uint8,
    np.int8: torch.int8,
    np.int16: torch.int16,
    np.int32: torch.int32,
    np.int64: torch.int64,
    np.float16: torch.float16,
    np.float32: torch.float32,
    np.float64: torch.float64,
    np.complex64: torch.complex64,
    np.complex128: torch.complex128,
}

torch_to_numpy_dtype_dict = {
    value: key for (key, value) in numpy_to_torch_dtype_dict.items()
}


class TQDMProgressMonitor(trt.IProgressMonitor):
    """Reports TensorRT engine-build progress as nested tqdm bars."""

    def __init__(self):
        trt.IProgressMonitor.__init__(self)
        self._active_phases = {}
        self._step_result = True
        self.max_indent = 5

    def phase_start(self, phase_name, parent_phase, num_steps):
        leave = False
        try:
            if parent_phase is not None:
                nbIndents = (
                    self._active_phases.get(parent_phase, {}).get(
                        "nbIndents", self.max_indent
                    )
                    + 1
                )
                if nbIndents >= self.max_indent:
                    return
            else:
                nbIndents = 0
                leave = True
            self._active_phases[phase_name] = {
                "tq": tqdm(
                    total=num_steps, desc=phase_name, leave=leave, position=nbIndents
                ),
                "nbIndents": nbIndents,
                "parent_phase": parent_phase,
            }
        except KeyboardInterrupt:
            # phase_start cannot cancel the build directly; request cancellation
            # via the value returned from step_complete.
            self._step_result = False

    def phase_finish(self, phase_name):
        try:
            if phase_name in self._active_phases.keys():
                # Fill the bar to 100% before removing the phase.
                self._active_phases[phase_name]["tq"].update(
                    self._active_phases[phase_name]["tq"].total
                    - self._active_phases[phase_name]["tq"].n
                )

                parent_phase = self._active_phases[phase_name].get("parent_phase", None)
                while parent_phase is not None:
                    self._active_phases[parent_phase]["tq"].refresh()
                    parent_phase = self._active_phases[parent_phase].get(
                        "parent_phase", None
                    )
                if (
                    self._active_phases[phase_name]["parent_phase"]
                    in self._active_phases.keys()
                ):
                    self._active_phases[
                        self._active_phases[phase_name]["parent_phase"]
                    ]["tq"].refresh()
                del self._active_phases[phase_name]
        except KeyboardInterrupt:
            self._step_result = False

    def step_complete(self, phase_name, step):
        try:
            if phase_name in self._active_phases.keys():
                self._active_phases[phase_name]["tq"].update(
                    step - self._active_phases[phase_name]["tq"].n
                )
            return self._step_result
        except KeyboardInterrupt:
            # Returning False cancels the build.
            return False


class Engine:
    def __init__(
        self,
        engine_path,
    ):
        self.engine_path = engine_path
        self.engine = None
        self.context = None
        self.buffers = OrderedDict()
        self.tensors = OrderedDict()
        self.cuda_graph_instance = None

    def __del__(self):
        del self.engine
        del self.context
        del self.buffers
        del self.tensors

    def reset(self, engine_path=None):
        # Release the current execution context and buffers; optionally point
        # this wrapper at a different serialized engine file.
        del self.context
        del self.buffers
        del self.tensors

        if engine_path is not None:
            self.engine_path = engine_path

        self.context = None
        self.buffers = OrderedDict()
        self.tensors = OrderedDict()
        self.inputs = {}
        self.outputs = {}

    def build(
        self,
        onnx_path,
        fp16,
        input_profile=None,
        enable_refit=False,
        enable_preview=False,
        enable_all_tactics=False,
        timing_cache=None,
        update_output_names=None,
    ):
        """Build a TensorRT engine from an ONNX model and serialize it to
        ``self.engine_path``. Returns 0 on success and 1 on failure."""
        p = [Profile()]
        if input_profile:
            p = [Profile() for _ in range(len(input_profile))]
            for _p, i_profile in zip(p, input_profile):
                for name, dims in i_profile.items():
                    assert len(dims) == 3
                    _p.add(name, min=dims[0], opt=dims[1], max=dims[2])

        # Tactic-source restrictions are collected here but are not applied to
        # the builder config created below.
        config_kwargs = {}
        if not enable_all_tactics:
            config_kwargs["tactic_sources"] = []

        network = network_from_onnx_path(
            onnx_path, flags=[trt.OnnxParserFlag.NATIVE_INSTANCENORM]
        )
        if update_output_names:
            print(f"Updating network outputs to {update_output_names}")
            network = ModifyNetworkOutputs(network, update_output_names)

        builder = network[0]
        config = builder.create_builder_config()
        config.progress_monitor = TQDMProgressMonitor()

        if fp16:
            config.set_flag(trt.BuilderFlag.FP16)
        if enable_refit:
            config.set_flag(trt.BuilderFlag.REFIT)

        profiles = copy.deepcopy(p)
        for profile in profiles:
            # Fill unspecified dimensions from the network, then register the
            # optimization profile with the builder config.
            calib_profile = profile.fill_defaults(network[1]).to_trt(
                builder, network[1]
            )
            config.add_optimization_profile(calib_profile)

        try:
            engine = engine_from_network(
                network,
                config,
            )
        except Exception as e:
            error(f"Failed to build engine: {e}")
            return 1
        try:
            save_engine(engine, path=self.engine_path)
        except Exception as e:
            error(f"Failed to save engine: {e}")
            return 1
        return 0

    def load(self):
        # Deserialize the engine from disk.
        self.engine = engine_from_bytes(bytes_from_path(self.engine_path))

    def activate(self, reuse_device_memory=None):
        if reuse_device_memory:
            # Create a context without its own activation memory and reuse the
            # caller-provided device allocation instead.
            self.context = self.engine.create_execution_context_without_device_memory()
            self.context.device_memory = reuse_device_memory
        else:
            self.context = self.engine.create_execution_context()

    def allocate_buffers(self, shape_dict=None, device="cuda"):
        nvtx.range_push("allocate_buffers")
        for idx in range(self.engine.num_io_tensors):
            name = self.engine.get_tensor_name(idx)
            binding = self.engine[idx]
            # Prefer caller-supplied shapes; otherwise use the shape reported
            # by the execution context.
            if shape_dict and binding in shape_dict:
                shape = shape_dict[binding]["shape"]
            else:
                shape = self.context.get_tensor_shape(name)

            dtype = trt.nptype(self.engine.get_tensor_dtype(name))
            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
                self.context.set_input_shape(name, shape)
            tensor = torch.empty(
                tuple(shape), dtype=numpy_to_torch_dtype_dict[dtype]
            ).to(device=device)
            self.tensors[binding] = tensor
        nvtx.range_pop()

    def infer(self, feed_dict, stream, use_cuda_graph=False):
        nvtx.range_push("set_tensors")
        # Copy the inputs into the pre-allocated binding tensors.
        for name, buf in feed_dict.items():
            self.tensors[name].copy_(buf)

        for name, tensor in self.tensors.items():
            self.context.set_tensor_address(name, tensor.data_ptr())
        nvtx.range_pop()
        nvtx.range_push("execute")
        noerror = self.context.execute_async_v3(stream)
        if not noerror:
            raise ValueError("ERROR: inference failed.")
        nvtx.range_pop()
        return self.tensors

    def __str__(self):
        out = ""
        # The per-binding inspection APIs used below (num_bindings,
        # get_binding_name, get_profile_shape) were removed in newer TensorRT
        # releases, so fall back to an empty description when they are absent.
        if not hasattr(self.engine, "num_optimization_profiles") or not hasattr(
            self.engine, "num_bindings"
        ):
            return out

        for opt_profile in range(self.engine.num_optimization_profiles):
            for binding_idx in range(self.engine.num_bindings):
                name = self.engine.get_binding_name(binding_idx)
                shape = self.engine.get_profile_shape(opt_profile, name)
                out += f"\t{name} = {shape}\n"
        return out
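

# The sketch below is an illustrative usage example only, not part of the
# module's API. The ONNX path, engine path, and the "x" input name and shape
# are hypothetical placeholders, and it assumes the exported ONNX model uses
# static input shapes so allocate_buffers() can size the bindings without an
# explicit shape_dict.
if __name__ == "__main__":
    engine = Engine("model.trt")  # hypothetical engine output path
    # Build an FP16 engine with a single profile where min == opt == max.
    engine.build(
        "model.onnx",  # hypothetical ONNX export
        fp16=True,
        input_profile=[
            {"x": [(1, 3, 224, 224), (1, 3, 224, 224), (1, 3, 224, 224)]}
        ],
    )
    engine.load()
    engine.activate()
    engine.allocate_buffers(device="cuda")
    stream = torch.cuda.current_stream().cuda_stream
    outputs = engine.infer(
        {"x": torch.randn(1, 3, 224, 224, device="cuda")}, stream
    )
    print({k: tuple(v.shape) for k, v in outputs.items()})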