"""Weight casting utilities for efficient model loading."""
from src.Device import Device
import torch
import logging
logger = logging.getLogger(__name__)
def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False):
"""Cast a weight tensor to specified dtype and device."""
if device is None or weight.device == device:
if not copy and (dtype is None or weight.dtype == dtype):
return weight
return weight.to(dtype=dtype, copy=copy)
r = torch.empty_like(weight, dtype=dtype, device=device)
r.copy_(weight, non_blocking=non_blocking)
return r
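
# Illustrative usage (not part of the original module; tensor/device names are hypothetical):
#
#   w = torch.randn(128, 128)  # fp32 weight on CPU
#   w_gpu = cast_to(w, dtype=torch.float16, device=torch.device("cuda"), non_blocking=True)
#   same = cast_to(w)          # nothing to change and copy=False, so w is returned as-is
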
def cast_to_input(weight, input, non_blocking=False, copy=True):
"""Cast weight tensor to match input tensor's dtype and device."""
return cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
"""Cast module's bias and weight to match input tensor."""
if input is not None:
dtype = dtype or input.dtype
bias_dtype = bias_dtype or dtype
device = device or input.device
non_blocking = Device.device_supports_non_blocking(device)
bias = None
if s.bias is not None:
has_fn = s.bias_function is not None
bias = cast_to(s.bias, bias_dtype, device, non_blocking=non_blocking, copy=has_fn)
if has_fn:
bias = s.bias_function(bias)
has_fn = s.weight_function is not None
weight = cast_to(s.weight, None, device, non_blocking=non_blocking, copy=has_fn)
# Handle NVFP4 dequantization
if getattr(s, "quant_format", None) == "nvfp4":
from src.Utilities.Quantization import dequantize_nvfp4
weight = dequantize_nvfp4(
weight,
s.weight_scale_2,
s.weight_scale,
s.original_shape
)
weight = weight.to(dtype)
else:
weight = weight.to(dtype)
if has_fn:
weight = s.weight_function(weight)
return weight, bias
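
# Illustrative call pattern (a sketch, not part of the original module): inside a module's
# forward pass, the stored parameters are cast to match the incoming activation, e.g.
#
#   def forward(self, x):
#       weight, bias = cast_bias_weight(self, x)  # now on x.device with x.dtype
#       return torch.nn.functional.linear(x, weight, bias)
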
class CastWeightBiasOp:
"""Mixin for cast weight/bias operations."""
comfy_cast_weights = False
weight_function = None
bias_function = None
class disable_weight_init:
"""Module wrappers with disabled weight initialization."""
class Linear(torch.nn.Linear, CastWeightBiasOp):
def reset_parameters(self): return None
def forward_comfy_cast_weights(self, input):
weight, bias = cast_bias_weight(self, input)
return torch.nn.functional.linear(input, weight, bias)
def forward(self, *args, **kwargs):
return self.forward_comfy_cast_weights(*args, **kwargs) if self.comfy_cast_weights else super().forward(*args, **kwargs)
class Conv1d(torch.nn.Conv1d, CastWeightBiasOp):
def reset_parameters(self): return None
def forward_comfy_cast_weights(self, input):
weight, bias = cast_bias_weight(self, input)
return self._conv_forward(input, weight, bias)
def forward(self, *args, **kwargs):
return self.forward_comfy_cast_weights(*args, **kwargs) if self.comfy_cast_weights else super().forward(*args, **kwargs)
    class Conv2d(torch.nn.Conv2d, CastWeightBiasOp):
        def reset_parameters(self): return None

        def forward_comfy_cast_weights(self, input):
            weight, bias = cast_bias_weight(self, input)
            return self._conv_forward(input, weight, bias)

        def forward(self, *args, **kwargs):
            return self.forward_comfy_cast_weights(*args, **kwargs) if self.comfy_cast_weights else super().forward(*args, **kwargs)

    class Conv3d(torch.nn.Conv3d, CastWeightBiasOp):
        def reset_parameters(self): return None

        def forward_comfy_cast_weights(self, input):
            weight, bias = cast_bias_weight(self, input)
            return self._conv_forward(input, weight, bias)

        def forward(self, *args, **kwargs):
            return self.forward_comfy_cast_weights(*args, **kwargs) if self.comfy_cast_weights else super().forward(*args, **kwargs)

    class GroupNorm(torch.nn.GroupNorm, CastWeightBiasOp):
        def reset_parameters(self): return None

        def forward_comfy_cast_weights(self, input):
            weight, bias = cast_bias_weight(self, input)
            return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)

        def forward(self, *args, **kwargs):
            return self.forward_comfy_cast_weights(*args, **kwargs) if self.comfy_cast_weights else super().forward(*args, **kwargs)

    class LayerNorm(torch.nn.LayerNorm, CastWeightBiasOp):
        def reset_parameters(self): return None

        def forward_comfy_cast_weights(self, input):
            weight, bias = cast_bias_weight(self, input) if self.weight is not None else (None, None)
            return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)

        def forward(self, *args, **kwargs):
            return self.forward_comfy_cast_weights(*args, **kwargs) if self.comfy_cast_weights else super().forward(*args, **kwargs)

    class ConvTranspose2d(torch.nn.ConvTranspose2d, CastWeightBiasOp):
        def reset_parameters(self): return None

        def forward_comfy_cast_weights(self, input, output_size=None):
            output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size, 2, self.dilation)
            weight, bias = cast_bias_weight(self, input)
            return torch.nn.functional.conv_transpose2d(input, weight, bias, self.stride, self.padding, output_padding, self.groups, self.dilation)

        def forward(self, *args, **kwargs):
            return self.forward_comfy_cast_weights(*args, **kwargs) if self.comfy_cast_weights else super().forward(*args, **kwargs)

    class ConvTranspose1d(torch.nn.ConvTranspose1d, CastWeightBiasOp):
        def reset_parameters(self): return None

        def forward_comfy_cast_weights(self, input, output_size=None):
            output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size, 1, self.dilation)
            weight, bias = cast_bias_weight(self, input)
            return torch.nn.functional.conv_transpose1d(input, weight, bias, self.stride, self.padding, output_padding, self.groups, self.dilation)

        def forward(self, *args, **kwargs):
            return self.forward_comfy_cast_weights(*args, **kwargs) if self.comfy_cast_weights else super().forward(*args, **kwargs)

    class Embedding(torch.nn.Embedding, CastWeightBiasOp):
        def reset_parameters(self):
            self.bias = None
            return None

        def forward_comfy_cast_weights(self, input, out_dtype=None):
            output_dtype = out_dtype
            # fp16/bf16 weights are looked up in their storage dtype; only the
            # result of the lookup is cast to the requested output dtype.
            if self.weight.dtype in (torch.float16, torch.bfloat16):
                out_dtype = None
            weight, _ = cast_bias_weight(self, device=input.device, dtype=out_dtype)
            return torch.nn.functional.embedding(input, weight, self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse).to(dtype=output_dtype)

        def forward(self, *args, **kwargs):
            if self.comfy_cast_weights:
                return self.forward_comfy_cast_weights(*args, **kwargs)
            kwargs.pop("out_dtype", None)
            return super().forward(*args, **kwargs)

    @classmethod
    def conv_nd(cls, dims, *args, **kwargs):
        """Create Conv2d/Conv3d based on dimensions."""
        if dims == 2: return cls.Conv2d(*args, **kwargs)
        if dims == 3: return cls.Conv3d(*args, **kwargs)
        raise ValueError(f"unsupported dimensions: {dims}")
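
    # Example (illustrative, not part of the original module): picking the convolution
    # class by spatial dimensionality.
    #
    #   conv = disable_weight_init.conv_nd(2, 16, 32, kernel_size=3, padding=1)  # -> Conv2d
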
class manual_cast(disable_weight_init):
"""Module wrappers with manual casting enabled by default."""
class Linear(disable_weight_init.Linear): comfy_cast_weights = True
class Conv1d(disable_weight_init.Conv1d): comfy_cast_weights = True
class Conv2d(disable_weight_init.Conv2d): comfy_cast_weights = True
class Conv3d(disable_weight_init.Conv3d): comfy_cast_weights = True
class GroupNorm(disable_weight_init.GroupNorm): comfy_cast_weights = True
class LayerNorm(disable_weight_init.LayerNorm): comfy_cast_weights = True
class ConvTranspose2d(disable_weight_init.ConvTranspose2d): comfy_cast_weights = True
class ConvTranspose1d(disable_weight_init.ConvTranspose1d): comfy_cast_weights = True
class Embedding(disable_weight_init.Embedding): comfy_cast_weights = True
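

if __name__ == "__main__":
    # Minimal smoke test, added as an illustrative sketch (not part of the original module).
    # It assumes Device.device_supports_non_blocking accepts a CPU device. A manual_cast
    # layer keeps its parameters in fp16 but casts them to the input's dtype/device at
    # forward time, so an fp32 input yields an fp32 output.
    layer = manual_cast.Linear(4, 2, dtype=torch.float16)
    with torch.no_grad():
        layer.weight.normal_()  # reset_parameters is a no-op, so initialize explicitly
        layer.bias.zero_()
    x = torch.randn(1, 4, dtype=torch.float32)
    y = layer(x)
    print(y.dtype)  # expected: torch.float32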