Could you share the model quantization script or parameters? Thanks.
How do you quantize models for your ComfyUI-SDNQ nodes? Do you use the Disty0/sdnq script?
I use a custom script that currently only works for Wan, Qwen, and Flux.1; other models may work, but likely won't. Only basic LoRAs are supported:
import torch
from safetensors import safe_open
from safetensors.torch import save_file, load_file
from hqqsvd.linear import HQQSVDLinear
from tqdm import tqdm
from fnmatch import fnmatch
compute_dtype = torch.bfloat16
in_path = "wan2.2_t2v_low_noise_14B_fp16.safetensors"
out_path = "wan2.2_t2v_low_noise_14B_sdnq_uint4_r128.safetensors"
lora_path = None  # optional LoRA to merge into the base weights before quantizing
weights_dtype = "uint4" # only uint4 supported for now
keys = list(safe_open(in_path, framework="pt").keys())
if lora_path:
    lora = load_file(lora_path, "cuda")
else:
    lora = None
patterns = [
    # wan
    "*_attn.?.weight",
    "*ffn.?.weight",
    # qwen
    "*attn.add_?_proj.weight",
    "*attn.to_add_out.weight",
    "*attn.to_?.weight",
    "*attn.to_out.?.weight",
    "*mlp.net.?.proj.weight",
    "*mlp.net.?.weight",
    "*_mod.?.weight",
    # flux
    "*mlp.?.weight",
    "*_mod.lin.weight",
    "*linear?.weight",
    "*modulation.lin.weight",
]
skip_patterns = [
    "*.0.ffn.0.weight",
    "*img_in*",
    "*txt_in*",
    "*guidance_in*",
    "*vector_in*",
    "*time_in*",
    "*norm_out*",
    "*proj_out*",
    "*time_text_embed*",
    "transformer_blocks.0.img_mod.1.weight",
]
prefixes = ("single_blocks", "double_blocks", "transformer_blocks", "blocks")
def should_quant(key):
    return any(fnmatch(key, p) and key.startswith(prefixes) for p in patterns) and not any(
        fnmatch(key, p) for p in skip_patterns
    )
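# Illustration with two Wan-style keys (hypothetical names, for clarity only):
# "blocks.1.self_attn.q.weight" starts with "blocks" and matches "*_attn.?.weight",
# so it gets quantized; "blocks.0.ffn.0.weight" matches "*ffn.?.weight" but is
# caught by the "*.0.ffn.0.weight" skip pattern, so it stays in bfloat16.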
new_tensors = {}
with torch.no_grad():
    for key in tqdm(keys):
        # reopen the file for every key to reduce memory usage
        with safe_open(in_path, framework="pt", device="cuda") as f:
            if key in new_tensors:
                continue
            weight = f.get_tensor(key)
            if should_quant(key):
                base = key.replace(".weight", "")
                weight = weight.to(torch.bfloat16)
                if lora is not None:
                    if (base + ".lora_down.weight") in lora:
                        # merge the LoRA delta into the base weight before quantizing:
                        # W' = W + (alpha / r) * up @ down
                        lora_down = lora[base + ".lora_down.weight"]
                        lora_up = lora[base + ".lora_up.weight"]
                        alpha = lora[base + ".alpha"]
                        r = lora_down.shape[0]
                        weight += (alpha / r) * (lora_up @ lora_down)
                # wrap the weight in a temporary nn.Linear so HQQSVDLinear can quantize it
                linear = torch.nn.Linear(
                    weight.shape[1],
                    weight.shape[0],
                    False,  # the bias is stored as its own key and handled in the else branch
                    device="cuda",
                    dtype=weight.dtype,
                )
                linear.load_state_dict({"weight": weight})
                sdnq_linear = HQQSVDLinear.from_linear(linear)
                # store the quantized weight plus its SVD and quantization parameters
                new_tensors[base + ".weight"] = sdnq_linear.weight.cpu().contiguous()
                new_tensors[base + ".svd_up"] = sdnq_linear.svd_up.cpu().contiguous()
                new_tensors[base + ".svd_down"] = sdnq_linear.svd_down.cpu().contiguous()
                new_tensors[base + ".zero_point"] = sdnq_linear.zero_point.cpu().contiguous()
                new_tensors[base + ".scale"] = sdnq_linear.scale.cpu().contiguous()
                new_tensors[base + ".nbits"] = sdnq_linear.nbits
                del linear
                del sdnq_linear
                torch.cuda.empty_cache()
            else:
                # everything else (biases, norms, embeddings) is just cast to bfloat16
                new_tensors[key] = weight.to(torch.bfloat16)
save_file(new_tensors, out_path)
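If you want to sanity-check the output before loading it, here is a minimal sketch using the same safetensors API (the filename is the out_path from the script above):

from safetensors import safe_open

# list the keys and confirm the SDNQ-specific companion tensors were written
with safe_open("wan2.2_t2v_low_noise_14B_sdnq_uint4_r128.safetensors", framework="pt") as f:
    keys = list(f.keys())
    print(len(keys), "tensors")
    # quantized layers carry .scale / .zero_point / .svd_up / .svd_down companions
    print([k for k in keys if k.endswith(".scale")][:5])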
It may also be possible to convert Disty0's diffusers-format models to my format, but I'm not sure.
Thanks a lot, bro! I will try to convert my "Magic-Wan-Image-V2", which is refined from the Wan2.2 T2V base model.
Haha! Thanks so much, bro, this really makes things easier for me. I'll download it and try it immediately...
@kanttouchthis, I tested the quantized model, very good! Thanks a lot again! I'm refining new versions of "Real-Qwen-Image-2512" and "Magic-Wan-Image-V3" now...
