Could you share the model quantization script or parameters? Thanks.

#3
by wikeeyang - opened

How do you quantize a model for your ComfyUI-SDNQ nodes? Do you use the Disty0/sdnq script?

I use a custom script that currently only works for Wan, Qwen, and Flux.1. Others may work, but likely won't. Only basic LoRAs are supported:

import torch
from safetensors import safe_open
from safetensors.torch import save_file, load_file
from hqqsvd.linear import HQQSVDLinear
from tqdm import tqdm
from fnmatch import fnmatch

# ---- Configuration ----
compute_dtype = torch.bfloat16
in_path = "wan2.2_t2v_low_noise_14B_fp16.safetensors"
out_path = "wan2.2_t2v_low_noise_14B_sdnq_uint4_r128.safetensors"
lora_path = None  # optional: LoRA checkpoint to bake into the weights before quantizing
weights_dtype = "uint4" # only uint4 supported for now

# Collect every tensor name up front. Use a context manager so the file
# handle is closed deterministically instead of leaking until GC (the
# original called safe_open() without `with`).
with safe_open(in_path, framework="pt") as f:
    keys = list(f.keys())

# Pre-load the LoRA state dict onto the GPU if one was given.
lora = load_file(lora_path, "cuda") if lora_path else None

# fnmatch globs selecting which Linear weights get quantized, per architecture.
patterns = [
    # wan
    "*_attn.?.weight",
    "*ffn.?.weight",
    # qwen
    "*attn.add_?_proj.weight",
    "*attn.to_add_out.weight",
    "*attn.to_?.weight",
    "*attn.to_out.?.weight",
    "*mlp.net.?.proj.weight",
    "*mlp.net.?.weight",
    "*_mod.?.weight",
    # flux
    "*mlp.?.weight",
    "*_mod.lin.weight",
    # BUG FIX: the original list was missing a comma after "*linear?.weight",
    # so Python concatenated the two adjacent string literals into one
    # broken pattern ("*linear?.weight*modulation.lin.weight") and flux
    # modulation layers were never matched.
    "*linear?.weight",
    "*modulation.lin.weight",
]
# Keys matching any of these globs are always kept unquantized
# (input/output projections and other quantization-sensitive layers).
skip_patterns = [
    "*.0.ffn.0.weight",
    "*img_in*",
    "*txt_in*",
    "*guidance_in*",
    "*vector_in*",
    "*time_in*",
    "*norm_out*",
    "*proj_out*",
    "*time_text_embed*",
    "transformer_blocks.0.img_mod.1.weight",
]

# Only tensors inside the transformer blocks are candidates at all.
prefixes = ("single_blocks", "double_blocks", "transformer_blocks", "blocks")

def should_quant(key: str) -> bool:
    """Return True if the tensor named *key* should be quantized.

    A key qualifies when it lives under one of the known block prefixes,
    matches at least one quantization pattern, and matches no skip pattern.
    """
    # The prefix test is pattern-independent, so evaluate it once up front
    # instead of once per pattern inside any() as the original did.
    if not key.startswith(prefixes):
        return False
    if any(fnmatch(key, p) for p in skip_patterns):
        return False
    return any(fnmatch(key, p) for p in patterns)

# Quantize each candidate tensor and collect everything for the output file.
new_tensors = {}
with torch.no_grad():
    for key in tqdm(keys):
        # Skip already-emitted keys *before* touching the file, so a
        # duplicate never pays for a file reopen (the original performed
        # this check inside the `with`, after reopening).
        if key in new_tensors:
            continue
        # reopen for every key to reduce memory usage
        with safe_open(in_path, framework="pt", device="cuda") as f:
            weight = f.get_tensor(key)
        if should_quant(key):
            base = key.replace(".weight", "")
            weight = weight.to(compute_dtype)
            # Bake the LoRA delta into the base weight before quantizing:
            # W' = W + (alpha / r) * (up @ down)
            if lora is not None and (base + ".lora_down.weight") in lora:
                lora_down = lora[base + ".lora_down.weight"]
                lora_up = lora[base + ".lora_up.weight"]
                alpha = lora[base + ".alpha"]
                r = lora_down.shape[0]
                weight += (alpha / r) * (lora_up @ lora_down)

            # Wrap the raw weight in an nn.Linear so HQQSVDLinear can
            # consume it. bias=False here: any ".bias" tensor for this
            # layer is copied through untouched by the else branch below.
            linear = torch.nn.Linear(
                weight.shape[1],
                weight.shape[0],
                False,
                device="cuda",
                dtype=weight.dtype,
            )
            linear.load_state_dict({"weight": weight})

            # Quantize with SVD low-rank correction, then move every
            # component to the CPU so the output dict doesn't pin GPU memory.
            sdnq_linear = HQQSVDLinear.from_linear(linear)
            new_tensors[base + ".weight"] = sdnq_linear.weight.cpu().contiguous()
            new_tensors[base + ".svd_up"] = sdnq_linear.svd_up.cpu().contiguous()
            new_tensors[base + ".svd_down"] = sdnq_linear.svd_down.cpu().contiguous()
            new_tensors[base + ".zero_point"] = sdnq_linear.zero_point.cpu().contiguous()
            new_tensors[base + ".scale"] = sdnq_linear.scale.cpu().contiguous()
            new_tensors[base + ".nbits"] = sdnq_linear.nbits
            # Free the GPU copies immediately so peak VRAM stays bounded.
            del linear
            del sdnq_linear
            torch.cuda.empty_cache()
        else:
            # Non-quantized tensors (norms, biases, embeddings, ...) are
            # stored as bf16. NOTE(review): this also casts any integer
            # tensor to bf16 — presumably none exist in these checkpoints;
            # confirm before reusing on other architectures.
            new_tensors[key] = weight.to(compute_dtype)


save_file(new_tensors, out_path)

It may also be possible to convert Disty0's diffusers-format models to my format, but I'm not sure.

Thanks a lot, bro! I will try to convert my "Magic-Wan-Image-V2", which is refined based on the Wan2.2 T2V model. 👍❤️🤝🤝🤝

i already had your model downloaded, so i added it here :)

Haha! Much thanks! Bro, this really makes things easier for me...I'll download it and try it immediately...πŸ‘βœŒοΈπŸ’πŸ’πŸ’

@kanttouchthis , I have tested the quantized model — very good! Thanks a lot again! ✌️👍👍👍 I'm just refining the new versions of "Real-Qwen-Image-2512" and "Magic-Wan-Image-V3" now...😜

FP8&Q41&SDNQ-Compare

Sign up or log in to comment