| | |
| | import json |
| | import torch |
| | from safetensors.torch import load_file |
| | from optimum.quanto import requantize, quantize, qint4 |
| | from hunyuan_image_3.hunyuan import HunyuanImage3ForCausalMM |
| | from transformers import AutoConfig, QuantoConfig |
| | from transformers.generation.utils import GenerationConfig |
| |
|
| |
|
def load_quantized_hi3_m1(model_path, *, device="cuda"):
    """Load an int4-quantized HunyuanImage-3 model by quantizing, then loading.

    Steps, in order:
      1. Build the bf16 model via ``from_pretrained`` with ``device_map=None``.
      2. Apply quanto's int4 weight quantization structure to it.
      3. Load the tensors from ``<model_path>/model.safetensors`` into the
         quantized modules.
      4. Move the model to ``device``.

    Args:
        model_path: Directory handed to ``from_pretrained``; it must also
            contain ``model.safetensors``.
        device: Target passed to ``Module.to`` after the weights are loaded.
            Defaults to ``"cuda"``, the value that used to be hard-coded.

    Returns:
        The model returned by the final ``.to(device)`` call.
    """
    print(f"Loading model architecture from {model_path} to CPU...")
    model = HunyuanImage3ForCausalMM.from_pretrained(
        model_path,
        dtype=torch.bfloat16,
        device_map=None,
        attn_implementation="sdpa",
        moe_impl="eager",
        moe_drop_tokens=True,
        trust_remote_code=True,
        low_cpu_mem_usage=False,
    )

    print("Applying int4 quantization structure...")
    quantize(model, weights=qint4)

    print("Loading quantized weights...")
    state_dict = load_file(f"{model_path}/model.safetensors")
    # strict=False tolerates key mismatches; keep the result so mismatches
    # are at least reported instead of being dropped silently.
    load_result = model.load_state_dict(state_dict, strict=False, assign=True)
    mismatches = (
        ("missing", load_result.missing_keys),
        ("unexpected", load_result.unexpected_keys),
    )
    for label, keys in mismatches:
        if keys:
            # Bounded preview: a large model may have thousands of keys.
            preview = ", ".join(keys[:5])
            print(
                f"Warning: {len(keys)} {label} key(s) while loading "
                f"quantized weights (first: {preview})"
            )

    print(f"Moving quantized model to {device}...")
    model = model.to(device)

    return model
| |
|
| |
|
def load_quantized_hi3_m2(model_path, *, device="cuda"):
    """Load a quantized HunyuanImage-3 model from a meta-device skeleton.

    Reads the config, ``model.safetensors`` and ``quantization_map.json``
    from ``model_path``, instantiates the model under ``torch.device('meta')``,
    casts it to bfloat16, and lets quanto's ``requantize`` rebuild the
    quantized modules and load the weights on CPU. The generation config
    from ``model_path`` is then attached and the model is moved to ``device``.

    Args:
        model_path: Directory containing the model config,
            ``model.safetensors``, ``quantization_map.json`` and the
            generation config.
        device: Final device for the model (string or ``torch.device``).
            Defaults to ``"cuda"``, the value that used to be hard-coded.

    Returns:
        The model returned by the final ``.to(...)`` call.
    """
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)

    state_dict = load_file(f"{model_path}/model.safetensors")
    # Explicit encoding: JSON is UTF-8, and the platform default may differ.
    map_path = f"{model_path}/quantization_map.json"
    with open(map_path, "r", encoding="utf-8") as map_file:
        quantization_map = json.load(map_file)

    print("Create Meta model and Loading quantized weights to CPU...")
    # Construct under the meta device; requantize is asked to place the
    # real weights on CPU below.
    with torch.device("meta"):
        model = HunyuanImage3ForCausalMM(config)
    model = model.to(torch.bfloat16)
    requantize(model, state_dict, quantization_map, device=torch.device("cpu"))

    # The model is built from the bare config rather than from_pretrained,
    # so the generation config is loaded and attached explicitly.
    model.generation_config = GenerationConfig.from_pretrained(model_path)

    print(f"Moving quantized model to {device}...")
    model = model.to(torch.device(device))

    return model
| |
|
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|