request for fp4 quants

#2
by hareram241 - opened

Hi, is it possible to get FP4 quants for this model? Thanks!

import modelopt.torch.quantization as mtq
from modelopt.torch.utils.dataset_utils import get_dataset_dataloader
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
import torch

from configuration_glm4_moe import Glm4MoeConfig
from modeling_glm4_moe import Glm4MoeForCausalLM

# Load the GLM-4.5-Air checkpoint with its custom (trust-remote-code style)
# config classes, in bf16, sharded automatically across available devices.
model_path = "downloaded_models/GLM-4.5-Air"
config = Glm4MoeConfig.from_pretrained(model_path)
model = Glm4MoeForCausalLM.from_pretrained(
    model_path,
    config=config,
    device_map="auto",          # let accelerate place layers on GPUs/CPU
    torch_dtype=torch.bfloat16, # load weights directly in bf16
)

# Show how the layers were distributed across devices.
print(model.hf_device_map)

Select the quantization config — here the NVFP4 default config is used (the ModelOpt docs' example mentions INT8 SmoothQuant, but the code below selects NVFP4):

# Quantization recipe: ModelOpt's default NVFP4 configuration.
config = mtq.NVFP4_DEFAULT_CFG

tokenizer = AutoTokenizer.from_pretrained("downloaded_models/GLM-4.5-Air")

# Small calibration run: 64 samples, one sample per batch.
batch_size = 1
num_samples = 64

# Dataloader over cnn_dailymail used to observe activation ranges
# during calibration.
calib_dataset = get_dataset_dataloader(
    dataset_name="cnn_dailymail",
    tokenizer=tokenizer,
    batch_size=batch_size,
    num_samples=num_samples,
)

def forward_loop(model):
    """Feed every calibration batch through *model* so ModelOpt can record
    activation statistics.

    Reads the module-level ``calib_dataset`` dataloader; each batch is
    assumed to be a dict with an ``input_ids`` tensor.

    NOTE(review): the original paste had the loop body unindented, which is
    an IndentationError; the body is restored here. If the model is sharded
    across devices, the inputs may additionally need a ``.to(...)`` — confirm
    against the ModelOpt calibration example.
    """
    for data in calib_dataset:
        model(data['input_ids'])

Run post-training quantization (PTQ), replacing the model's modules in place with quantized equivalents:

# Calibrate and quantize in one step: mtq.quantize runs `forward_loop` to
# collect activation statistics, then swaps modules in place per `config`
# (NVFP4 here) and returns the quantized model.
model = mtq.quantize(model, config, forward_loop)

# Print which layers were quantized and with what precision, for sanity-checking.
mtq.print_quant_summary(model)

from modelopt.torch.export import export_hf_checkpoint

# Write the quantized model out as a Hugging Face-style checkpoint.
# NOTE(review): the original paste lost the indentation under the `with`
# block (an IndentationError); restored here.
export_dir = "downloaded_models/GLM-4.5-Air-nvfp4-1"
with torch.inference_mode():
    export_hf_checkpoint(
        model,                  # the quantized model
        export_dir=export_dir,  # directory where the exported files are stored
    )

Sign up or log in to comment