Request for FP4 quants
Hi, is it possible to get FP4 quants for this model? Thanks.
import modelopt.torch.quantization as mtq
from modelopt.torch.utils.dataset_utils import get_dataset_dataloader
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
import torch
from configuration_glm4_moe import Glm4MoeConfig
from modeling_glm4_moe import Glm4MoeForCausalLM
# Load GLM-4.5-Air from the local checkpoint directory. The weights are
# requested in bfloat16 and device placement is delegated to device_map="auto".
config = Glm4MoeConfig.from_pretrained("downloaded_models/GLM-4.5-Air")
model = Glm4MoeForCausalLM.from_pretrained(
    "downloaded_models/GLM-4.5-Air",
    config=config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Print the model's hf_device_map to see where the modules were placed.
print(model.hf_device_map)
# Select the quantization config; this script uses the NVFP4 default config.
# The tokenizer is loaded from the same local checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained("downloaded_models/GLM-4.5-Air")

# Quantization settings later passed to mtq.quantize: the NVFP4 default config.
# NOTE: this rebinds the name `config` to the quantization config.
config = mtq.NVFP4_DEFAULT_CFG

# Calibration dataloader built from the cnn_dailymail dataset:
# 64 samples requested, with a batch size of 1.
num_samples = 64
batch_size = 1
calib_dataset = get_dataset_dataloader(
    dataset_name="cnn_dailymail",
    tokenizer=tokenizer,
    batch_size=batch_size,
    num_samples=num_samples,
)
def forward_loop(model):
    """Run every calibration batch through ``model``.

    Iterates over the module-level ``calib_dataset`` and calls ``model`` on
    each batch's ``'input_ids'`` entry. The model outputs are discarded; the
    function exists only to drive forward passes and returns ``None``.

    Args:
        model: A callable that accepts the ``'input_ids'`` value of a batch.
    """
    for data in calib_dataset:
        model(data['input_ids'])
# Post-training quantization (PTQ) with in-place replacement by quantized modules
# Quantize the model with the selected config, passing forward_loop so that
# mtq.quantize can run the calibration batches through the model.
model = mtq.quantize(model, config, forward_loop=forward_loop)

# Print a summary of the quantized model.
mtq.print_quant_summary(model)
from modelopt.torch.export import export_hf_checkpoint
# Export the quantized model to export_dir. The export call runs under
# torch.inference_mode().
export_dir = "downloaded_models/GLM-4.5-Air-nvfp4-1"
with torch.inference_mode():
    export_hf_checkpoint(
        model,  # The quantized model.
        export_dir=export_dir,  # The directory where the exported files will be stored.
    )