{
"model_name": "luca-deandrea/MNLP_M3_mcqa_model",
"quantization_method": "bitsandbytes",
"config_name": "bnb_4bit_fp4",
"description": "4-bit FP4 with double quantization",
"expected_compression": "~8x",
"quantization_config": {
"load_in_4bit": true,
"load_in_8bit": false,
"bnb_4bit_quant_type": "fp4",
"bnb_4bit_use_double_quant": true,
"bnb_4bit_compute_dtype": "torch.float16",
"llm_int8_threshold": 6.0
},
"timestamp": "2025-06-10 21:31:28",
"loading_time_seconds": 4.605607509613037,
"memory_usage_gb": 0.5091643333435059,
"test_generation": "Hello, I am a bit confused about the difference between the terms of"
}