deepseek-ocr-mbq-w4bit / quantization_report.json
SamMikaelson's picture
Upload MBQ quantized model (W4A8) - 65.05% size reduction
04e51cf
raw
history blame contribute delete
918 Bytes
{
"quantization_stats": {
"original_size_mb": 6362.53515625,
"quantized_size_mb": 2223.747215270996,
"size_reduction_mb": 4138.787940979004,
"size_reduction_percent": 65.04935280260761,
"compression_ratio": 2.8611773463085064,
"n_quantized_layers": 2342
},
"safetensors_size_mb": 3352.126941680908,
"mbq_config": {
"w_bit": 4,
"a_bit": 8,
"mixed_precision": true,
"sensitivity_metric": "hessian",
"calibration_samples": 128,
"preserve_ratio": 0.15
},
"model_info": {
"base_model": "deepseek-ai/DeepSeek-OCR",
"total_params": 3336106240,
"quantized_layers": 2342
},
"bit_allocation_summary": {
"8-bit": 351,
"4-bit": 1991
},
"files": {
"model.safetensors": "3352.13 MB",
"quantized_weights.pt": "Compressed quantized weights",
"config.json": "Model configuration",
"tokenizer_files": "Tokenizer configuration"
}
}