Upload quantization_info.json with huggingface_hub
Browse files- quantization_info.json +17 -0
quantization_info.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"quantization_method": "hybrid_int8_fp16",
|
| 3 |
+
"linear_layers": "0/0 (INT8)",
|
| 4 |
+
"embedding_layers": "1/1 (FP16)",
|
| 5 |
+
"total_quantized": "1/1",
|
| 6 |
+
"original_model": "luca-deandrea/MNLP_M3_mcqa_model",
|
| 7 |
+
"quantization_timestamp": "2025-06-10 22:10:59",
|
| 8 |
+
"pytorch_version": "2.6.0+cu118",
|
| 9 |
+
"estimated_compression_ratio": "1.3x",
|
| 10 |
+
"estimated_size_mb": 1704.5302734375,
|
| 11 |
+
"original_size_mb": 2272.70703125,
|
| 12 |
+
"formats_included": [
|
| 13 |
+
"pytorch_bin_only"
|
| 14 |
+
],
|
| 15 |
+
"lighteval_compatible": true,
|
| 16 |
+
"notes": "Linear layers: INT8 quantization, Embedding layers: FP16 conversion"
|
| 17 |
+
}
|