Thomaschtl committed on
Commit
306e94d
·
verified ·
1 Parent(s): dd956ab

Upload quantization_info.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. quantization_info.json +17 -0
quantization_info.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "quantization_method": "hybrid_int8_fp16",
3
+ "linear_layers": "0/0 (INT8)",
4
+ "embedding_layers": "1/1 (FP16)",
5
+ "total_quantized": "1/1",
6
+ "original_model": "luca-deandrea/MNLP_M3_mcqa_model",
7
+ "quantization_timestamp": "2025-06-10 22:10:59",
8
+ "pytorch_version": "2.6.0+cu118",
9
+ "estimated_compression_ratio": "1.3x",
10
+ "estimated_size_mb": 1704.5302734375,
11
+ "original_size_mb": 2272.70703125,
12
+ "formats_included": [
13
+ "pytorch_bin_only"
14
+ ],
15
+ "lighteval_compatible": true,
16
+ "notes": "Linear layers: INT8 quantization, Embedding layers: FP16 conversion"
17
+ }