Upload quantization_info.json with huggingface_hub
Browse files- quantization_info.json +19 -0
quantization_info.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "Qwen/Qwen3-0.6B-Base",
|
| 3 |
+
"quantization_method": "bitsandbytes",
|
| 4 |
+
"config_name": "bnb_4bit_fp4",
|
| 5 |
+
"description": "4-bit FP4 with double quantization",
|
| 6 |
+
"expected_compression": "~8x",
|
| 7 |
+
"quantization_config": {
|
| 8 |
+
"load_in_4bit": true,
|
| 9 |
+
"load_in_8bit": false,
|
| 10 |
+
"bnb_4bit_quant_type": "fp4",
|
| 11 |
+
"bnb_4bit_use_double_quant": true,
|
| 12 |
+
"bnb_4bit_compute_dtype": "torch.float16",
|
| 13 |
+
"llm_int8_threshold": 6.0
|
| 14 |
+
},
|
| 15 |
+
"timestamp": "2025-06-10 21:17:24",
|
| 16 |
+
"loading_time_seconds": 4.421220541000366,
|
| 17 |
+
"memory_usage_gb": 0.5096735954284668,
|
| 18 |
+
"test_generation": "Hello, I am a student in the field of computer science and I"
|
| 19 |
+
}
|