File size: 381 Bytes
e14cc32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
{
  "bits": 4,
  "data_type": "int",
  "group_size": 128,
  "sym": true,
  "low_gpu_mem_usage": true,
  "autoround_version": "0.13.1",
  "block_name_to_quantize": "model.layers",
  "quant_method": "auto-round",
  "packing_format": "auto_round:auto_gptq",
  "extra_config": {
    ".*model\\.layers\\.([1-9]|[1-3][0-9]|4[0-8])\\.mlp\\.gate.*": {
      "bits": 16,
      "data_type": "float"
    }
  }
}