Update config.json
Browse files
- config.json +0 -33
config.json
CHANGED
|
@@ -94,39 +94,6 @@
|
|
| 94 |
"q_lora_rank": 1536,
|
| 95 |
"qk_nope_head_dim": 128,
|
| 96 |
"qk_rope_head_dim": 64,
|
| 97 |
-
"quantization_config": {
|
| 98 |
-
"config_groups": {
|
| 99 |
-
"group_0": {
|
| 100 |
-
"input_activations": null,
|
| 101 |
-
"output_activations": null,
|
| 102 |
-
"targets": [
|
| 103 |
-
"Linear"
|
| 104 |
-
],
|
| 105 |
-
"weights": {
|
| 106 |
-
"actorder": null,
|
| 107 |
-
"block_structure": null,
|
| 108 |
-
"dynamic": false,
|
| 109 |
-
"group_size": 32,
|
| 110 |
-
"num_bits": 4,
|
| 111 |
-
"observer": "minmax",
|
| 112 |
-
"observer_kwargs": {},
|
| 113 |
-
"strategy": "group",
|
| 114 |
-
"symmetric": true,
|
| 115 |
-
"type": "int"
|
| 116 |
-
}
|
| 117 |
-
}
|
| 118 |
-
},
|
| 119 |
-
"format": "pack-quantized",
|
| 120 |
-
"ignore": [
|
| 121 |
-
"lm_head",
|
| 122 |
-
"re:.*self_attn.*",
|
| 123 |
-
"re:.*shared_experts.*",
|
| 124 |
-
"re:.*mlp\\.(gate|up|gate_up|down)_proj.*"
|
| 125 |
-
],
|
| 126 |
-
"kv_cache_scheme": null,
|
| 127 |
-
"quant_method": "compressed-tensors",
|
| 128 |
-
"quantization_status": "compressed"
|
| 129 |
-
},
|
| 130 |
"remove_invalid_values": false,
|
| 131 |
"repetition_penalty": 1.0,
|
| 132 |
"return_dict": true,
|
|
|
|
| 94 |
"q_lora_rank": 1536,
|
| 95 |
"qk_nope_head_dim": 128,
|
| 96 |
"qk_rope_head_dim": 64,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
"remove_invalid_values": false,
|
| 98 |
"repetition_penalty": 1.0,
|
| 99 |
"return_dict": true,
|