Upload folder using huggingface_hub

Files changed:
- log_rank0_1767135176.txt +71 -0
- log_rank0_1767135476.txt +0 -0
- log_rank0_1767270363.txt +0 -0
- model-00002-of-00006.safetensors +2 -2
- model-00003-of-00006.safetensors +2 -2
- model-00004-of-00006.safetensors +2 -2
- model-00005-of-00006.safetensors +2 -2
- model.safetensors.index.json +95 -95
- quant_strategy.json +233 -233
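The commit message indicates these files were pushed with the Hub client's folder-upload helper, which packs every file under a local directory into a single commit and routes the large safetensors shards through Git LFS. A minimal sketch of the call that produces a commit like this one (the repo id and local path are hypothetical placeholders, not taken from this repository):

    from huggingface_hub import HfApi

    api = HfApi()
    # One commit containing every file under folder_path; large binaries go through LFS.
    api.upload_folder(
        folder_path="./qwen3-vl-32b-instruct-w3.5a16g128-output",   # hypothetical local output dir
        repo_id="your-username/qwen3-vl-32b-instruct-w3.5a16g128",  # hypothetical target repo
        repo_type="model",
        commit_message="Upload folder using huggingface_hub",
    )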
log_rank0_1767135176.txt
ADDED
@@ -0,0 +1,71 @@
+[2025-12-30 23:52:56 root] (main.py 611): INFO Namespace(net=None, model='/sc/home/nianhui.guo/models/qwen_vl/models--Qwen--Qwen3-VL-32B-Instruct/snapshots/0cfaf48183f594c314753d30a4c4974bc75f3ccb', cache_dir='./cache', output_dir='./log/qwen3-vl-32b-instruct-w3.5a16g128-sample128-seqlen4096-search-layer-mix-gpu-batch4-hadmad-stage1/', save_dir='./log/qwen3-vl-32b-instruct-w3.5a16g128-sample128-seqlen4096-search-layer-mix-gpu-batch4-hadmad-stage1/', resume=None, real_quant=True, calib_dataset='tulu', nsamples=256, batch_size=4, mini_batch_size=4, seed=2, epochs=3, reverse_epochs=3, limit=-1, deactive_amp=False, clip_grad=1.0, seq_length=4096, gradient_checkpoint=False, blocks=0, reverse_block_qat=False, top_calibration=True, top_calibration_blocks=1, recurrent_calibration_iterations=1, bidirection_qat=True, tasks='', eval_ppl=True, num_fewshot=0, eval_seq_length=2048, wbits=3.5, abits=16, symmetric=False, group_size=128, act_group_size=-1, qat=False, shift=True, optimizer='adamw', wd=0, norm_lr=1e-05, scale_lr=1e-05, one_bit_weight_lr=1e-05, two_bit_weight_lr=2e-05, four_bit_weight_lr=5e-06, shift_lr=5e-06, post_lr=5e-06, channel_scaler_lr=5e-06, prepare_lr=5e-06, fp_lr=1e-06, aug_loss=True, layerwise_loss=False, attention_loss=False, kl_loss=True, lm_head_loss=False, search=True, measurement=True, strategy=False, measure_dir='./measure/qwen3-vl-32b-sample128-gpu/', strategy_dir='./strategy/qwen3-vl-32b-sample128-gpu/', nonuniform_layout=False, residual_ratio=0.1, base_rank_ratio=0.01, exl_v2_pack=False, mix_precision_layout='layer_mix', rotation=False, online_rotation=False, rotate_mode='hadamard', lora_adapter=False, attn_implementation='sdpa', local_rank=-1, master_port=25901)
+[2025-12-30 23:53:31 root] (main.py 642): INFO ====================================================================================================
+[2025-12-30 23:53:31 root] (main.py 643): INFO === START QUANTIZATION ===
+[2025-12-30 23:53:31 root] (main.py 644): INFO ====================================================================================================
+[2025-12-30 23:53:59 root] (omniquant.py 155): INFO ====================================================================================================
+[2025-12-30 23:53:59 root] (omniquant.py 156): INFO STARTING OMNIQUANT WITH MIXED-PRECISION SEARCH FOR QWEN3VL
+[2025-12-30 23:53:59 root] (omniquant.py 157): INFO ====================================================================================================
+[2025-12-30 23:53:59 root] (omniquant.py 168): INFO [CONFIG] Disabled use_cache in text_config (was: True)
+[2025-12-30 23:53:59 root] (omniquant.py 186): INFO ====================================================================================================
+[2025-12-30 23:53:59 root] (omniquant.py 187): INFO [MODEL] Detected Qwen3VL - Quantizing TEXT BRANCH ONLY
+[2025-12-30 23:53:59 root] (omniquant.py 188): INFO [MODEL] Vision encoder will remain in FP16
+[2025-12-30 23:53:59 root] (omniquant.py 189): INFO ====================================================================================================
+[2025-12-30 23:54:00 root] (omniquant.py 209): INFO [MODEL] Language model: 64 layers
+[2025-12-30 23:54:00 root] (omniquant.py 210): INFO [MODEL] Hidden size: 5120
+[2025-12-30 23:54:00 root] (omniquant.py 211): INFO [MODEL] Num attention heads: 64
+[2025-12-30 23:54:00 root] (omniquant.py 212): INFO [MODEL] Attention implementation: sdpa
+[2025-12-30 23:54:00 root] (omniquant.py 218): INFO [DEVICE] Using: cuda
+[2025-12-30 23:54:01 root] (omniquant.py 229): INFO [TRAINING] Using FP16 with AMP
+[2025-12-30 23:54:01 root] (omniquant.py 234): INFO ====================================================================================================
+[2025-12-30 23:54:01 root] (omniquant.py 235): INFO [INPUT CAPTURE] Starting...
+[2025-12-30 23:54:01 root] (omniquant.py 236): INFO ====================================================================================================
+[2025-12-30 23:54:02 root] (omniquant.py 239): INFO [INPUT CAPTURE] Allocated inps: torch.Size([256, 4096, 5120])
+[2025-12-30 23:55:51 root] (omniquant.py 270): INFO [INPUT CAPTURE] Captured 256 samples
+[2025-12-30 23:55:51 root] (omniquant.py 271): INFO [INPUT CAPTURE] attention_mask: torch.Size([1, 1, 4096, 4096])
+[2025-12-30 23:55:51 root] (omniquant.py 272): INFO [INPUT CAPTURE] position_ids: torch.Size([1, 4096])
+[2025-12-30 23:55:57 root] (omniquant.py 287): INFO ====================================================================================================
+[2025-12-30 23:55:57 root] (omniquant.py 288): INFO [CALIBRATION DATA] Preparing...
+[2025-12-30 23:55:57 root] (omniquant.py 289): INFO ====================================================================================================
+[2025-12-30 23:56:01 root] (omniquant.py 295): INFO [CALIBRATION DATA] quant_inps: torch.Size([256, 4096, 5120])
+[2025-12-30 23:56:01 root] (omniquant.py 296): INFO [CALIBRATION DATA] fp_inps: torch.Size([256, 4096, 5120])
+[2025-12-30 23:56:01 root] (omniquant.py 298): INFO [CALIBRATION DATA] fp_inps_2: torch.Size([256, 4096, 5120])
+[2025-12-30 23:56:01 root] (omniquant.py 320): INFO [ATTENTION MASK] Single sample: torch.Size([1, 1, 4096, 4096])
+[2025-12-30 23:56:01 root] (omniquant.py 321): INFO [ATTENTION MASK] Batch (mini_batch_size=4): torch.Size([4, 1, 4096, 4096])
+[2025-12-30 23:56:01 root] (omniquant.py 332): INFO ====================================================================================================
+[2025-12-30 23:56:01 root] (omniquant.py 333): INFO [POSITION IDS] Processing...
+[2025-12-30 23:56:01 root] (omniquant.py 334): INFO ====================================================================================================
+[2025-12-30 23:56:01 root] (omniquant.py 337): INFO [POSITION IDS] Captured: torch.Size([1, 4096])
+[2025-12-30 23:56:01 root] (omniquant.py 346): INFO [POSITION IDS] Base shape (will expand per batch): torch.Size([1, 4096])
+[2025-12-30 23:56:01 root] (omniquant.py 350): INFO [ROTARY EMB] Module type: Qwen3VLTextRotaryEmbedding
+[2025-12-30 23:56:01 root] (omniquant.py 356): INFO [POSITION EMBEDDINGS] cos shape: torch.Size([1, 4096, 128])
+[2025-12-30 23:56:01 root] (omniquant.py 357): INFO [POSITION EMBEDDINGS] sin shape: torch.Size([1, 4096, 128])
+[2025-12-30 23:56:02 root] (omniquant.py 370): INFO ====================================================================================================
+[2025-12-30 23:56:02 root] (omniquant.py 371): INFO [SEARCH] Starting mixed-precision search...
+[2025-12-30 23:56:02 root] (omniquant.py 372): INFO ====================================================================================================
+[2025-12-30 23:56:02 root] (omniquant.py 387): INFO [SEARCH] Loading measurements from file...
+[2025-12-30 23:56:02 root] (omniquant.py 391): INFO [SEARCH] Loaded measurements from ./measure/qwen3-vl-32b-sample128-gpu/
+[2025-12-30 23:56:02 root] (omniquant.py 411): INFO [SEARCH] Running optimization phase...
+[2025-12-30 23:56:08 root] (omniquant.py 422): INFO [SEARCH] Optimization complete
+[2025-12-30 23:56:08 root] (omniquant.py 461): INFO ====================================================================================================
+[2025-12-30 23:56:08 root] (omniquant.py 462): INFO [QUANTIZATION] Starting layer-wise quantization for 64 layers
+[2025-12-30 23:56:08 root] (omniquant.py 463): INFO ====================================================================================================
+[2025-12-30 23:56:08 root] (omniquant.py 473): INFO
+====================================================================================================
+[2025-12-30 23:56:08 root] (omniquant.py 474): INFO [LAYER 0/63] Starting quantization
+[2025-12-30 23:56:08 root] (omniquant.py 475): INFO ====================================================================================================
+[2025-12-30 23:56:10 root] (omniquant.py 488): INFO [LAYER 0] Applying mixed-precision strategy...
+[2025-12-30 23:56:10 root] (omniquant.py 517): INFO model.language_model.layers.0.self_attn.q_proj: QParams(32, [4], [1], 4, 32)
+[2025-12-30 23:56:10 root] (omniquant.py 517): INFO model.language_model.layers.0.self_attn.k_proj: QParams(32, [4], [1], 4, 32)
+[2025-12-30 23:56:10 root] (omniquant.py 517): INFO model.language_model.layers.0.self_attn.v_proj: QParams(32, [4], [1], 4, 32)
+[2025-12-30 23:56:10 root] (omniquant.py 517): INFO model.language_model.layers.0.self_attn.o_proj: QParams(32, [4], [1], 4, 32)
+[2025-12-30 23:56:11 root] (omniquant.py 517): INFO model.language_model.layers.0.mlp.gate_proj: QParams(32, [4], [1], 4, 32)
+[2025-12-30 23:56:11 root] (omniquant.py 517): INFO model.language_model.layers.0.mlp.up_proj: QParams(32, [4], [1], 4, 32)
+[2025-12-30 23:56:11 root] (omniquant.py 517): INFO model.language_model.layers.0.mlp.down_proj: QParams(32, [4], [1], 4, 32)
+[2025-12-30 23:56:12 root] (omniquant.py 534): INFO [LAYER 0] BPW: 5.00
+[2025-12-30 23:56:12 root] (omniquant.py 539): INFO [LAYER 0] Computing FP reference outputs...
+[2025-12-30 23:56:12 root] (omniquant.py 559): INFO [LAYER 0] FP reference sample 0:
+[2025-12-30 23:56:12 root] (omniquant.py 560): INFO fp_inp_batch: torch.Size([1, 4096, 5120])
+[2025-12-30 23:56:12 root] (omniquant.py 561): INFO batch_position_ids: torch.Size([1, 4096])
+[2025-12-30 23:56:12 root] (omniquant.py 567): INFO position_embeddings[0] (cos): torch.Size([1, 4096, 128])
+[2025-12-30 23:56:12 root] (omniquant.py 568): INFO position_embeddings[1] (sin): torch.Size([1, 4096, 128])
+[2025-12-30 23:56:12 root] (omniquant.py 587): INFO fp_hidden_states: torch.Size([1, 4096, 5120])
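Layer 0 is assigned 4-bit weights with group size 32 for every attention and MLP projection, and the log then reports a bits-per-weight of 5.00. That figure is consistent with per-group fp16 scales and zero-points amortized over each group of 32 weights; a rough sanity check under that storage assumption (the exact meaning of the QParams fields is not documented in this log):

    # Back-of-the-envelope bits-per-weight for 4-bit weights in groups of 32,
    # assuming one fp16 scale and one fp16 zero-point per group (assumption, not confirmed here).
    wbits = 4
    group_size = 32
    overhead = (16 + 16) / group_size   # scale + zero-point bits amortized over the group
    print(wbits + overhead)             # 5.0, matching the "[LAYER 0] BPW: 5.00" line above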
log_rank0_1767135476.txt
ADDED
The diff for this file is too large to render.

log_rank0_1767270363.txt
ADDED
The diff for this file is too large to render.
model-00002-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6b5484e90778f0393ec54c0c5ea0b30c8ce8d240b91c90c2f7b9655645da0013
+size 4989486632

model-00003-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ab8a3b79c0f9a632967d5e1e3dcb001e603838225789658bdc4e986b8752a330
+size 4989825872

model-00004-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c0c48c5c661d230ab351e18ea84272492dbcf3dfd8df7c2b2a8936fb0623f340
+size 4984013472

model-00005-of-00006.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f01c987898ac5ec7898287607f3457efdaee8098c022d447ad18235baf7ffc7a
+size 3192028080
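Each shard above is stored through Git LFS, so the repository tracks only a small pointer file: oid is the SHA-256 digest of the real shard and size is its length in bytes. A minimal sketch for checking a downloaded shard against the pointer values in this commit (the check itself is illustrative, not part of the upload tooling):

    import hashlib
    import os

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file in 1 MiB chunks so multi-GB shards do not need to fit in memory.
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            while block := f.read(chunk_size):
                digest.update(block)
        return digest.hexdigest()

    shard = "model-00002-of-00006.safetensors"
    assert sha256_of(shard) == "6b5484e90778f0393ec54c0c5ea0b30c8ce8d240b91c90c2f7b9655645da0013"
    assert os.path.getsize(shard) == 4989486632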
model.safetensors.index.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "metadata": {
     "total_parameters": 2151768304,
-    "total_size":
+    "total_size": 23647749056
   },
   "weight_map": {
     "lm_head.weight": "model-00006-of-00006.safetensors",
@@ -864,12 +864,12 @@
     "model.language_model.layers.28.self_attn.v_proj.qweight": "model-00003-of-00006.safetensors",
     "model.language_model.layers.28.self_attn.v_proj.scales": "model-00003-of-00006.safetensors",
     "model.language_model.layers.28.self_attn.v_proj.zeros": "model-00003-of-00006.safetensors",
-    "model.language_model.layers.29.input_layernorm.weight": "model-
+    "model.language_model.layers.29.input_layernorm.weight": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.mlp.down_proj.channel_scale": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.mlp.down_proj.q_perm": "model-00003-of-00006.safetensors",
-    "model.language_model.layers.29.mlp.down_proj.qweight": "model-
-    "model.language_model.layers.29.mlp.down_proj.scales": "model-
-    "model.language_model.layers.29.mlp.down_proj.zeros": "model-
+    "model.language_model.layers.29.mlp.down_proj.qweight": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.29.mlp.down_proj.scales": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.29.mlp.down_proj.zeros": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.mlp.gate_proj.channel_scale": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.mlp.gate_proj.q_perm": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.mlp.gate_proj.qweight": "model-00003-of-00006.safetensors",
@@ -880,7 +880,7 @@
     "model.language_model.layers.29.mlp.up_proj.qweight": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.mlp.up_proj.scales": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.mlp.up_proj.zeros": "model-00003-of-00006.safetensors",
-    "model.language_model.layers.29.post_attention_layernorm.weight": "model-
+    "model.language_model.layers.29.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.self_attn.k_proj.channel_scale": "model-00003-of-00006.safetensors",
     "model.language_model.layers.29.self_attn.k_proj.q_perm": "model-00003-of-00006.safetensors",
@@ -960,27 +960,27 @@
     "model.language_model.layers.30.mlp.up_proj.zeros": "model-00004-of-00006.safetensors",
     "model.language_model.layers.30.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
     "model.language_model.layers.30.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
-    "model.language_model.layers.30.self_attn.k_proj.channel_scale": "model-
-    "model.language_model.layers.30.self_attn.k_proj.q_perm": "model-
-    "model.language_model.layers.30.self_attn.k_proj.qweight": "model-
-    "model.language_model.layers.30.self_attn.k_proj.scales": "model-
-    "model.language_model.layers.30.self_attn.k_proj.zeros": "model-
-    "model.language_model.layers.30.self_attn.o_proj.channel_scale": "model-
-    "model.language_model.layers.30.self_attn.o_proj.q_perm": "model-
+    "model.language_model.layers.30.self_attn.k_proj.channel_scale": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.k_proj.q_perm": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.k_proj.qweight": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.k_proj.scales": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.k_proj.zeros": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.o_proj.channel_scale": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.o_proj.q_perm": "model-00003-of-00006.safetensors",
     "model.language_model.layers.30.self_attn.o_proj.qweight": "model-00004-of-00006.safetensors",
     "model.language_model.layers.30.self_attn.o_proj.scales": "model-00004-of-00006.safetensors",
     "model.language_model.layers.30.self_attn.o_proj.zeros": "model-00004-of-00006.safetensors",
     "model.language_model.layers.30.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
-    "model.language_model.layers.30.self_attn.q_proj.channel_scale": "model-
-    "model.language_model.layers.30.self_attn.q_proj.q_perm": "model-
-    "model.language_model.layers.30.self_attn.q_proj.qweight": "model-
-    "model.language_model.layers.30.self_attn.q_proj.scales": "model-
-    "model.language_model.layers.30.self_attn.q_proj.zeros": "model-
-    "model.language_model.layers.30.self_attn.v_proj.channel_scale": "model-
-    "model.language_model.layers.30.self_attn.v_proj.q_perm": "model-
-    "model.language_model.layers.30.self_attn.v_proj.qweight": "model-
-    "model.language_model.layers.30.self_attn.v_proj.scales": "model-
-    "model.language_model.layers.30.self_attn.v_proj.zeros": "model-
+    "model.language_model.layers.30.self_attn.q_proj.channel_scale": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.q_proj.q_perm": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.q_proj.qweight": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.q_proj.scales": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.q_proj.zeros": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.v_proj.channel_scale": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.v_proj.q_perm": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.v_proj.qweight": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.v_proj.scales": "model-00003-of-00006.safetensors",
+    "model.language_model.layers.30.self_attn.v_proj.zeros": "model-00003-of-00006.safetensors",
     "model.language_model.layers.31.input_layernorm.weight": "model-00004-of-00006.safetensors",
     "model.language_model.layers.31.mlp.down_proj.channel_scale": "model-00004-of-00006.safetensors",
     "model.language_model.layers.31.mlp.down_proj.q_perm": "model-00004-of-00006.safetensors",
@@ -1722,12 +1722,12 @@
     "model.language_model.layers.48.self_attn.v_proj.qweight": "model-00004-of-00006.safetensors",
     "model.language_model.layers.48.self_attn.v_proj.scales": "model-00004-of-00006.safetensors",
     "model.language_model.layers.48.self_attn.v_proj.zeros": "model-00004-of-00006.safetensors",
-    "model.language_model.layers.49.input_layernorm.weight": "model-
+    "model.language_model.layers.49.input_layernorm.weight": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.mlp.down_proj.channel_scale": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.mlp.down_proj.q_perm": "model-00004-of-00006.safetensors",
-    "model.language_model.layers.49.mlp.down_proj.qweight": "model-
-    "model.language_model.layers.49.mlp.down_proj.scales": "model-
-    "model.language_model.layers.49.mlp.down_proj.zeros": "model-
+    "model.language_model.layers.49.mlp.down_proj.qweight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.49.mlp.down_proj.scales": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.49.mlp.down_proj.zeros": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.mlp.gate_proj.channel_scale": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.mlp.gate_proj.q_perm": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.mlp.gate_proj.qweight": "model-00004-of-00006.safetensors",
@@ -1738,7 +1738,7 @@
     "model.language_model.layers.49.mlp.up_proj.qweight": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.mlp.up_proj.scales": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.mlp.up_proj.zeros": "model-00004-of-00006.safetensors",
-    "model.language_model.layers.49.post_attention_layernorm.weight": "model-
+    "model.language_model.layers.49.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.self_attn.k_proj.channel_scale": "model-00004-of-00006.safetensors",
     "model.language_model.layers.49.self_attn.k_proj.q_perm": "model-00004-of-00006.safetensors",
@@ -1801,44 +1801,44 @@
     "model.language_model.layers.5.self_attn.v_proj.scales": "model-00002-of-00006.safetensors",
     "model.language_model.layers.5.self_attn.v_proj.zeros": "model-00002-of-00006.safetensors",
     "model.language_model.layers.50.input_layernorm.weight": "model-00005-of-00006.safetensors",
-    "model.language_model.layers.50.mlp.down_proj.channel_scale": "model-
-    "model.language_model.layers.50.mlp.down_proj.q_perm": "model-
+    "model.language_model.layers.50.mlp.down_proj.channel_scale": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.down_proj.q_perm": "model-00004-of-00006.safetensors",
     "model.language_model.layers.50.mlp.down_proj.qweight": "model-00005-of-00006.safetensors",
     "model.language_model.layers.50.mlp.down_proj.scales": "model-00005-of-00006.safetensors",
     "model.language_model.layers.50.mlp.down_proj.zeros": "model-00005-of-00006.safetensors",
-    "model.language_model.layers.50.mlp.gate_proj.channel_scale": "model-
-    "model.language_model.layers.50.mlp.gate_proj.q_perm": "model-
-    "model.language_model.layers.50.mlp.gate_proj.qweight": "model-
-    "model.language_model.layers.50.mlp.gate_proj.scales": "model-
-    "model.language_model.layers.50.mlp.gate_proj.zeros": "model-
-    "model.language_model.layers.50.mlp.up_proj.channel_scale": "model-
-    "model.language_model.layers.50.mlp.up_proj.q_perm": "model-
-    "model.language_model.layers.50.mlp.up_proj.qweight": "model-
-    "model.language_model.layers.50.mlp.up_proj.scales": "model-
-    "model.language_model.layers.50.mlp.up_proj.zeros": "model-
+    "model.language_model.layers.50.mlp.gate_proj.channel_scale": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.gate_proj.q_perm": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.gate_proj.qweight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.gate_proj.scales": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.gate_proj.zeros": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.up_proj.channel_scale": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.up_proj.q_perm": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.up_proj.qweight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.up_proj.scales": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.mlp.up_proj.zeros": "model-00004-of-00006.safetensors",
     "model.language_model.layers.50.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
-    "model.language_model.layers.50.self_attn.k_norm.weight": "model-
-    "model.language_model.layers.50.self_attn.k_proj.channel_scale": "model-
-    "model.language_model.layers.50.self_attn.k_proj.q_perm": "model-
-    "model.language_model.layers.50.self_attn.k_proj.qweight": "model-
-    "model.language_model.layers.50.self_attn.k_proj.scales": "model-
-    "model.language_model.layers.50.self_attn.k_proj.zeros": "model-
-    "model.language_model.layers.50.self_attn.o_proj.channel_scale": "model-
-    "model.language_model.layers.50.self_attn.o_proj.q_perm": "model-
-    "model.language_model.layers.50.self_attn.o_proj.qweight": "model-
-    "model.language_model.layers.50.self_attn.o_proj.scales": "model-
-    "model.language_model.layers.50.self_attn.o_proj.zeros": "model-
-    "model.language_model.layers.50.self_attn.q_norm.weight": "model-
-    "model.language_model.layers.50.self_attn.q_proj.channel_scale": "model-
-    "model.language_model.layers.50.self_attn.q_proj.q_perm": "model-
-    "model.language_model.layers.50.self_attn.q_proj.qweight": "model-
-    "model.language_model.layers.50.self_attn.q_proj.scales": "model-
-    "model.language_model.layers.50.self_attn.q_proj.zeros": "model-
-    "model.language_model.layers.50.self_attn.v_proj.channel_scale": "model-
-    "model.language_model.layers.50.self_attn.v_proj.q_perm": "model-
-    "model.language_model.layers.50.self_attn.v_proj.qweight": "model-
-    "model.language_model.layers.50.self_attn.v_proj.scales": "model-
-    "model.language_model.layers.50.self_attn.v_proj.zeros": "model-
+    "model.language_model.layers.50.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.k_proj.channel_scale": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.k_proj.q_perm": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.k_proj.qweight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.k_proj.scales": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.k_proj.zeros": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.o_proj.channel_scale": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.o_proj.q_perm": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.o_proj.qweight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.o_proj.scales": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.o_proj.zeros": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.q_proj.channel_scale": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.q_proj.q_perm": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.q_proj.qweight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.q_proj.scales": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.q_proj.zeros": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.v_proj.channel_scale": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.v_proj.q_perm": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.v_proj.qweight": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.v_proj.scales": "model-00004-of-00006.safetensors",
+    "model.language_model.layers.50.self_attn.v_proj.zeros": "model-00004-of-00006.safetensors",
     "model.language_model.layers.51.input_layernorm.weight": "model-00005-of-00006.safetensors",
     "model.language_model.layers.51.mlp.down_proj.channel_scale": "model-00005-of-00006.safetensors",
     "model.language_model.layers.51.mlp.down_proj.q_perm": "model-00005-of-00006.safetensors",
@@ -2190,12 +2190,12 @@
     "model.language_model.layers.59.self_attn.v_proj.qweight": "model-00005-of-00006.safetensors",
     "model.language_model.layers.59.self_attn.v_proj.scales": "model-00005-of-00006.safetensors",
     "model.language_model.layers.59.self_attn.v_proj.zeros": "model-00005-of-00006.safetensors",
-    "model.language_model.layers.6.input_layernorm.weight": "model-
-    "model.language_model.layers.6.mlp.down_proj.channel_scale": "model-
-    "model.language_model.layers.6.mlp.down_proj.q_perm": "model-
-    "model.language_model.layers.6.mlp.down_proj.qweight": "model-
-    "model.language_model.layers.6.mlp.down_proj.scales": "model-
-    "model.language_model.layers.6.mlp.down_proj.zeros": "model-
+    "model.language_model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.6.mlp.down_proj.channel_scale": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.6.mlp.down_proj.q_perm": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.6.mlp.down_proj.qweight": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.6.mlp.down_proj.scales": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.6.mlp.down_proj.zeros": "model-00002-of-00006.safetensors",
     "model.language_model.layers.6.mlp.gate_proj.channel_scale": "model-00002-of-00006.safetensors",
     "model.language_model.layers.6.mlp.gate_proj.q_perm": "model-00002-of-00006.safetensors",
     "model.language_model.layers.6.mlp.gate_proj.qweight": "model-00002-of-00006.safetensors",
@@ -2204,9 +2204,9 @@
     "model.language_model.layers.6.mlp.up_proj.channel_scale": "model-00002-of-00006.safetensors",
     "model.language_model.layers.6.mlp.up_proj.q_perm": "model-00002-of-00006.safetensors",
     "model.language_model.layers.6.mlp.up_proj.qweight": "model-00002-of-00006.safetensors",
-    "model.language_model.layers.6.mlp.up_proj.scales": "model-
-    "model.language_model.layers.6.mlp.up_proj.zeros": "model-
-    "model.language_model.layers.6.post_attention_layernorm.weight": "model-
+    "model.language_model.layers.6.mlp.up_proj.scales": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.6.mlp.up_proj.zeros": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
     "model.language_model.layers.6.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
    "model.language_model.layers.6.self_attn.k_proj.channel_scale": "model-00002-of-00006.safetensors",
     "model.language_model.layers.6.self_attn.k_proj.q_perm": "model-00002-of-00006.safetensors",
@@ -2391,8 +2391,8 @@
     "model.language_model.layers.7.mlp.down_proj.qweight": "model-00003-of-00006.safetensors",
     "model.language_model.layers.7.mlp.down_proj.scales": "model-00003-of-00006.safetensors",
     "model.language_model.layers.7.mlp.down_proj.zeros": "model-00003-of-00006.safetensors",
-    "model.language_model.layers.7.mlp.gate_proj.channel_scale": "model-
-    "model.language_model.layers.7.mlp.gate_proj.q_perm": "model-
+    "model.language_model.layers.7.mlp.gate_proj.channel_scale": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.mlp.gate_proj.q_perm": "model-00002-of-00006.safetensors",
     "model.language_model.layers.7.mlp.gate_proj.qweight": "model-00003-of-00006.safetensors",
     "model.language_model.layers.7.mlp.gate_proj.scales": "model-00003-of-00006.safetensors",
     "model.language_model.layers.7.mlp.gate_proj.zeros": "model-00003-of-00006.safetensors",
@@ -2402,28 +2402,28 @@
     "model.language_model.layers.7.mlp.up_proj.scales": "model-00003-of-00006.safetensors",
     "model.language_model.layers.7.mlp.up_proj.zeros": "model-00003-of-00006.safetensors",
     "model.language_model.layers.7.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
-    "model.language_model.layers.7.self_attn.k_norm.weight": "model-
-    "model.language_model.layers.7.self_attn.k_proj.channel_scale": "model-
-    "model.language_model.layers.7.self_attn.k_proj.q_perm": "model-
-    "model.language_model.layers.7.self_attn.k_proj.qweight": "model-
-    "model.language_model.layers.7.self_attn.k_proj.scales": "model-
-    "model.language_model.layers.7.self_attn.k_proj.zeros": "model-
-    "model.language_model.layers.7.self_attn.o_proj.channel_scale": "model-
-    "model.language_model.layers.7.self_attn.o_proj.q_perm": "model-
-    "model.language_model.layers.7.self_attn.o_proj.qweight": "model-
-    "model.language_model.layers.7.self_attn.o_proj.scales": "model-
-    "model.language_model.layers.7.self_attn.o_proj.zeros": "model-
-    "model.language_model.layers.7.self_attn.q_norm.weight": "model-
-    "model.language_model.layers.7.self_attn.q_proj.channel_scale": "model-
-    "model.language_model.layers.7.self_attn.q_proj.q_perm": "model-
-    "model.language_model.layers.7.self_attn.q_proj.qweight": "model-
-    "model.language_model.layers.7.self_attn.q_proj.scales": "model-
-    "model.language_model.layers.7.self_attn.q_proj.zeros": "model-
-    "model.language_model.layers.7.self_attn.v_proj.channel_scale": "model-
-    "model.language_model.layers.7.self_attn.v_proj.q_perm": "model-
-    "model.language_model.layers.7.self_attn.v_proj.qweight": "model-
-    "model.language_model.layers.7.self_attn.v_proj.scales": "model-
-    "model.language_model.layers.7.self_attn.v_proj.zeros": "model-
+    "model.language_model.layers.7.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.k_proj.channel_scale": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.k_proj.q_perm": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.k_proj.qweight": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.k_proj.scales": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.k_proj.zeros": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.o_proj.channel_scale": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.o_proj.q_perm": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.o_proj.qweight": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.o_proj.scales": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.o_proj.zeros": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.q_proj.channel_scale": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.q_proj.q_perm": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.q_proj.qweight": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.q_proj.scales": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.q_proj.zeros": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.v_proj.channel_scale": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.v_proj.q_perm": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.v_proj.qweight": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.v_proj.scales": "model-00002-of-00006.safetensors",
+    "model.language_model.layers.7.self_attn.v_proj.zeros": "model-00002-of-00006.safetensors",
     "model.language_model.layers.8.input_layernorm.weight": "model-00003-of-00006.safetensors",
     "model.language_model.layers.8.mlp.down_proj.channel_scale": "model-00003-of-00006.safetensors",
     "model.language_model.layers.8.mlp.down_proj.q_perm": "model-00003-of-00006.safetensors",
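The index maps every tensor name to the shard that now holds it; the hunks above mostly move the quantization tensors of a few layers (qweight, scales, zeros, q_perm, channel_scale) to a different shard and fill in the new total_size. A minimal sketch of how a loader resolves one of these tensors through weight_map with the safetensors API (the tensor name is taken from the diff above; everything else is illustrative):

    import json
    from safetensors import safe_open

    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    name = "model.language_model.layers.30.self_attn.q_proj.qweight"
    shard = index["weight_map"][name]   # "model-00003-of-00006.safetensors" after this commit
    with safe_open(shard, framework="pt", device="cpu") as tensors:
        qweight = tensors.get_tensor(name)   # packed low-bit weights for this projection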
quant_strategy.json
CHANGED
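Per the keys visible in the hunks below, each entry under "measurement" describes one transformer layer: a measured accuracy, a total bit budget, and, for every projection (q/k/v/o, gate/up/down), a group_size map, a list of bit widths, and the proportion of the layer assigned to each width; the concrete values are hidden by the diff viewer. A minimal sketch of how such a strategy file could be read back when applying the mixed-precision layout (field names follow the visible keys; the loop is illustrative and assumes the structure shown, which this diff only partially reveals):

    import json

    with open("quant_strategy.json") as f:
        measurement = json.load(f)["measurement"]

    projections = ("q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj")
    for layer_name, layer in measurement.items():      # e.g. "model.layers.27"
        for proj in projections:
            cfg = layer.get(proj)
            if cfg is None:
                continue
            bits = cfg["bits"]              # e.g. [4] or [2], per the hunks below
            bits_prop = cfg["bits_prop"]    # e.g. [1] -> the whole projection at that width
            group_size = cfg["group_size"]  # e.g. {"4": 128}: group size keyed by bit width
            print(layer_name, proj, bits, bits_prop, group_size)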
@@ -1,11 +1,11 @@
 {
   "measurement": {
     "model.layers.0": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -18,7 +18,7 @@
       },
       "down_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -31,7 +31,7 @@
       },
       "q_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -44,7 +44,7 @@
       },
       "k_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -57,7 +57,7 @@
       },
       "v_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -70,7 +70,7 @@
       },
       "gate_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -83,7 +83,7 @@
       },
       "up_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -96,11 +96,11 @@
       }
     },
     "model.layers.1": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -113,7 +113,7 @@
       },
       "down_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -126,7 +126,7 @@
       },
       "q_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -139,7 +139,7 @@
       },
       "k_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -152,7 +152,7 @@
       },
       "v_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -165,7 +165,7 @@
       },
       "gate_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -178,7 +178,7 @@
       },
       "up_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -286,8 +286,8 @@
       }
     },
     "model.layers.3": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -316,10 +316,10 @@
       },
       "q_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -476,11 +476,11 @@
       }
     },
     "model.layers.5": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -493,7 +493,7 @@
       },
       "down_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -506,7 +506,7 @@
       },
       "q_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -519,7 +519,7 @@
       },
       "k_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -532,7 +532,7 @@
       },
       "v_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -545,7 +545,7 @@
       },
       "gate_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -558,7 +558,7 @@
       },
       "up_proj": {
         "group_size": {
-          "4":
         },
         "bits": [
           4
@@ -856,14 +856,14 @@
       }
     },
     "model.layers.9": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -951,8 +951,8 @@
       }
     },
     "model.layers.10": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -968,10 +968,10 @@
       },
       "down_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1046,14 +1046,14 @@
       }
     },
     "model.layers.11": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1089,10 +1089,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1141,14 +1141,14 @@
       }
     },
     "model.layers.12": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1236,14 +1236,14 @@
       }
     },
     "model.layers.13": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1279,10 +1279,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1331,14 +1331,14 @@
       }
     },
     "model.layers.14": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1374,10 +1374,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1426,14 +1426,14 @@
       }
     },
     "model.layers.15": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1469,10 +1469,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1521,8 +1521,8 @@
       }
     },
     "model.layers.16": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -1538,10 +1538,10 @@
       },
       "down_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1564,10 +1564,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1616,8 +1616,8 @@
       }
     },
     "model.layers.17": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -1633,10 +1633,10 @@
       },
       "down_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1659,10 +1659,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1711,14 +1711,14 @@
       }
     },
     "model.layers.18": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1754,10 +1754,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1901,14 +1901,14 @@
       }
     },
     "model.layers.20": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -1944,10 +1944,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2091,8 +2091,8 @@
       }
     },
     "model.layers.22": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -2134,10 +2134,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2186,8 +2186,8 @@
       }
     },
     "model.layers.23": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -2216,10 +2216,10 @@
       },
       "q_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2281,8 +2281,8 @@
       }
     },
     "model.layers.24": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -2350,10 +2350,10 @@
       },
       "gate_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2363,10 +2363,10 @@
       },
       "up_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2566,8 +2566,8 @@
       }
     },
     "model.layers.27": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -2596,7 +2596,7 @@
       },
       "q_proj": {
         "group_size": {
-          "2":
         },
         "bits": [
           2
@@ -2609,10 +2609,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2635,10 +2635,10 @@
       },
       "gate_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2661,8 +2661,8 @@
       }
     },
     "model.layers.28": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -2691,7 +2691,7 @@
       },
       "q_proj": {
         "group_size": {
-          "2":
         },
         "bits": [
           2
@@ -2704,10 +2704,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2730,10 +2730,10 @@
       },
       "gate_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2756,8 +2756,8 @@
       }
     },
     "model.layers.29": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -2786,7 +2786,7 @@
       },
       "q_proj": {
         "group_size": {
-          "2":
         },
         "bits": [
           2
@@ -2799,10 +2799,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -2825,10 +2825,10 @@
       },
       "gate_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -3421,8 +3421,8 @@
       }
     },
     "model.layers.36": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -3451,7 +3451,7 @@
       },
       "q_proj": {
         "group_size": {
-          "2":
         },
         "bits": [
           2
@@ -3464,10 +3464,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -3490,10 +3490,10 @@
       },
       "gate_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -3516,8 +3516,8 @@
       }
     },
     "model.layers.37": {
-      "accuracy": 0.
-      "total_bits":
       "o_proj": {
         "group_size": {
           "4": 128
@@ -3546,7 +3546,7 @@
       },
       "q_proj": {
         "group_size": {
-          "2":
         },
         "bits": [
           2
@@ -3559,10 +3559,10 @@
       },
       "k_proj": {
         "group_size": {
-          "
         },
         "bits": [
-
         ],
         "bits_prop": [
           1
@@ -3585,10 +3585,10 @@
|
|
| 3585 |
},
|
| 3586 |
"gate_proj": {
|
| 3587 |
"group_size": {
|
| 3588 |
-
"
|
| 3589 |
},
|
| 3590 |
"bits": [
|
| 3591 |
-
|
| 3592 |
],
|
| 3593 |
"bits_prop": [
|
| 3594 |
1
|
|
@@ -3611,8 +3611,8 @@
|
|
| 3611 |
}
|
| 3612 |
},
|
| 3613 |
"model.layers.38": {
|
| 3614 |
-
"accuracy": 0.
|
| 3615 |
-
"total_bits":
|
| 3616 |
"o_proj": {
|
| 3617 |
"group_size": {
|
| 3618 |
"4": 128
|
|
@@ -3641,7 +3641,7 @@
|
|
| 3641 |
},
|
| 3642 |
"q_proj": {
|
| 3643 |
"group_size": {
|
| 3644 |
-
"2":
|
| 3645 |
},
|
| 3646 |
"bits": [
|
| 3647 |
2
|
|
@@ -3654,10 +3654,10 @@
|
|
| 3654 |
},
|
| 3655 |
"k_proj": {
|
| 3656 |
"group_size": {
|
| 3657 |
-
"
|
| 3658 |
},
|
| 3659 |
"bits": [
|
| 3660 |
-
|
| 3661 |
],
|
| 3662 |
"bits_prop": [
|
| 3663 |
1
|
|
@@ -3680,10 +3680,10 @@
|
|
| 3680 |
},
|
| 3681 |
"gate_proj": {
|
| 3682 |
"group_size": {
|
| 3683 |
-
"
|
| 3684 |
},
|
| 3685 |
"bits": [
|
| 3686 |
-
|
| 3687 |
],
|
| 3688 |
"bits_prop": [
|
| 3689 |
1
|
|
@@ -3706,8 +3706,8 @@
|
|
| 3706 |
}
|
| 3707 |
},
|
| 3708 |
"model.layers.39": {
|
| 3709 |
-
"accuracy": 0.
|
| 3710 |
-
"total_bits":
|
| 3711 |
"o_proj": {
|
| 3712 |
"group_size": {
|
| 3713 |
"4": 128
|
|
@@ -3736,7 +3736,7 @@
|
|
| 3736 |
},
|
| 3737 |
"q_proj": {
|
| 3738 |
"group_size": {
|
| 3739 |
-
"2":
|
| 3740 |
},
|
| 3741 |
"bits": [
|
| 3742 |
2
|
|
@@ -3749,10 +3749,10 @@
|
|
| 3749 |
},
|
| 3750 |
"k_proj": {
|
| 3751 |
"group_size": {
|
| 3752 |
-
"
|
| 3753 |
},
|
| 3754 |
"bits": [
|
| 3755 |
-
|
| 3756 |
],
|
| 3757 |
"bits_prop": [
|
| 3758 |
1
|
|
@@ -3775,10 +3775,10 @@
|
|
| 3775 |
},
|
| 3776 |
"gate_proj": {
|
| 3777 |
"group_size": {
|
| 3778 |
-
"
|
| 3779 |
},
|
| 3780 |
"bits": [
|
| 3781 |
-
|
| 3782 |
],
|
| 3783 |
"bits_prop": [
|
| 3784 |
1
|
|
@@ -4181,8 +4181,8 @@
|
|
| 4181 |
}
|
| 4182 |
},
|
| 4183 |
"model.layers.44": {
|
| 4184 |
-
"accuracy": 0.
|
| 4185 |
-
"total_bits":
|
| 4186 |
"o_proj": {
|
| 4187 |
"group_size": {
|
| 4188 |
"4": 128
|
|
@@ -4211,7 +4211,7 @@
|
|
| 4211 |
},
|
| 4212 |
"q_proj": {
|
| 4213 |
"group_size": {
|
| 4214 |
-
"2":
|
| 4215 |
},
|
| 4216 |
"bits": [
|
| 4217 |
2
|
|
@@ -4224,10 +4224,10 @@
|
|
| 4224 |
},
|
| 4225 |
"k_proj": {
|
| 4226 |
"group_size": {
|
| 4227 |
-
"
|
| 4228 |
},
|
| 4229 |
"bits": [
|
| 4230 |
-
|
| 4231 |
],
|
| 4232 |
"bits_prop": [
|
| 4233 |
1
|
|
@@ -4250,10 +4250,10 @@
|
|
| 4250 |
},
|
| 4251 |
"gate_proj": {
|
| 4252 |
"group_size": {
|
| 4253 |
-
"
|
| 4254 |
},
|
| 4255 |
"bits": [
|
| 4256 |
-
|
| 4257 |
],
|
| 4258 |
"bits_prop": [
|
| 4259 |
1
|
|
@@ -4276,8 +4276,8 @@
|
|
| 4276 |
}
|
| 4277 |
},
|
| 4278 |
"model.layers.45": {
|
| 4279 |
-
"accuracy": 0.
|
| 4280 |
-
"total_bits":
|
| 4281 |
"o_proj": {
|
| 4282 |
"group_size": {
|
| 4283 |
"4": 128
|
|
@@ -4306,7 +4306,7 @@
|
|
| 4306 |
},
|
| 4307 |
"q_proj": {
|
| 4308 |
"group_size": {
|
| 4309 |
-
"2":
|
| 4310 |
},
|
| 4311 |
"bits": [
|
| 4312 |
2
|
|
@@ -4319,10 +4319,10 @@
|
|
| 4319 |
},
|
| 4320 |
"k_proj": {
|
| 4321 |
"group_size": {
|
| 4322 |
-
"
|
| 4323 |
},
|
| 4324 |
"bits": [
|
| 4325 |
-
|
| 4326 |
],
|
| 4327 |
"bits_prop": [
|
| 4328 |
1
|
|
@@ -4345,10 +4345,10 @@
|
|
| 4345 |
},
|
| 4346 |
"gate_proj": {
|
| 4347 |
"group_size": {
|
| 4348 |
-
"
|
| 4349 |
},
|
| 4350 |
"bits": [
|
| 4351 |
-
|
| 4352 |
],
|
| 4353 |
"bits_prop": [
|
| 4354 |
1
|
|
@@ -4371,8 +4371,8 @@
|
|
| 4371 |
}
|
| 4372 |
},
|
| 4373 |
"model.layers.46": {
|
| 4374 |
-
"accuracy": 0.
|
| 4375 |
-
"total_bits":
|
| 4376 |
"o_proj": {
|
| 4377 |
"group_size": {
|
| 4378 |
"4": 128
|
|
@@ -4401,10 +4401,10 @@
|
|
| 4401 |
},
|
| 4402 |
"q_proj": {
|
| 4403 |
"group_size": {
|
| 4404 |
-
"
|
| 4405 |
},
|
| 4406 |
"bits": [
|
| 4407 |
-
|
| 4408 |
],
|
| 4409 |
"bits_prop": [
|
| 4410 |
1
|
|
@@ -4414,10 +4414,10 @@
|
|
| 4414 |
},
|
| 4415 |
"k_proj": {
|
| 4416 |
"group_size": {
|
| 4417 |
-
"
|
| 4418 |
},
|
| 4419 |
"bits": [
|
| 4420 |
-
|
| 4421 |
],
|
| 4422 |
"bits_prop": [
|
| 4423 |
1
|
|
@@ -4440,10 +4440,10 @@
|
|
| 4440 |
},
|
| 4441 |
"gate_proj": {
|
| 4442 |
"group_size": {
|
| 4443 |
-
"
|
| 4444 |
},
|
| 4445 |
"bits": [
|
| 4446 |
-
|
| 4447 |
],
|
| 4448 |
"bits_prop": [
|
| 4449 |
1
|
|
@@ -5036,11 +5036,11 @@
|
|
| 5036 |
}
|
| 5037 |
},
|
| 5038 |
"model.layers.53": {
|
| 5039 |
-
"accuracy": 0.
|
| 5040 |
-
"total_bits":
|
| 5041 |
"o_proj": {
|
| 5042 |
"group_size": {
|
| 5043 |
-
"4":
|
| 5044 |
},
|
| 5045 |
"bits": [
|
| 5046 |
4
|
|
@@ -5053,7 +5053,7 @@
|
|
| 5053 |
},
|
| 5054 |
"down_proj": {
|
| 5055 |
"group_size": {
|
| 5056 |
-
"4":
|
| 5057 |
},
|
| 5058 |
"bits": [
|
| 5059 |
4
|
|
@@ -5066,7 +5066,7 @@
|
|
| 5066 |
},
|
| 5067 |
"q_proj": {
|
| 5068 |
"group_size": {
|
| 5069 |
-
"4":
|
| 5070 |
},
|
| 5071 |
"bits": [
|
| 5072 |
4
|
|
@@ -5079,7 +5079,7 @@
|
|
| 5079 |
},
|
| 5080 |
"k_proj": {
|
| 5081 |
"group_size": {
|
| 5082 |
-
"4":
|
| 5083 |
},
|
| 5084 |
"bits": [
|
| 5085 |
4
|
|
@@ -5092,7 +5092,7 @@
|
|
| 5092 |
},
|
| 5093 |
"v_proj": {
|
| 5094 |
"group_size": {
|
| 5095 |
-
"4":
|
| 5096 |
},
|
| 5097 |
"bits": [
|
| 5098 |
4
|
|
@@ -5105,7 +5105,7 @@
|
|
| 5105 |
},
|
| 5106 |
"gate_proj": {
|
| 5107 |
"group_size": {
|
| 5108 |
-
"4":
|
| 5109 |
},
|
| 5110 |
"bits": [
|
| 5111 |
4
|
|
@@ -5118,7 +5118,7 @@
|
|
| 5118 |
},
|
| 5119 |
"up_proj": {
|
| 5120 |
"group_size": {
|
| 5121 |
-
"4":
|
| 5122 |
},
|
| 5123 |
"bits": [
|
| 5124 |
4
|
|
@@ -5606,14 +5606,14 @@
|
|
| 5606 |
}
|
| 5607 |
},
|
| 5608 |
"model.layers.59": {
|
| 5609 |
-
"accuracy": 0.
|
| 5610 |
-
"total_bits":
|
| 5611 |
"o_proj": {
|
| 5612 |
"group_size": {
|
| 5613 |
-
"
|
| 5614 |
},
|
| 5615 |
"bits": [
|
| 5616 |
-
|
| 5617 |
],
|
| 5618 |
"bits_prop": [
|
| 5619 |
1
|
|
@@ -5623,10 +5623,10 @@
|
|
| 5623 |
},
|
| 5624 |
"down_proj": {
|
| 5625 |
"group_size": {
|
| 5626 |
-
"
|
| 5627 |
},
|
| 5628 |
"bits": [
|
| 5629 |
-
|
| 5630 |
],
|
| 5631 |
"bits_prop": [
|
| 5632 |
1
|
|
@@ -5636,7 +5636,7 @@
|
|
| 5636 |
},
|
| 5637 |
"q_proj": {
|
| 5638 |
"group_size": {
|
| 5639 |
-
"2":
|
| 5640 |
},
|
| 5641 |
"bits": [
|
| 5642 |
2
|
|
@@ -5649,7 +5649,7 @@
|
|
| 5649 |
},
|
| 5650 |
"k_proj": {
|
| 5651 |
"group_size": {
|
| 5652 |
-
"2":
|
| 5653 |
},
|
| 5654 |
"bits": [
|
| 5655 |
2
|
|
@@ -5675,7 +5675,7 @@
|
|
| 5675 |
},
|
| 5676 |
"gate_proj": {
|
| 5677 |
"group_size": {
|
| 5678 |
-
"2":
|
| 5679 |
},
|
| 5680 |
"bits": [
|
| 5681 |
2
|
|
@@ -5688,7 +5688,7 @@
|
|
| 5688 |
},
|
| 5689 |
"up_proj": {
|
| 5690 |
"group_size": {
|
| 5691 |
-
"2":
|
| 5692 |
},
|
| 5693 |
"bits": [
|
| 5694 |
2
|
|
@@ -5701,14 +5701,14 @@
|
|
| 5701 |
}
|
| 5702 |
},
|
| 5703 |
"model.layers.60": {
|
| 5704 |
-
"accuracy": 0.
|
| 5705 |
-
"total_bits":
|
| 5706 |
"o_proj": {
|
| 5707 |
"group_size": {
|
| 5708 |
-
"
|
| 5709 |
},
|
| 5710 |
"bits": [
|
| 5711 |
-
|
| 5712 |
],
|
| 5713 |
"bits_prop": [
|
| 5714 |
1
|
|
@@ -5718,10 +5718,10 @@
|
|
| 5718 |
},
|
| 5719 |
"down_proj": {
|
| 5720 |
"group_size": {
|
| 5721 |
-
"
|
| 5722 |
},
|
| 5723 |
"bits": [
|
| 5724 |
-
|
| 5725 |
],
|
| 5726 |
"bits_prop": [
|
| 5727 |
1
|
|
@@ -5731,7 +5731,7 @@
|
|
| 5731 |
},
|
| 5732 |
"q_proj": {
|
| 5733 |
"group_size": {
|
| 5734 |
-
"2":
|
| 5735 |
},
|
| 5736 |
"bits": [
|
| 5737 |
2
|
|
@@ -5744,10 +5744,10 @@
|
|
| 5744 |
},
|
| 5745 |
"k_proj": {
|
| 5746 |
"group_size": {
|
| 5747 |
-
"
|
| 5748 |
},
|
| 5749 |
"bits": [
|
| 5750 |
-
|
| 5751 |
],
|
| 5752 |
"bits_prop": [
|
| 5753 |
1
|
|
@@ -5770,7 +5770,7 @@
|
|
| 5770 |
},
|
| 5771 |
"gate_proj": {
|
| 5772 |
"group_size": {
|
| 5773 |
-
"2":
|
| 5774 |
},
|
| 5775 |
"bits": [
|
| 5776 |
2
|
|
@@ -5783,7 +5783,7 @@
|
|
| 5783 |
},
|
| 5784 |
"up_proj": {
|
| 5785 |
"group_size": {
|
| 5786 |
-
"2":
|
| 5787 |
},
|
| 5788 |
"bits": [
|
| 5789 |
2
|
|
@@ -5796,14 +5796,14 @@
|
|
| 5796 |
}
|
| 5797 |
},
|
| 5798 |
"model.layers.61": {
|
| 5799 |
-
"accuracy": 0.
|
| 5800 |
-
"total_bits":
|
| 5801 |
"o_proj": {
|
| 5802 |
"group_size": {
|
| 5803 |
-
"
|
| 5804 |
},
|
| 5805 |
"bits": [
|
| 5806 |
-
|
| 5807 |
],
|
| 5808 |
"bits_prop": [
|
| 5809 |
1
|
|
@@ -5813,10 +5813,10 @@
|
|
| 5813 |
},
|
| 5814 |
"down_proj": {
|
| 5815 |
"group_size": {
|
| 5816 |
-
"
|
| 5817 |
},
|
| 5818 |
"bits": [
|
| 5819 |
-
|
| 5820 |
],
|
| 5821 |
"bits_prop": [
|
| 5822 |
1
|
|
@@ -5826,7 +5826,7 @@
|
|
| 5826 |
},
|
| 5827 |
"q_proj": {
|
| 5828 |
"group_size": {
|
| 5829 |
-
"2":
|
| 5830 |
},
|
| 5831 |
"bits": [
|
| 5832 |
2
|
|
@@ -5839,7 +5839,7 @@
|
|
| 5839 |
},
|
| 5840 |
"k_proj": {
|
| 5841 |
"group_size": {
|
| 5842 |
-
"2":
|
| 5843 |
},
|
| 5844 |
"bits": [
|
| 5845 |
2
|
|
@@ -5865,7 +5865,7 @@
|
|
| 5865 |
},
|
| 5866 |
"gate_proj": {
|
| 5867 |
"group_size": {
|
| 5868 |
-
"2":
|
| 5869 |
},
|
| 5870 |
"bits": [
|
| 5871 |
2
|
|
@@ -5878,7 +5878,7 @@
|
|
| 5878 |
},
|
| 5879 |
"up_proj": {
|
| 5880 |
"group_size": {
|
| 5881 |
-
"2":
|
| 5882 |
},
|
| 5883 |
"bits": [
|
| 5884 |
2
|
|
@@ -5891,14 +5891,14 @@
|
|
| 5891 |
}
|
| 5892 |
},
|
| 5893 |
"model.layers.62": {
|
| 5894 |
-
"accuracy": 0.
|
| 5895 |
-
"total_bits":
|
| 5896 |
"o_proj": {
|
| 5897 |
"group_size": {
|
| 5898 |
-
"
|
| 5899 |
},
|
| 5900 |
"bits": [
|
| 5901 |
-
|
| 5902 |
],
|
| 5903 |
"bits_prop": [
|
| 5904 |
1
|
|
@@ -5908,10 +5908,10 @@
|
|
| 5908 |
},
|
| 5909 |
"down_proj": {
|
| 5910 |
"group_size": {
|
| 5911 |
-
"
|
| 5912 |
},
|
| 5913 |
"bits": [
|
| 5914 |
-
|
| 5915 |
],
|
| 5916 |
"bits_prop": [
|
| 5917 |
1
|
|
@@ -5921,7 +5921,7 @@
|
|
| 5921 |
},
|
| 5922 |
"q_proj": {
|
| 5923 |
"group_size": {
|
| 5924 |
-
"2":
|
| 5925 |
},
|
| 5926 |
"bits": [
|
| 5927 |
2
|
|
@@ -5934,7 +5934,7 @@
|
|
| 5934 |
},
|
| 5935 |
"k_proj": {
|
| 5936 |
"group_size": {
|
| 5937 |
-
"2":
|
| 5938 |
},
|
| 5939 |
"bits": [
|
| 5940 |
2
|
|
@@ -5960,7 +5960,7 @@
|
|
| 5960 |
},
|
| 5961 |
"gate_proj": {
|
| 5962 |
"group_size": {
|
| 5963 |
-
"2":
|
| 5964 |
},
|
| 5965 |
"bits": [
|
| 5966 |
2
|
|
@@ -5973,7 +5973,7 @@
|
|
| 5973 |
},
|
| 5974 |
"up_proj": {
|
| 5975 |
"group_size": {
|
| 5976 |
-
"2":
|
| 5977 |
},
|
| 5978 |
"bits": [
|
| 5979 |
2
|
|
@@ -5986,14 +5986,14 @@
|
|
| 5986 |
}
|
| 5987 |
},
|
| 5988 |
"model.layers.63": {
|
| 5989 |
-
"accuracy": 0.
|
| 5990 |
-
"total_bits":
|
| 5991 |
"o_proj": {
|
| 5992 |
"group_size": {
|
| 5993 |
-
"
|
| 5994 |
},
|
| 5995 |
"bits": [
|
| 5996 |
-
|
| 5997 |
],
|
| 5998 |
"bits_prop": [
|
| 5999 |
1
|
|
@@ -6003,7 +6003,7 @@
|
|
| 6003 |
},
|
| 6004 |
"down_proj": {
|
| 6005 |
"group_size": {
|
| 6006 |
-
"4":
|
| 6007 |
},
|
| 6008 |
"bits": [
|
| 6009 |
4
|
|
@@ -6042,10 +6042,10 @@
|
|
| 6042 |
},
|
| 6043 |
"v_proj": {
|
| 6044 |
"group_size": {
|
| 6045 |
-
"
|
| 6046 |
},
|
| 6047 |
"bits": [
|
| 6048 |
-
|
| 6049 |
],
|
| 6050 |
"bits_prop": [
|
| 6051 |
1
|
|
|
|
quant_strategy.json — added hunks (new values). Each entry under the top-level "measurement" map records, for one decoder layer, an "accuracy" score, a "total_bits" count, and per-projection "group_size", "bits", and "bits_prop" choices ("bits_prop" is 1 in every entry shown here). The table below lists the new values introduced in the hunks; "N-bit (gM)" means bit-width N with group size M, and only the projection entries visible in the hunks are listed.

| Layer | accuracy | total_bits | projection configs in the added lines |
|---|---|---|---|
| model.layers.0 | 0.9774280267301947 | 1988444160.0 | o/down/q/k/v/gate/up_proj: 4-bit (g128) |
| model.layers.1 | 0.979808229021728 | 1988444160.0 | o/down/q/k/v/gate/up_proj: 4-bit (g128) |
| model.layers.3 | 0.9813712932809722 | 1907834880.0 | q_proj: 2-bit (g64) |
| model.layers.5 | 0.9806460069958121 | 1988444160.0 | o/down/q/k/v/gate/up_proj: 4-bit (g128) |
| model.layers.9 | 0.9862758388189832 | 1442283520.0 | o_proj: 4-bit (g128) |
| model.layers.10 | 0.9866171181201935 | 1442283520.0 | down_proj: 4-bit (g128) |
| model.layers.11 | 0.9859971911937464 | 1451540480.0 | o_proj, k_proj: 4-bit (g128) |
| model.layers.12 | 0.985051059658872 | 1442283520.0 | o_proj: 4-bit (g128) |
| model.layers.13 | 0.9854843073990196 | 1451540480.0 | o_proj, k_proj: 4-bit (g128) |
| model.layers.14 | 0.9862457206763793 | 1451540480.0 | o_proj, k_proj: 4-bit (g128) |
| model.layers.15 | 0.9865853489609435 | 1451540480.0 | o_proj, k_proj: 4-bit (g128) |
| model.layers.16 | 0.987276211992139 | 1451540480.0 | down_proj, k_proj: 4-bit (g128) |
| model.layers.17 | 0.987157837691484 | 1451540480.0 | down_proj, k_proj: 4-bit (g128) |
| model.layers.18 | 0.98646844382165 | 1451540480.0 | o_proj, k_proj: 4-bit (g128) |
| model.layers.20 | 0.9845815218286589 | 1451540480.0 | o_proj, k_proj: 4-bit (g128) |
| model.layers.22 | 0.9820500311907381 | 1516339200.0 | k_proj: 2-bit (g32) |
| model.layers.23 | 0.9808617235685233 | 1451540480.0 | q_proj: 2-bit (g32) |
| model.layers.24 | 0.9812766579561867 | 1653227520.0 | gate_proj: 4-bit (g128); up_proj: 2-bit (g64) |
| model.layers.27 | 0.9812095816305373 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.28 | 0.9807323368440848 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.29 | 0.980840009462554 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.36 | 0.9810442902962677 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.37 | 0.9805779054295272 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.38 | 0.9795972040155903 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.39 | 0.979535614955239 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.44 | 0.9810187924886122 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.45 | 0.979574806347955 | 1673707520.0 | q_proj, k_proj, gate_proj: 2-bit (g32) |
| model.layers.46 | 0.9794538663118146 | 1726464000.0 | q_proj: 4-bit (g128); k_proj, gate_proj: 2-bit (g64) |
| model.layers.53 | 0.9816716767963953 | 1988444160.0 | o/down/q/k/v/gate/up_proj: 4-bit (g128) |
| model.layers.59 | 0.9957845042226836 | 1442283520.0 | o_proj, down_proj: 4-bit (g128); q_proj, k_proj, gate_proj, up_proj: 2-bit (g32) |
| model.layers.60 | 0.9971412243321538 | 1451540480.0 | o_proj, down_proj, k_proj: 4-bit (g128); q_proj, gate_proj, up_proj: 2-bit (g32) |
| model.layers.61 | 0.9959207735955715 | 1442283520.0 | o_proj, down_proj: 4-bit (g128); q_proj, k_proj, gate_proj, up_proj: 2-bit (g32) |
| model.layers.62 | 0.9954170882701874 | 1442283520.0 | o_proj, down_proj: 4-bit (g128); q_proj, k_proj, gate_proj, up_proj: 2-bit (g32) |
| model.layers.63 | 0.9917106227949262 | 1442283520.0 | o_proj, down_proj, v_proj: 4-bit (g128) |
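For quick inspection of the strategy file, the following is a minimal sketch (not code from this repository) that loads quant_strategy.json and prints each layer's accuracy, total_bits, and per-projection bit widths, assuming the {"measurement": {...}} layout visible in the hunks above; the summary logic and printed format are illustrative only.

```python
# Minimal sketch: summarize the mixed-precision layout in quant_strategy.json.
# Assumed structure (as seen in this diff):
#   {"measurement": {"model.layers.N": {
#       "accuracy": float, "total_bits": float,
#       "<proj>": {"group_size": {"<bits>": int}, "bits": [int, ...], "bits_prop": [...]}}}}
import json
from collections import Counter

with open("quant_strategy.json") as f:
    strategy = json.load(f)

bit_counts = Counter()  # (projection name, bit-width) -> number of layers using it

for layer, entry in strategy["measurement"].items():
    acc = entry["accuracy"]
    total_bits = entry["total_bits"]
    # Everything besides the two scalar fields is a projection config.
    projs = {k: v for k, v in entry.items() if k not in ("accuracy", "total_bits")}
    layout = ", ".join(
        f"{name}:{'/'.join(str(b) for b in cfg['bits'])}b" for name, cfg in projs.items()
    )
    print(f"{layer:<20} acc={acc:.4f} total_bits={total_bits:.3e}  {layout}")
    for name, cfg in projs.items():
        for b in cfg["bits"]:
            bit_counts[(name, b)] += 1

print("\nlayers per (projection, bit-width):")
for (name, b), n in sorted(bit_counts.items()):
    print(f"  {name:<10} {b}-bit: {n}")
```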