NicoNico commited on
Commit
5586a07
·
verified ·
1 Parent(s): 0e39e78

Upload folder using huggingface_hub

Browse files
log_rank0_1767135176.txt ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-12-30 23:52:56 root] (main.py 611): INFO Namespace(net=None, model='/sc/home/nianhui.guo/models/qwen_vl/models--Qwen--Qwen3-VL-32B-Instruct/snapshots/0cfaf48183f594c314753d30a4c4974bc75f3ccb', cache_dir='./cache', output_dir='./log/qwen3-vl-32b-instruct-w3.5a16g128-sample128-seqlen4096-search-layer-mix-gpu-batch4-hadmad-stage1/', save_dir='./log/qwen3-vl-32b-instruct-w3.5a16g128-sample128-seqlen4096-search-layer-mix-gpu-batch4-hadmad-stage1/', resume=None, real_quant=True, calib_dataset='tulu', nsamples=256, batch_size=4, mini_batch_size=4, seed=2, epochs=3, reverse_epochs=3, limit=-1, deactive_amp=False, clip_grad=1.0, seq_length=4096, gradient_checkpoint=False, blocks=0, reverse_block_qat=False, top_calibration=True, top_calibration_blocks=1, recurrent_calibration_iterations=1, bidirection_qat=True, tasks='', eval_ppl=True, num_fewshot=0, eval_seq_length=2048, wbits=3.5, abits=16, symmetric=False, group_size=128, act_group_size=-1, qat=False, shift=True, optimizer='adamw', wd=0, norm_lr=1e-05, scale_lr=1e-05, one_bit_weight_lr=1e-05, two_bit_weight_lr=2e-05, four_bit_weight_lr=5e-06, shift_lr=5e-06, post_lr=5e-06, channel_scaler_lr=5e-06, prepare_lr=5e-06, fp_lr=1e-06, aug_loss=True, layerwise_loss=False, attention_loss=False, kl_loss=True, lm_head_loss=False, search=True, measurement=True, strategy=False, measure_dir='./measure/qwen3-vl-32b-sample128-gpu/', strategy_dir='./strategy/qwen3-vl-32b-sample128-gpu/', nonuniform_layout=False, residual_ratio=0.1, base_rank_ratio=0.01, exl_v2_pack=False, mix_precision_layout='layer_mix', rotation=False, online_rotation=False, rotate_mode='hadamard', lora_adapter=False, attn_implementation='sdpa', local_rank=-1, master_port=25901)
2
+ [2025-12-30 23:53:31 root] (main.py 642): INFO ====================================================================================================
3
+ [2025-12-30 23:53:31 root] (main.py 643): INFO === START QUANTIZATION ===
4
+ [2025-12-30 23:53:31 root] (main.py 644): INFO ====================================================================================================
5
+ [2025-12-30 23:53:59 root] (omniquant.py 155): INFO ====================================================================================================
6
+ [2025-12-30 23:53:59 root] (omniquant.py 156): INFO STARTING OMNIQUANT WITH MIXED-PRECISION SEARCH FOR QWEN3VL
7
+ [2025-12-30 23:53:59 root] (omniquant.py 157): INFO ====================================================================================================
8
+ [2025-12-30 23:53:59 root] (omniquant.py 168): INFO [CONFIG] Disabled use_cache in text_config (was: True)
9
+ [2025-12-30 23:53:59 root] (omniquant.py 186): INFO ====================================================================================================
10
+ [2025-12-30 23:53:59 root] (omniquant.py 187): INFO [MODEL] Detected Qwen3VL - Quantizing TEXT BRANCH ONLY
11
+ [2025-12-30 23:53:59 root] (omniquant.py 188): INFO [MODEL] Vision encoder will remain in FP16
12
+ [2025-12-30 23:53:59 root] (omniquant.py 189): INFO ====================================================================================================
13
+ [2025-12-30 23:54:00 root] (omniquant.py 209): INFO [MODEL] Language model: 64 layers
14
+ [2025-12-30 23:54:00 root] (omniquant.py 210): INFO [MODEL] Hidden size: 5120
15
+ [2025-12-30 23:54:00 root] (omniquant.py 211): INFO [MODEL] Num attention heads: 64
16
+ [2025-12-30 23:54:00 root] (omniquant.py 212): INFO [MODEL] Attention implementation: sdpa
17
+ [2025-12-30 23:54:00 root] (omniquant.py 218): INFO [DEVICE] Using: cuda
18
+ [2025-12-30 23:54:01 root] (omniquant.py 229): INFO [TRAINING] Using FP16 with AMP
19
+ [2025-12-30 23:54:01 root] (omniquant.py 234): INFO ====================================================================================================
20
+ [2025-12-30 23:54:01 root] (omniquant.py 235): INFO [INPUT CAPTURE] Starting...
21
+ [2025-12-30 23:54:01 root] (omniquant.py 236): INFO ====================================================================================================
22
+ [2025-12-30 23:54:02 root] (omniquant.py 239): INFO [INPUT CAPTURE] Allocated inps: torch.Size([256, 4096, 5120])
23
+ [2025-12-30 23:55:51 root] (omniquant.py 270): INFO [INPUT CAPTURE] Captured 256 samples
24
+ [2025-12-30 23:55:51 root] (omniquant.py 271): INFO [INPUT CAPTURE] attention_mask: torch.Size([1, 1, 4096, 4096])
25
+ [2025-12-30 23:55:51 root] (omniquant.py 272): INFO [INPUT CAPTURE] position_ids: torch.Size([1, 4096])
26
+ [2025-12-30 23:55:57 root] (omniquant.py 287): INFO ====================================================================================================
27
+ [2025-12-30 23:55:57 root] (omniquant.py 288): INFO [CALIBRATION DATA] Preparing...
28
+ [2025-12-30 23:55:57 root] (omniquant.py 289): INFO ====================================================================================================
29
+ [2025-12-30 23:56:01 root] (omniquant.py 295): INFO [CALIBRATION DATA] quant_inps: torch.Size([256, 4096, 5120])
30
+ [2025-12-30 23:56:01 root] (omniquant.py 296): INFO [CALIBRATION DATA] fp_inps: torch.Size([256, 4096, 5120])
31
+ [2025-12-30 23:56:01 root] (omniquant.py 298): INFO [CALIBRATION DATA] fp_inps_2: torch.Size([256, 4096, 5120])
32
+ [2025-12-30 23:56:01 root] (omniquant.py 320): INFO [ATTENTION MASK] Single sample: torch.Size([1, 1, 4096, 4096])
33
+ [2025-12-30 23:56:01 root] (omniquant.py 321): INFO [ATTENTION MASK] Batch (mini_batch_size=4): torch.Size([4, 1, 4096, 4096])
34
+ [2025-12-30 23:56:01 root] (omniquant.py 332): INFO ====================================================================================================
35
+ [2025-12-30 23:56:01 root] (omniquant.py 333): INFO [POSITION IDS] Processing...
36
+ [2025-12-30 23:56:01 root] (omniquant.py 334): INFO ====================================================================================================
37
+ [2025-12-30 23:56:01 root] (omniquant.py 337): INFO [POSITION IDS] Captured: torch.Size([1, 4096])
38
+ [2025-12-30 23:56:01 root] (omniquant.py 346): INFO [POSITION IDS] Base shape (will expand per batch): torch.Size([1, 4096])
39
+ [2025-12-30 23:56:01 root] (omniquant.py 350): INFO [ROTARY EMB] Module type: Qwen3VLTextRotaryEmbedding
40
+ [2025-12-30 23:56:01 root] (omniquant.py 356): INFO [POSITION EMBEDDINGS] cos shape: torch.Size([1, 4096, 128])
41
+ [2025-12-30 23:56:01 root] (omniquant.py 357): INFO [POSITION EMBEDDINGS] sin shape: torch.Size([1, 4096, 128])
42
+ [2025-12-30 23:56:02 root] (omniquant.py 370): INFO ====================================================================================================
43
+ [2025-12-30 23:56:02 root] (omniquant.py 371): INFO [SEARCH] Starting mixed-precision search...
44
+ [2025-12-30 23:56:02 root] (omniquant.py 372): INFO ====================================================================================================
45
+ [2025-12-30 23:56:02 root] (omniquant.py 387): INFO [SEARCH] Loading measurements from file...
46
+ [2025-12-30 23:56:02 root] (omniquant.py 391): INFO [SEARCH] Loaded measurements from ./measure/qwen3-vl-32b-sample128-gpu/
47
+ [2025-12-30 23:56:02 root] (omniquant.py 411): INFO [SEARCH] Running optimization phase...
48
+ [2025-12-30 23:56:08 root] (omniquant.py 422): INFO [SEARCH] Optimization complete
49
+ [2025-12-30 23:56:08 root] (omniquant.py 461): INFO ====================================================================================================
50
+ [2025-12-30 23:56:08 root] (omniquant.py 462): INFO [QUANTIZATION] Starting layer-wise quantization for 64 layers
51
+ [2025-12-30 23:56:08 root] (omniquant.py 463): INFO ====================================================================================================
52
+ [2025-12-30 23:56:08 root] (omniquant.py 473): INFO
53
+ ====================================================================================================
54
+ [2025-12-30 23:56:08 root] (omniquant.py 474): INFO [LAYER 0/63] Starting quantization
55
+ [2025-12-30 23:56:08 root] (omniquant.py 475): INFO ====================================================================================================
56
+ [2025-12-30 23:56:10 root] (omniquant.py 488): INFO [LAYER 0] Applying mixed-precision strategy...
57
+ [2025-12-30 23:56:10 root] (omniquant.py 517): INFO model.language_model.layers.0.self_attn.q_proj: QParams(32, [4], [1], 4, 32)
58
+ [2025-12-30 23:56:10 root] (omniquant.py 517): INFO model.language_model.layers.0.self_attn.k_proj: QParams(32, [4], [1], 4, 32)
59
+ [2025-12-30 23:56:10 root] (omniquant.py 517): INFO model.language_model.layers.0.self_attn.v_proj: QParams(32, [4], [1], 4, 32)
60
+ [2025-12-30 23:56:10 root] (omniquant.py 517): INFO model.language_model.layers.0.self_attn.o_proj: QParams(32, [4], [1], 4, 32)
61
+ [2025-12-30 23:56:11 root] (omniquant.py 517): INFO model.language_model.layers.0.mlp.gate_proj: QParams(32, [4], [1], 4, 32)
62
+ [2025-12-30 23:56:11 root] (omniquant.py 517): INFO model.language_model.layers.0.mlp.up_proj: QParams(32, [4], [1], 4, 32)
63
+ [2025-12-30 23:56:11 root] (omniquant.py 517): INFO model.language_model.layers.0.mlp.down_proj: QParams(32, [4], [1], 4, 32)
64
+ [2025-12-30 23:56:12 root] (omniquant.py 534): INFO [LAYER 0] BPW: 5.00
65
+ [2025-12-30 23:56:12 root] (omniquant.py 539): INFO [LAYER 0] Computing FP reference outputs...
66
+ [2025-12-30 23:56:12 root] (omniquant.py 559): INFO [LAYER 0] FP reference sample 0:
67
+ [2025-12-30 23:56:12 root] (omniquant.py 560): INFO fp_inp_batch: torch.Size([1, 4096, 5120])
68
+ [2025-12-30 23:56:12 root] (omniquant.py 561): INFO batch_position_ids: torch.Size([1, 4096])
69
+ [2025-12-30 23:56:12 root] (omniquant.py 567): INFO position_embeddings[0] (cos): torch.Size([1, 4096, 128])
70
+ [2025-12-30 23:56:12 root] (omniquant.py 568): INFO position_embeddings[1] (sin): torch.Size([1, 4096, 128])
71
+ [2025-12-30 23:56:12 root] (omniquant.py 587): INFO fp_hidden_states: torch.Size([1, 4096, 5120])
log_rank0_1767135476.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_rank0_1767270363.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f97072b285aa566e62ce3874551553a21a9c3b33bf1d49422619d523fbb3a03
3
- size 4993559464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5484e90778f0393ec54c0c5ea0b30c8ce8d240b91c90c2f7b9655645da0013
3
+ size 4989486632
model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a3dd0d92f63898f9e935e6a361c6123ceb2785cea4d20b832e74c9b2f001c0
3
- size 4992961184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab8a3b79c0f9a632967d5e1e3dcb001e603838225789658bdc4e986b8752a330
3
+ size 4989825872
model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:806c6049fb0d6283f97643cff70f63f2db4126c9bfa48579ca53feb0eb71249f
3
- size 4947587080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0c48c5c661d230ab351e18ea84272492dbcf3dfd8df7c2b2a8936fb0623f340
3
+ size 4984013472
model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab8f65c27b6ffcdf79abf20dda07d2539439c815c3656fbb7e6b74be7af23866
3
- size 3272528192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f01c987898ac5ec7898287607f3457efdaee8098c022d447ad18235baf7ffc7a
3
+ size 3192028080
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
  "total_parameters": 2151768304,
4
- "total_size": 23699030976
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00006-of-00006.safetensors",
@@ -864,12 +864,12 @@
864
  "model.language_model.layers.28.self_attn.v_proj.qweight": "model-00003-of-00006.safetensors",
865
  "model.language_model.layers.28.self_attn.v_proj.scales": "model-00003-of-00006.safetensors",
866
  "model.language_model.layers.28.self_attn.v_proj.zeros": "model-00003-of-00006.safetensors",
867
- "model.language_model.layers.29.input_layernorm.weight": "model-00004-of-00006.safetensors",
868
  "model.language_model.layers.29.mlp.down_proj.channel_scale": "model-00003-of-00006.safetensors",
869
  "model.language_model.layers.29.mlp.down_proj.q_perm": "model-00003-of-00006.safetensors",
870
- "model.language_model.layers.29.mlp.down_proj.qweight": "model-00004-of-00006.safetensors",
871
- "model.language_model.layers.29.mlp.down_proj.scales": "model-00004-of-00006.safetensors",
872
- "model.language_model.layers.29.mlp.down_proj.zeros": "model-00004-of-00006.safetensors",
873
  "model.language_model.layers.29.mlp.gate_proj.channel_scale": "model-00003-of-00006.safetensors",
874
  "model.language_model.layers.29.mlp.gate_proj.q_perm": "model-00003-of-00006.safetensors",
875
  "model.language_model.layers.29.mlp.gate_proj.qweight": "model-00003-of-00006.safetensors",
@@ -880,7 +880,7 @@
880
  "model.language_model.layers.29.mlp.up_proj.qweight": "model-00003-of-00006.safetensors",
881
  "model.language_model.layers.29.mlp.up_proj.scales": "model-00003-of-00006.safetensors",
882
  "model.language_model.layers.29.mlp.up_proj.zeros": "model-00003-of-00006.safetensors",
883
- "model.language_model.layers.29.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
884
  "model.language_model.layers.29.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
885
  "model.language_model.layers.29.self_attn.k_proj.channel_scale": "model-00003-of-00006.safetensors",
886
  "model.language_model.layers.29.self_attn.k_proj.q_perm": "model-00003-of-00006.safetensors",
@@ -960,27 +960,27 @@
960
  "model.language_model.layers.30.mlp.up_proj.zeros": "model-00004-of-00006.safetensors",
961
  "model.language_model.layers.30.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
962
  "model.language_model.layers.30.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
963
- "model.language_model.layers.30.self_attn.k_proj.channel_scale": "model-00004-of-00006.safetensors",
964
- "model.language_model.layers.30.self_attn.k_proj.q_perm": "model-00004-of-00006.safetensors",
965
- "model.language_model.layers.30.self_attn.k_proj.qweight": "model-00004-of-00006.safetensors",
966
- "model.language_model.layers.30.self_attn.k_proj.scales": "model-00004-of-00006.safetensors",
967
- "model.language_model.layers.30.self_attn.k_proj.zeros": "model-00004-of-00006.safetensors",
968
- "model.language_model.layers.30.self_attn.o_proj.channel_scale": "model-00004-of-00006.safetensors",
969
- "model.language_model.layers.30.self_attn.o_proj.q_perm": "model-00004-of-00006.safetensors",
970
  "model.language_model.layers.30.self_attn.o_proj.qweight": "model-00004-of-00006.safetensors",
971
  "model.language_model.layers.30.self_attn.o_proj.scales": "model-00004-of-00006.safetensors",
972
  "model.language_model.layers.30.self_attn.o_proj.zeros": "model-00004-of-00006.safetensors",
973
  "model.language_model.layers.30.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
974
- "model.language_model.layers.30.self_attn.q_proj.channel_scale": "model-00004-of-00006.safetensors",
975
- "model.language_model.layers.30.self_attn.q_proj.q_perm": "model-00004-of-00006.safetensors",
976
- "model.language_model.layers.30.self_attn.q_proj.qweight": "model-00004-of-00006.safetensors",
977
- "model.language_model.layers.30.self_attn.q_proj.scales": "model-00004-of-00006.safetensors",
978
- "model.language_model.layers.30.self_attn.q_proj.zeros": "model-00004-of-00006.safetensors",
979
- "model.language_model.layers.30.self_attn.v_proj.channel_scale": "model-00004-of-00006.safetensors",
980
- "model.language_model.layers.30.self_attn.v_proj.q_perm": "model-00004-of-00006.safetensors",
981
- "model.language_model.layers.30.self_attn.v_proj.qweight": "model-00004-of-00006.safetensors",
982
- "model.language_model.layers.30.self_attn.v_proj.scales": "model-00004-of-00006.safetensors",
983
- "model.language_model.layers.30.self_attn.v_proj.zeros": "model-00004-of-00006.safetensors",
984
  "model.language_model.layers.31.input_layernorm.weight": "model-00004-of-00006.safetensors",
985
  "model.language_model.layers.31.mlp.down_proj.channel_scale": "model-00004-of-00006.safetensors",
986
  "model.language_model.layers.31.mlp.down_proj.q_perm": "model-00004-of-00006.safetensors",
@@ -1722,12 +1722,12 @@
1722
  "model.language_model.layers.48.self_attn.v_proj.qweight": "model-00004-of-00006.safetensors",
1723
  "model.language_model.layers.48.self_attn.v_proj.scales": "model-00004-of-00006.safetensors",
1724
  "model.language_model.layers.48.self_attn.v_proj.zeros": "model-00004-of-00006.safetensors",
1725
- "model.language_model.layers.49.input_layernorm.weight": "model-00005-of-00006.safetensors",
1726
  "model.language_model.layers.49.mlp.down_proj.channel_scale": "model-00004-of-00006.safetensors",
1727
  "model.language_model.layers.49.mlp.down_proj.q_perm": "model-00004-of-00006.safetensors",
1728
- "model.language_model.layers.49.mlp.down_proj.qweight": "model-00005-of-00006.safetensors",
1729
- "model.language_model.layers.49.mlp.down_proj.scales": "model-00005-of-00006.safetensors",
1730
- "model.language_model.layers.49.mlp.down_proj.zeros": "model-00005-of-00006.safetensors",
1731
  "model.language_model.layers.49.mlp.gate_proj.channel_scale": "model-00004-of-00006.safetensors",
1732
  "model.language_model.layers.49.mlp.gate_proj.q_perm": "model-00004-of-00006.safetensors",
1733
  "model.language_model.layers.49.mlp.gate_proj.qweight": "model-00004-of-00006.safetensors",
@@ -1738,7 +1738,7 @@
1738
  "model.language_model.layers.49.mlp.up_proj.qweight": "model-00004-of-00006.safetensors",
1739
  "model.language_model.layers.49.mlp.up_proj.scales": "model-00004-of-00006.safetensors",
1740
  "model.language_model.layers.49.mlp.up_proj.zeros": "model-00004-of-00006.safetensors",
1741
- "model.language_model.layers.49.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
1742
  "model.language_model.layers.49.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
1743
  "model.language_model.layers.49.self_attn.k_proj.channel_scale": "model-00004-of-00006.safetensors",
1744
  "model.language_model.layers.49.self_attn.k_proj.q_perm": "model-00004-of-00006.safetensors",
@@ -1801,44 +1801,44 @@
1801
  "model.language_model.layers.5.self_attn.v_proj.scales": "model-00002-of-00006.safetensors",
1802
  "model.language_model.layers.5.self_attn.v_proj.zeros": "model-00002-of-00006.safetensors",
1803
  "model.language_model.layers.50.input_layernorm.weight": "model-00005-of-00006.safetensors",
1804
- "model.language_model.layers.50.mlp.down_proj.channel_scale": "model-00005-of-00006.safetensors",
1805
- "model.language_model.layers.50.mlp.down_proj.q_perm": "model-00005-of-00006.safetensors",
1806
  "model.language_model.layers.50.mlp.down_proj.qweight": "model-00005-of-00006.safetensors",
1807
  "model.language_model.layers.50.mlp.down_proj.scales": "model-00005-of-00006.safetensors",
1808
  "model.language_model.layers.50.mlp.down_proj.zeros": "model-00005-of-00006.safetensors",
1809
- "model.language_model.layers.50.mlp.gate_proj.channel_scale": "model-00005-of-00006.safetensors",
1810
- "model.language_model.layers.50.mlp.gate_proj.q_perm": "model-00005-of-00006.safetensors",
1811
- "model.language_model.layers.50.mlp.gate_proj.qweight": "model-00005-of-00006.safetensors",
1812
- "model.language_model.layers.50.mlp.gate_proj.scales": "model-00005-of-00006.safetensors",
1813
- "model.language_model.layers.50.mlp.gate_proj.zeros": "model-00005-of-00006.safetensors",
1814
- "model.language_model.layers.50.mlp.up_proj.channel_scale": "model-00005-of-00006.safetensors",
1815
- "model.language_model.layers.50.mlp.up_proj.q_perm": "model-00005-of-00006.safetensors",
1816
- "model.language_model.layers.50.mlp.up_proj.qweight": "model-00005-of-00006.safetensors",
1817
- "model.language_model.layers.50.mlp.up_proj.scales": "model-00005-of-00006.safetensors",
1818
- "model.language_model.layers.50.mlp.up_proj.zeros": "model-00005-of-00006.safetensors",
1819
  "model.language_model.layers.50.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
1820
- "model.language_model.layers.50.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
1821
- "model.language_model.layers.50.self_attn.k_proj.channel_scale": "model-00005-of-00006.safetensors",
1822
- "model.language_model.layers.50.self_attn.k_proj.q_perm": "model-00005-of-00006.safetensors",
1823
- "model.language_model.layers.50.self_attn.k_proj.qweight": "model-00005-of-00006.safetensors",
1824
- "model.language_model.layers.50.self_attn.k_proj.scales": "model-00005-of-00006.safetensors",
1825
- "model.language_model.layers.50.self_attn.k_proj.zeros": "model-00005-of-00006.safetensors",
1826
- "model.language_model.layers.50.self_attn.o_proj.channel_scale": "model-00005-of-00006.safetensors",
1827
- "model.language_model.layers.50.self_attn.o_proj.q_perm": "model-00005-of-00006.safetensors",
1828
- "model.language_model.layers.50.self_attn.o_proj.qweight": "model-00005-of-00006.safetensors",
1829
- "model.language_model.layers.50.self_attn.o_proj.scales": "model-00005-of-00006.safetensors",
1830
- "model.language_model.layers.50.self_attn.o_proj.zeros": "model-00005-of-00006.safetensors",
1831
- "model.language_model.layers.50.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
1832
- "model.language_model.layers.50.self_attn.q_proj.channel_scale": "model-00005-of-00006.safetensors",
1833
- "model.language_model.layers.50.self_attn.q_proj.q_perm": "model-00005-of-00006.safetensors",
1834
- "model.language_model.layers.50.self_attn.q_proj.qweight": "model-00005-of-00006.safetensors",
1835
- "model.language_model.layers.50.self_attn.q_proj.scales": "model-00005-of-00006.safetensors",
1836
- "model.language_model.layers.50.self_attn.q_proj.zeros": "model-00005-of-00006.safetensors",
1837
- "model.language_model.layers.50.self_attn.v_proj.channel_scale": "model-00005-of-00006.safetensors",
1838
- "model.language_model.layers.50.self_attn.v_proj.q_perm": "model-00005-of-00006.safetensors",
1839
- "model.language_model.layers.50.self_attn.v_proj.qweight": "model-00005-of-00006.safetensors",
1840
- "model.language_model.layers.50.self_attn.v_proj.scales": "model-00005-of-00006.safetensors",
1841
- "model.language_model.layers.50.self_attn.v_proj.zeros": "model-00005-of-00006.safetensors",
1842
  "model.language_model.layers.51.input_layernorm.weight": "model-00005-of-00006.safetensors",
1843
  "model.language_model.layers.51.mlp.down_proj.channel_scale": "model-00005-of-00006.safetensors",
1844
  "model.language_model.layers.51.mlp.down_proj.q_perm": "model-00005-of-00006.safetensors",
@@ -2190,12 +2190,12 @@
2190
  "model.language_model.layers.59.self_attn.v_proj.qweight": "model-00005-of-00006.safetensors",
2191
  "model.language_model.layers.59.self_attn.v_proj.scales": "model-00005-of-00006.safetensors",
2192
  "model.language_model.layers.59.self_attn.v_proj.zeros": "model-00005-of-00006.safetensors",
2193
- "model.language_model.layers.6.input_layernorm.weight": "model-00003-of-00006.safetensors",
2194
- "model.language_model.layers.6.mlp.down_proj.channel_scale": "model-00003-of-00006.safetensors",
2195
- "model.language_model.layers.6.mlp.down_proj.q_perm": "model-00003-of-00006.safetensors",
2196
- "model.language_model.layers.6.mlp.down_proj.qweight": "model-00003-of-00006.safetensors",
2197
- "model.language_model.layers.6.mlp.down_proj.scales": "model-00003-of-00006.safetensors",
2198
- "model.language_model.layers.6.mlp.down_proj.zeros": "model-00003-of-00006.safetensors",
2199
  "model.language_model.layers.6.mlp.gate_proj.channel_scale": "model-00002-of-00006.safetensors",
2200
  "model.language_model.layers.6.mlp.gate_proj.q_perm": "model-00002-of-00006.safetensors",
2201
  "model.language_model.layers.6.mlp.gate_proj.qweight": "model-00002-of-00006.safetensors",
@@ -2204,9 +2204,9 @@
2204
  "model.language_model.layers.6.mlp.up_proj.channel_scale": "model-00002-of-00006.safetensors",
2205
  "model.language_model.layers.6.mlp.up_proj.q_perm": "model-00002-of-00006.safetensors",
2206
  "model.language_model.layers.6.mlp.up_proj.qweight": "model-00002-of-00006.safetensors",
2207
- "model.language_model.layers.6.mlp.up_proj.scales": "model-00003-of-00006.safetensors",
2208
- "model.language_model.layers.6.mlp.up_proj.zeros": "model-00003-of-00006.safetensors",
2209
- "model.language_model.layers.6.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
2210
  "model.language_model.layers.6.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
2211
  "model.language_model.layers.6.self_attn.k_proj.channel_scale": "model-00002-of-00006.safetensors",
2212
  "model.language_model.layers.6.self_attn.k_proj.q_perm": "model-00002-of-00006.safetensors",
@@ -2391,8 +2391,8 @@
2391
  "model.language_model.layers.7.mlp.down_proj.qweight": "model-00003-of-00006.safetensors",
2392
  "model.language_model.layers.7.mlp.down_proj.scales": "model-00003-of-00006.safetensors",
2393
  "model.language_model.layers.7.mlp.down_proj.zeros": "model-00003-of-00006.safetensors",
2394
- "model.language_model.layers.7.mlp.gate_proj.channel_scale": "model-00003-of-00006.safetensors",
2395
- "model.language_model.layers.7.mlp.gate_proj.q_perm": "model-00003-of-00006.safetensors",
2396
  "model.language_model.layers.7.mlp.gate_proj.qweight": "model-00003-of-00006.safetensors",
2397
  "model.language_model.layers.7.mlp.gate_proj.scales": "model-00003-of-00006.safetensors",
2398
  "model.language_model.layers.7.mlp.gate_proj.zeros": "model-00003-of-00006.safetensors",
@@ -2402,28 +2402,28 @@
2402
  "model.language_model.layers.7.mlp.up_proj.scales": "model-00003-of-00006.safetensors",
2403
  "model.language_model.layers.7.mlp.up_proj.zeros": "model-00003-of-00006.safetensors",
2404
  "model.language_model.layers.7.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
2405
- "model.language_model.layers.7.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
2406
- "model.language_model.layers.7.self_attn.k_proj.channel_scale": "model-00003-of-00006.safetensors",
2407
- "model.language_model.layers.7.self_attn.k_proj.q_perm": "model-00003-of-00006.safetensors",
2408
- "model.language_model.layers.7.self_attn.k_proj.qweight": "model-00003-of-00006.safetensors",
2409
- "model.language_model.layers.7.self_attn.k_proj.scales": "model-00003-of-00006.safetensors",
2410
- "model.language_model.layers.7.self_attn.k_proj.zeros": "model-00003-of-00006.safetensors",
2411
- "model.language_model.layers.7.self_attn.o_proj.channel_scale": "model-00003-of-00006.safetensors",
2412
- "model.language_model.layers.7.self_attn.o_proj.q_perm": "model-00003-of-00006.safetensors",
2413
- "model.language_model.layers.7.self_attn.o_proj.qweight": "model-00003-of-00006.safetensors",
2414
- "model.language_model.layers.7.self_attn.o_proj.scales": "model-00003-of-00006.safetensors",
2415
- "model.language_model.layers.7.self_attn.o_proj.zeros": "model-00003-of-00006.safetensors",
2416
- "model.language_model.layers.7.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
2417
- "model.language_model.layers.7.self_attn.q_proj.channel_scale": "model-00003-of-00006.safetensors",
2418
- "model.language_model.layers.7.self_attn.q_proj.q_perm": "model-00003-of-00006.safetensors",
2419
- "model.language_model.layers.7.self_attn.q_proj.qweight": "model-00003-of-00006.safetensors",
2420
- "model.language_model.layers.7.self_attn.q_proj.scales": "model-00003-of-00006.safetensors",
2421
- "model.language_model.layers.7.self_attn.q_proj.zeros": "model-00003-of-00006.safetensors",
2422
- "model.language_model.layers.7.self_attn.v_proj.channel_scale": "model-00003-of-00006.safetensors",
2423
- "model.language_model.layers.7.self_attn.v_proj.q_perm": "model-00003-of-00006.safetensors",
2424
- "model.language_model.layers.7.self_attn.v_proj.qweight": "model-00003-of-00006.safetensors",
2425
- "model.language_model.layers.7.self_attn.v_proj.scales": "model-00003-of-00006.safetensors",
2426
- "model.language_model.layers.7.self_attn.v_proj.zeros": "model-00003-of-00006.safetensors",
2427
  "model.language_model.layers.8.input_layernorm.weight": "model-00003-of-00006.safetensors",
2428
  "model.language_model.layers.8.mlp.down_proj.channel_scale": "model-00003-of-00006.safetensors",
2429
  "model.language_model.layers.8.mlp.down_proj.q_perm": "model-00003-of-00006.safetensors",
 
1
  {
2
  "metadata": {
3
  "total_parameters": 2151768304,
4
+ "total_size": 23647749056
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00006-of-00006.safetensors",
 
864
  "model.language_model.layers.28.self_attn.v_proj.qweight": "model-00003-of-00006.safetensors",
865
  "model.language_model.layers.28.self_attn.v_proj.scales": "model-00003-of-00006.safetensors",
866
  "model.language_model.layers.28.self_attn.v_proj.zeros": "model-00003-of-00006.safetensors",
867
+ "model.language_model.layers.29.input_layernorm.weight": "model-00003-of-00006.safetensors",
868
  "model.language_model.layers.29.mlp.down_proj.channel_scale": "model-00003-of-00006.safetensors",
869
  "model.language_model.layers.29.mlp.down_proj.q_perm": "model-00003-of-00006.safetensors",
870
+ "model.language_model.layers.29.mlp.down_proj.qweight": "model-00003-of-00006.safetensors",
871
+ "model.language_model.layers.29.mlp.down_proj.scales": "model-00003-of-00006.safetensors",
872
+ "model.language_model.layers.29.mlp.down_proj.zeros": "model-00003-of-00006.safetensors",
873
  "model.language_model.layers.29.mlp.gate_proj.channel_scale": "model-00003-of-00006.safetensors",
874
  "model.language_model.layers.29.mlp.gate_proj.q_perm": "model-00003-of-00006.safetensors",
875
  "model.language_model.layers.29.mlp.gate_proj.qweight": "model-00003-of-00006.safetensors",
 
880
  "model.language_model.layers.29.mlp.up_proj.qweight": "model-00003-of-00006.safetensors",
881
  "model.language_model.layers.29.mlp.up_proj.scales": "model-00003-of-00006.safetensors",
882
  "model.language_model.layers.29.mlp.up_proj.zeros": "model-00003-of-00006.safetensors",
883
+ "model.language_model.layers.29.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
884
  "model.language_model.layers.29.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
885
  "model.language_model.layers.29.self_attn.k_proj.channel_scale": "model-00003-of-00006.safetensors",
886
  "model.language_model.layers.29.self_attn.k_proj.q_perm": "model-00003-of-00006.safetensors",
 
960
  "model.language_model.layers.30.mlp.up_proj.zeros": "model-00004-of-00006.safetensors",
961
  "model.language_model.layers.30.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
962
  "model.language_model.layers.30.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
963
+ "model.language_model.layers.30.self_attn.k_proj.channel_scale": "model-00003-of-00006.safetensors",
964
+ "model.language_model.layers.30.self_attn.k_proj.q_perm": "model-00003-of-00006.safetensors",
965
+ "model.language_model.layers.30.self_attn.k_proj.qweight": "model-00003-of-00006.safetensors",
966
+ "model.language_model.layers.30.self_attn.k_proj.scales": "model-00003-of-00006.safetensors",
967
+ "model.language_model.layers.30.self_attn.k_proj.zeros": "model-00003-of-00006.safetensors",
968
+ "model.language_model.layers.30.self_attn.o_proj.channel_scale": "model-00003-of-00006.safetensors",
969
+ "model.language_model.layers.30.self_attn.o_proj.q_perm": "model-00003-of-00006.safetensors",
970
  "model.language_model.layers.30.self_attn.o_proj.qweight": "model-00004-of-00006.safetensors",
971
  "model.language_model.layers.30.self_attn.o_proj.scales": "model-00004-of-00006.safetensors",
972
  "model.language_model.layers.30.self_attn.o_proj.zeros": "model-00004-of-00006.safetensors",
973
  "model.language_model.layers.30.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
974
+ "model.language_model.layers.30.self_attn.q_proj.channel_scale": "model-00003-of-00006.safetensors",
975
+ "model.language_model.layers.30.self_attn.q_proj.q_perm": "model-00003-of-00006.safetensors",
976
+ "model.language_model.layers.30.self_attn.q_proj.qweight": "model-00003-of-00006.safetensors",
977
+ "model.language_model.layers.30.self_attn.q_proj.scales": "model-00003-of-00006.safetensors",
978
+ "model.language_model.layers.30.self_attn.q_proj.zeros": "model-00003-of-00006.safetensors",
979
+ "model.language_model.layers.30.self_attn.v_proj.channel_scale": "model-00003-of-00006.safetensors",
980
+ "model.language_model.layers.30.self_attn.v_proj.q_perm": "model-00003-of-00006.safetensors",
981
+ "model.language_model.layers.30.self_attn.v_proj.qweight": "model-00003-of-00006.safetensors",
982
+ "model.language_model.layers.30.self_attn.v_proj.scales": "model-00003-of-00006.safetensors",
983
+ "model.language_model.layers.30.self_attn.v_proj.zeros": "model-00003-of-00006.safetensors",
984
  "model.language_model.layers.31.input_layernorm.weight": "model-00004-of-00006.safetensors",
985
  "model.language_model.layers.31.mlp.down_proj.channel_scale": "model-00004-of-00006.safetensors",
986
  "model.language_model.layers.31.mlp.down_proj.q_perm": "model-00004-of-00006.safetensors",
 
1722
  "model.language_model.layers.48.self_attn.v_proj.qweight": "model-00004-of-00006.safetensors",
1723
  "model.language_model.layers.48.self_attn.v_proj.scales": "model-00004-of-00006.safetensors",
1724
  "model.language_model.layers.48.self_attn.v_proj.zeros": "model-00004-of-00006.safetensors",
1725
+ "model.language_model.layers.49.input_layernorm.weight": "model-00004-of-00006.safetensors",
1726
  "model.language_model.layers.49.mlp.down_proj.channel_scale": "model-00004-of-00006.safetensors",
1727
  "model.language_model.layers.49.mlp.down_proj.q_perm": "model-00004-of-00006.safetensors",
1728
+ "model.language_model.layers.49.mlp.down_proj.qweight": "model-00004-of-00006.safetensors",
1729
+ "model.language_model.layers.49.mlp.down_proj.scales": "model-00004-of-00006.safetensors",
1730
+ "model.language_model.layers.49.mlp.down_proj.zeros": "model-00004-of-00006.safetensors",
1731
  "model.language_model.layers.49.mlp.gate_proj.channel_scale": "model-00004-of-00006.safetensors",
1732
  "model.language_model.layers.49.mlp.gate_proj.q_perm": "model-00004-of-00006.safetensors",
1733
  "model.language_model.layers.49.mlp.gate_proj.qweight": "model-00004-of-00006.safetensors",
 
1738
  "model.language_model.layers.49.mlp.up_proj.qweight": "model-00004-of-00006.safetensors",
1739
  "model.language_model.layers.49.mlp.up_proj.scales": "model-00004-of-00006.safetensors",
1740
  "model.language_model.layers.49.mlp.up_proj.zeros": "model-00004-of-00006.safetensors",
1741
+ "model.language_model.layers.49.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
1742
  "model.language_model.layers.49.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
1743
  "model.language_model.layers.49.self_attn.k_proj.channel_scale": "model-00004-of-00006.safetensors",
1744
  "model.language_model.layers.49.self_attn.k_proj.q_perm": "model-00004-of-00006.safetensors",
 
1801
  "model.language_model.layers.5.self_attn.v_proj.scales": "model-00002-of-00006.safetensors",
1802
  "model.language_model.layers.5.self_attn.v_proj.zeros": "model-00002-of-00006.safetensors",
1803
  "model.language_model.layers.50.input_layernorm.weight": "model-00005-of-00006.safetensors",
1804
+ "model.language_model.layers.50.mlp.down_proj.channel_scale": "model-00004-of-00006.safetensors",
1805
+ "model.language_model.layers.50.mlp.down_proj.q_perm": "model-00004-of-00006.safetensors",
1806
  "model.language_model.layers.50.mlp.down_proj.qweight": "model-00005-of-00006.safetensors",
1807
  "model.language_model.layers.50.mlp.down_proj.scales": "model-00005-of-00006.safetensors",
1808
  "model.language_model.layers.50.mlp.down_proj.zeros": "model-00005-of-00006.safetensors",
1809
+ "model.language_model.layers.50.mlp.gate_proj.channel_scale": "model-00004-of-00006.safetensors",
1810
+ "model.language_model.layers.50.mlp.gate_proj.q_perm": "model-00004-of-00006.safetensors",
1811
+ "model.language_model.layers.50.mlp.gate_proj.qweight": "model-00004-of-00006.safetensors",
1812
+ "model.language_model.layers.50.mlp.gate_proj.scales": "model-00004-of-00006.safetensors",
1813
+ "model.language_model.layers.50.mlp.gate_proj.zeros": "model-00004-of-00006.safetensors",
1814
+ "model.language_model.layers.50.mlp.up_proj.channel_scale": "model-00004-of-00006.safetensors",
1815
+ "model.language_model.layers.50.mlp.up_proj.q_perm": "model-00004-of-00006.safetensors",
1816
+ "model.language_model.layers.50.mlp.up_proj.qweight": "model-00004-of-00006.safetensors",
1817
+ "model.language_model.layers.50.mlp.up_proj.scales": "model-00004-of-00006.safetensors",
1818
+ "model.language_model.layers.50.mlp.up_proj.zeros": "model-00004-of-00006.safetensors",
1819
  "model.language_model.layers.50.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
1820
+ "model.language_model.layers.50.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
1821
+ "model.language_model.layers.50.self_attn.k_proj.channel_scale": "model-00004-of-00006.safetensors",
1822
+ "model.language_model.layers.50.self_attn.k_proj.q_perm": "model-00004-of-00006.safetensors",
1823
+ "model.language_model.layers.50.self_attn.k_proj.qweight": "model-00004-of-00006.safetensors",
1824
+ "model.language_model.layers.50.self_attn.k_proj.scales": "model-00004-of-00006.safetensors",
1825
+ "model.language_model.layers.50.self_attn.k_proj.zeros": "model-00004-of-00006.safetensors",
1826
+ "model.language_model.layers.50.self_attn.o_proj.channel_scale": "model-00004-of-00006.safetensors",
1827
+ "model.language_model.layers.50.self_attn.o_proj.q_perm": "model-00004-of-00006.safetensors",
1828
+ "model.language_model.layers.50.self_attn.o_proj.qweight": "model-00004-of-00006.safetensors",
1829
+ "model.language_model.layers.50.self_attn.o_proj.scales": "model-00004-of-00006.safetensors",
1830
+ "model.language_model.layers.50.self_attn.o_proj.zeros": "model-00004-of-00006.safetensors",
1831
+ "model.language_model.layers.50.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
1832
+ "model.language_model.layers.50.self_attn.q_proj.channel_scale": "model-00004-of-00006.safetensors",
1833
+ "model.language_model.layers.50.self_attn.q_proj.q_perm": "model-00004-of-00006.safetensors",
1834
+ "model.language_model.layers.50.self_attn.q_proj.qweight": "model-00004-of-00006.safetensors",
1835
+ "model.language_model.layers.50.self_attn.q_proj.scales": "model-00004-of-00006.safetensors",
1836
+ "model.language_model.layers.50.self_attn.q_proj.zeros": "model-00004-of-00006.safetensors",
1837
+ "model.language_model.layers.50.self_attn.v_proj.channel_scale": "model-00004-of-00006.safetensors",
1838
+ "model.language_model.layers.50.self_attn.v_proj.q_perm": "model-00004-of-00006.safetensors",
1839
+ "model.language_model.layers.50.self_attn.v_proj.qweight": "model-00004-of-00006.safetensors",
1840
+ "model.language_model.layers.50.self_attn.v_proj.scales": "model-00004-of-00006.safetensors",
1841
+ "model.language_model.layers.50.self_attn.v_proj.zeros": "model-00004-of-00006.safetensors",
1842
  "model.language_model.layers.51.input_layernorm.weight": "model-00005-of-00006.safetensors",
1843
  "model.language_model.layers.51.mlp.down_proj.channel_scale": "model-00005-of-00006.safetensors",
1844
  "model.language_model.layers.51.mlp.down_proj.q_perm": "model-00005-of-00006.safetensors",
 
2190
  "model.language_model.layers.59.self_attn.v_proj.qweight": "model-00005-of-00006.safetensors",
2191
  "model.language_model.layers.59.self_attn.v_proj.scales": "model-00005-of-00006.safetensors",
2192
  "model.language_model.layers.59.self_attn.v_proj.zeros": "model-00005-of-00006.safetensors",
2193
+ "model.language_model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
2194
+ "model.language_model.layers.6.mlp.down_proj.channel_scale": "model-00002-of-00006.safetensors",
2195
+ "model.language_model.layers.6.mlp.down_proj.q_perm": "model-00002-of-00006.safetensors",
2196
+ "model.language_model.layers.6.mlp.down_proj.qweight": "model-00002-of-00006.safetensors",
2197
+ "model.language_model.layers.6.mlp.down_proj.scales": "model-00002-of-00006.safetensors",
2198
+ "model.language_model.layers.6.mlp.down_proj.zeros": "model-00002-of-00006.safetensors",
2199
  "model.language_model.layers.6.mlp.gate_proj.channel_scale": "model-00002-of-00006.safetensors",
2200
  "model.language_model.layers.6.mlp.gate_proj.q_perm": "model-00002-of-00006.safetensors",
2201
  "model.language_model.layers.6.mlp.gate_proj.qweight": "model-00002-of-00006.safetensors",
 
2204
  "model.language_model.layers.6.mlp.up_proj.channel_scale": "model-00002-of-00006.safetensors",
2205
  "model.language_model.layers.6.mlp.up_proj.q_perm": "model-00002-of-00006.safetensors",
2206
  "model.language_model.layers.6.mlp.up_proj.qweight": "model-00002-of-00006.safetensors",
2207
+ "model.language_model.layers.6.mlp.up_proj.scales": "model-00002-of-00006.safetensors",
2208
+ "model.language_model.layers.6.mlp.up_proj.zeros": "model-00002-of-00006.safetensors",
2209
+ "model.language_model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
2210
  "model.language_model.layers.6.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
2211
  "model.language_model.layers.6.self_attn.k_proj.channel_scale": "model-00002-of-00006.safetensors",
2212
  "model.language_model.layers.6.self_attn.k_proj.q_perm": "model-00002-of-00006.safetensors",
 
2391
  "model.language_model.layers.7.mlp.down_proj.qweight": "model-00003-of-00006.safetensors",
2392
  "model.language_model.layers.7.mlp.down_proj.scales": "model-00003-of-00006.safetensors",
2393
  "model.language_model.layers.7.mlp.down_proj.zeros": "model-00003-of-00006.safetensors",
2394
+ "model.language_model.layers.7.mlp.gate_proj.channel_scale": "model-00002-of-00006.safetensors",
2395
+ "model.language_model.layers.7.mlp.gate_proj.q_perm": "model-00002-of-00006.safetensors",
2396
  "model.language_model.layers.7.mlp.gate_proj.qweight": "model-00003-of-00006.safetensors",
2397
  "model.language_model.layers.7.mlp.gate_proj.scales": "model-00003-of-00006.safetensors",
2398
  "model.language_model.layers.7.mlp.gate_proj.zeros": "model-00003-of-00006.safetensors",
 
2402
  "model.language_model.layers.7.mlp.up_proj.scales": "model-00003-of-00006.safetensors",
2403
  "model.language_model.layers.7.mlp.up_proj.zeros": "model-00003-of-00006.safetensors",
2404
  "model.language_model.layers.7.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
2405
+ "model.language_model.layers.7.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
2406
+ "model.language_model.layers.7.self_attn.k_proj.channel_scale": "model-00002-of-00006.safetensors",
2407
+ "model.language_model.layers.7.self_attn.k_proj.q_perm": "model-00002-of-00006.safetensors",
2408
+ "model.language_model.layers.7.self_attn.k_proj.qweight": "model-00002-of-00006.safetensors",
2409
+ "model.language_model.layers.7.self_attn.k_proj.scales": "model-00002-of-00006.safetensors",
2410
+ "model.language_model.layers.7.self_attn.k_proj.zeros": "model-00002-of-00006.safetensors",
2411
+ "model.language_model.layers.7.self_attn.o_proj.channel_scale": "model-00002-of-00006.safetensors",
2412
+ "model.language_model.layers.7.self_attn.o_proj.q_perm": "model-00002-of-00006.safetensors",
2413
+ "model.language_model.layers.7.self_attn.o_proj.qweight": "model-00002-of-00006.safetensors",
2414
+ "model.language_model.layers.7.self_attn.o_proj.scales": "model-00002-of-00006.safetensors",
2415
+ "model.language_model.layers.7.self_attn.o_proj.zeros": "model-00002-of-00006.safetensors",
2416
+ "model.language_model.layers.7.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
2417
+ "model.language_model.layers.7.self_attn.q_proj.channel_scale": "model-00002-of-00006.safetensors",
2418
+ "model.language_model.layers.7.self_attn.q_proj.q_perm": "model-00002-of-00006.safetensors",
2419
+ "model.language_model.layers.7.self_attn.q_proj.qweight": "model-00002-of-00006.safetensors",
2420
+ "model.language_model.layers.7.self_attn.q_proj.scales": "model-00002-of-00006.safetensors",
2421
+ "model.language_model.layers.7.self_attn.q_proj.zeros": "model-00002-of-00006.safetensors",
2422
+ "model.language_model.layers.7.self_attn.v_proj.channel_scale": "model-00002-of-00006.safetensors",
2423
+ "model.language_model.layers.7.self_attn.v_proj.q_perm": "model-00002-of-00006.safetensors",
2424
+ "model.language_model.layers.7.self_attn.v_proj.qweight": "model-00002-of-00006.safetensors",
2425
+ "model.language_model.layers.7.self_attn.v_proj.scales": "model-00002-of-00006.safetensors",
2426
+ "model.language_model.layers.7.self_attn.v_proj.zeros": "model-00002-of-00006.safetensors",
2427
  "model.language_model.layers.8.input_layernorm.weight": "model-00003-of-00006.safetensors",
2428
  "model.language_model.layers.8.mlp.down_proj.channel_scale": "model-00003-of-00006.safetensors",
2429
  "model.language_model.layers.8.mlp.down_proj.q_perm": "model-00003-of-00006.safetensors",
quant_strategy.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
- "accuracy": 0.9828830574697349,
5
- "total_bits": 2102722560.0,
6
  "o_proj": {
7
  "group_size": {
8
- "4": 32
9
  },
10
  "bits": [
11
  4
@@ -18,7 +18,7 @@
18
  },
19
  "down_proj": {
20
  "group_size": {
21
- "4": 32
22
  },
23
  "bits": [
24
  4
@@ -31,7 +31,7 @@
31
  },
32
  "q_proj": {
33
  "group_size": {
34
- "4": 32
35
  },
36
  "bits": [
37
  4
@@ -44,7 +44,7 @@
44
  },
45
  "k_proj": {
46
  "group_size": {
47
- "4": 32
48
  },
49
  "bits": [
50
  4
@@ -57,7 +57,7 @@
57
  },
58
  "v_proj": {
59
  "group_size": {
60
- "4": 32
61
  },
62
  "bits": [
63
  4
@@ -70,7 +70,7 @@
70
  },
71
  "gate_proj": {
72
  "group_size": {
73
- "4": 32
74
  },
75
  "bits": [
76
  4
@@ -83,7 +83,7 @@
83
  },
84
  "up_proj": {
85
  "group_size": {
86
- "4": 32
87
  },
88
  "bits": [
89
  4
@@ -96,11 +96,11 @@
96
  }
97
  },
98
  "model.layers.1": {
99
- "accuracy": 0.9855277572060004,
100
- "total_bits": 2102722560.0,
101
  "o_proj": {
102
  "group_size": {
103
- "4": 32
104
  },
105
  "bits": [
106
  4
@@ -113,7 +113,7 @@
113
  },
114
  "down_proj": {
115
  "group_size": {
116
- "4": 32
117
  },
118
  "bits": [
119
  4
@@ -126,7 +126,7 @@
126
  },
127
  "q_proj": {
128
  "group_size": {
129
- "4": 32
130
  },
131
  "bits": [
132
  4
@@ -139,7 +139,7 @@
139
  },
140
  "k_proj": {
141
  "group_size": {
142
- "4": 32
143
  },
144
  "bits": [
145
  4
@@ -152,7 +152,7 @@
152
  },
153
  "v_proj": {
154
  "group_size": {
155
- "4": 32
156
  },
157
  "bits": [
158
  4
@@ -165,7 +165,7 @@
165
  },
166
  "gate_proj": {
167
  "group_size": {
168
- "4": 32
169
  },
170
  "bits": [
171
  4
@@ -178,7 +178,7 @@
178
  },
179
  "up_proj": {
180
  "group_size": {
181
- "4": 32
182
  },
183
  "bits": [
184
  4
@@ -286,8 +286,8 @@
286
  }
287
  },
288
  "model.layers.3": {
289
- "accuracy": 0.9826611040625721,
290
- "total_bits": 1988444160.0,
291
  "o_proj": {
292
  "group_size": {
293
  "4": 128
@@ -316,10 +316,10 @@
316
  },
317
  "q_proj": {
318
  "group_size": {
319
- "4": 128
320
  },
321
  "bits": [
322
- 4
323
  ],
324
  "bits_prop": [
325
  1
@@ -476,11 +476,11 @@
476
  }
477
  },
478
  "model.layers.5": {
479
- "accuracy": 0.9851978906372096,
480
- "total_bits": 2102722560.0,
481
  "o_proj": {
482
  "group_size": {
483
- "4": 32
484
  },
485
  "bits": [
486
  4
@@ -493,7 +493,7 @@
493
  },
494
  "down_proj": {
495
  "group_size": {
496
- "4": 32
497
  },
498
  "bits": [
499
  4
@@ -506,7 +506,7 @@
506
  },
507
  "q_proj": {
508
  "group_size": {
509
- "4": 32
510
  },
511
  "bits": [
512
  4
@@ -519,7 +519,7 @@
519
  },
520
  "k_proj": {
521
  "group_size": {
522
- "4": 32
523
  },
524
  "bits": [
525
  4
@@ -532,7 +532,7 @@
532
  },
533
  "v_proj": {
534
  "group_size": {
535
- "4": 32
536
  },
537
  "bits": [
538
  4
@@ -545,7 +545,7 @@
545
  },
546
  "gate_proj": {
547
  "group_size": {
548
- "4": 32
549
  },
550
  "bits": [
551
  4
@@ -558,7 +558,7 @@
558
  },
559
  "up_proj": {
560
  "group_size": {
561
- "4": 32
562
  },
563
  "bits": [
564
  4
@@ -856,14 +856,14 @@
856
  }
857
  },
858
  "model.layers.9": {
859
- "accuracy": 0.9856001285370439,
860
- "total_bits": 1368227840.0,
861
  "o_proj": {
862
  "group_size": {
863
- "2": 32
864
  },
865
  "bits": [
866
- 2
867
  ],
868
  "bits_prop": [
869
  1
@@ -951,8 +951,8 @@
951
  }
952
  },
953
  "model.layers.10": {
954
- "accuracy": 0.9813145939260721,
955
- "total_bits": 1210859520.0,
956
  "o_proj": {
957
  "group_size": {
958
  "4": 128
@@ -968,10 +968,10 @@
968
  },
969
  "down_proj": {
970
  "group_size": {
971
- "2": 32
972
  },
973
  "bits": [
974
- 2
975
  ],
976
  "bits_prop": [
977
  1
@@ -1046,14 +1046,14 @@
1046
  }
1047
  },
1048
  "model.layers.11": {
1049
- "accuracy": 0.9843400969984941,
1050
- "total_bits": 1368227840.0,
1051
  "o_proj": {
1052
  "group_size": {
1053
- "2": 32
1054
  },
1055
  "bits": [
1056
- 2
1057
  ],
1058
  "bits_prop": [
1059
  1
@@ -1089,10 +1089,10 @@
1089
  },
1090
  "k_proj": {
1091
  "group_size": {
1092
- "2": 32
1093
  },
1094
  "bits": [
1095
- 2
1096
  ],
1097
  "bits_prop": [
1098
  1
@@ -1141,14 +1141,14 @@
1141
  }
1142
  },
1143
  "model.layers.12": {
1144
- "accuracy": 0.9840793606999796,
1145
- "total_bits": 1368227840.0,
1146
  "o_proj": {
1147
  "group_size": {
1148
- "2": 32
1149
  },
1150
  "bits": [
1151
- 2
1152
  ],
1153
  "bits_prop": [
1154
  1
@@ -1236,14 +1236,14 @@
1236
  }
1237
  },
1238
  "model.layers.13": {
1239
- "accuracy": 0.9839283300098032,
1240
- "total_bits": 1368227840.0,
1241
  "o_proj": {
1242
  "group_size": {
1243
- "2": 32
1244
  },
1245
  "bits": [
1246
- 2
1247
  ],
1248
  "bits_prop": [
1249
  1
@@ -1279,10 +1279,10 @@
1279
  },
1280
  "k_proj": {
1281
  "group_size": {
1282
- "2": 32
1283
  },
1284
  "bits": [
1285
- 2
1286
  ],
1287
  "bits_prop": [
1288
  1
@@ -1331,14 +1331,14 @@
1331
  }
1332
  },
1333
  "model.layers.14": {
1334
- "accuracy": 0.9848056569171604,
1335
- "total_bits": 1368227840.0,
1336
  "o_proj": {
1337
  "group_size": {
1338
- "2": 32
1339
  },
1340
  "bits": [
1341
- 2
1342
  ],
1343
  "bits_prop": [
1344
  1
@@ -1374,10 +1374,10 @@
1374
  },
1375
  "k_proj": {
1376
  "group_size": {
1377
- "2": 32
1378
  },
1379
  "bits": [
1380
- 2
1381
  ],
1382
  "bits_prop": [
1383
  1
@@ -1426,14 +1426,14 @@
1426
  }
1427
  },
1428
  "model.layers.15": {
1429
- "accuracy": 0.9847434815601446,
1430
- "total_bits": 1368227840.0,
1431
  "o_proj": {
1432
  "group_size": {
1433
- "2": 32
1434
  },
1435
  "bits": [
1436
- 2
1437
  ],
1438
  "bits_prop": [
1439
  1
@@ -1469,10 +1469,10 @@
1469
  },
1470
  "k_proj": {
1471
  "group_size": {
1472
- "2": 32
1473
  },
1474
  "bits": [
1475
- 2
1476
  ],
1477
  "bits_prop": [
1478
  1
@@ -1521,8 +1521,8 @@
1521
  }
1522
  },
1523
  "model.layers.16": {
1524
- "accuracy": 0.9818843859247863,
1525
- "total_bits": 1210859520.0,
1526
  "o_proj": {
1527
  "group_size": {
1528
  "4": 128
@@ -1538,10 +1538,10 @@
1538
  },
1539
  "down_proj": {
1540
  "group_size": {
1541
- "2": 32
1542
  },
1543
  "bits": [
1544
- 2
1545
  ],
1546
  "bits_prop": [
1547
  1
@@ -1564,10 +1564,10 @@
1564
  },
1565
  "k_proj": {
1566
  "group_size": {
1567
- "2": 32
1568
  },
1569
  "bits": [
1570
- 2
1571
  ],
1572
  "bits_prop": [
1573
  1
@@ -1616,8 +1616,8 @@
1616
  }
1617
  },
1618
  "model.layers.17": {
1619
- "accuracy": 0.9818928238819353,
1620
- "total_bits": 1210859520.0,
1621
  "o_proj": {
1622
  "group_size": {
1623
  "4": 128
@@ -1633,10 +1633,10 @@
1633
  },
1634
  "down_proj": {
1635
  "group_size": {
1636
- "2": 32
1637
  },
1638
  "bits": [
1639
- 2
1640
  ],
1641
  "bits_prop": [
1642
  1
@@ -1659,10 +1659,10 @@
1659
  },
1660
  "k_proj": {
1661
  "group_size": {
1662
- "2": 32
1663
  },
1664
  "bits": [
1665
- 2
1666
  ],
1667
  "bits_prop": [
1668
  1
@@ -1711,14 +1711,14 @@
1711
  }
1712
  },
1713
  "model.layers.18": {
1714
- "accuracy": 0.9846934152592439,
1715
- "total_bits": 1368227840.0,
1716
  "o_proj": {
1717
  "group_size": {
1718
- "2": 32
1719
  },
1720
  "bits": [
1721
- 2
1722
  ],
1723
  "bits_prop": [
1724
  1
@@ -1754,10 +1754,10 @@
1754
  },
1755
  "k_proj": {
1756
  "group_size": {
1757
- "2": 32
1758
  },
1759
  "bits": [
1760
- 2
1761
  ],
1762
  "bits_prop": [
1763
  1
@@ -1901,14 +1901,14 @@
1901
  }
1902
  },
1903
  "model.layers.20": {
1904
- "accuracy": 0.9828240241040476,
1905
- "total_bits": 1368227840.0,
1906
  "o_proj": {
1907
  "group_size": {
1908
- "2": 32
1909
  },
1910
  "bits": [
1911
- 2
1912
  ],
1913
  "bits_prop": [
1914
  1
@@ -1944,10 +1944,10 @@
1944
  },
1945
  "k_proj": {
1946
  "group_size": {
1947
- "2": 32
1948
  },
1949
  "bits": [
1950
- 2
1951
  ],
1952
  "bits_prop": [
1953
  1
@@ -2091,8 +2091,8 @@
2091
  }
2092
  },
2093
  "model.layers.22": {
2094
- "accuracy": 0.9825039394199848,
2095
- "total_bits": 1525596160.0,
2096
  "o_proj": {
2097
  "group_size": {
2098
  "4": 128
@@ -2134,10 +2134,10 @@
2134
  },
2135
  "k_proj": {
2136
  "group_size": {
2137
- "4": 128
2138
  },
2139
  "bits": [
2140
- 4
2141
  ],
2142
  "bits_prop": [
2143
  1
@@ -2186,8 +2186,8 @@
2186
  }
2187
  },
2188
  "model.layers.23": {
2189
- "accuracy": 0.9812842943647411,
2190
- "total_bits": 1525596160.0,
2191
  "o_proj": {
2192
  "group_size": {
2193
  "4": 128
@@ -2216,10 +2216,10 @@
2216
  },
2217
  "q_proj": {
2218
  "group_size": {
2219
- "4": 128
2220
  },
2221
  "bits": [
2222
- 4
2223
  ],
2224
  "bits_prop": [
2225
  1
@@ -2281,8 +2281,8 @@
2281
  }
2282
  },
2283
  "model.layers.24": {
2284
- "accuracy": 0.9836647573392838,
2285
- "total_bits": 1673707520.0,
2286
  "o_proj": {
2287
  "group_size": {
2288
  "4": 128
@@ -2350,10 +2350,10 @@
2350
  },
2351
  "gate_proj": {
2352
  "group_size": {
2353
- "2": 32
2354
  },
2355
  "bits": [
2356
- 2
2357
  ],
2358
  "bits_prop": [
2359
  1
@@ -2363,10 +2363,10 @@
2363
  },
2364
  "up_proj": {
2365
  "group_size": {
2366
- "4": 128
2367
  },
2368
  "bits": [
2369
- 4
2370
  ],
2371
  "bits_prop": [
2372
  1
@@ -2566,8 +2566,8 @@
2566
  }
2567
  },
2568
  "model.layers.27": {
2569
- "accuracy": 0.9891230833891314,
2570
- "total_bits": 1907834880.0,
2571
  "o_proj": {
2572
  "group_size": {
2573
  "4": 128
@@ -2596,7 +2596,7 @@
2596
  },
2597
  "q_proj": {
2598
  "group_size": {
2599
- "2": 64
2600
  },
2601
  "bits": [
2602
  2
@@ -2609,10 +2609,10 @@
2609
  },
2610
  "k_proj": {
2611
  "group_size": {
2612
- "4": 128
2613
  },
2614
  "bits": [
2615
- 4
2616
  ],
2617
  "bits_prop": [
2618
  1
@@ -2635,10 +2635,10 @@
2635
  },
2636
  "gate_proj": {
2637
  "group_size": {
2638
- "4": 128
2639
  },
2640
  "bits": [
2641
- 4
2642
  ],
2643
  "bits_prop": [
2644
  1
@@ -2661,8 +2661,8 @@
2661
  }
2662
  },
2663
  "model.layers.28": {
2664
- "accuracy": 0.9891000912321033,
2665
- "total_bits": 1907834880.0,
2666
  "o_proj": {
2667
  "group_size": {
2668
  "4": 128
@@ -2691,7 +2691,7 @@
2691
  },
2692
  "q_proj": {
2693
  "group_size": {
2694
- "2": 64
2695
  },
2696
  "bits": [
2697
  2
@@ -2704,10 +2704,10 @@
2704
  },
2705
  "k_proj": {
2706
  "group_size": {
2707
- "4": 128
2708
  },
2709
  "bits": [
2710
- 4
2711
  ],
2712
  "bits_prop": [
2713
  1
@@ -2730,10 +2730,10 @@
2730
  },
2731
  "gate_proj": {
2732
  "group_size": {
2733
- "4": 128
2734
  },
2735
  "bits": [
2736
- 4
2737
  ],
2738
  "bits_prop": [
2739
  1
@@ -2756,8 +2756,8 @@
2756
  }
2757
  },
2758
  "model.layers.29": {
2759
- "accuracy": 0.9894378067547223,
2760
- "total_bits": 1907834880.0,
2761
  "o_proj": {
2762
  "group_size": {
2763
  "4": 128
@@ -2786,7 +2786,7 @@
2786
  },
2787
  "q_proj": {
2788
  "group_size": {
2789
- "2": 64
2790
  },
2791
  "bits": [
2792
  2
@@ -2799,10 +2799,10 @@
2799
  },
2800
  "k_proj": {
2801
  "group_size": {
2802
- "4": 128
2803
  },
2804
  "bits": [
2805
- 4
2806
  ],
2807
  "bits_prop": [
2808
  1
@@ -2825,10 +2825,10 @@
2825
  },
2826
  "gate_proj": {
2827
  "group_size": {
2828
- "4": 128
2829
  },
2830
  "bits": [
2831
- 4
2832
  ],
2833
  "bits_prop": [
2834
  1
@@ -3421,8 +3421,8 @@
3421
  }
3422
  },
3423
  "model.layers.36": {
3424
- "accuracy": 0.9883409765607212,
3425
- "total_bits": 1907834880.0,
3426
  "o_proj": {
3427
  "group_size": {
3428
  "4": 128
@@ -3451,7 +3451,7 @@
3451
  },
3452
  "q_proj": {
3453
  "group_size": {
3454
- "2": 64
3455
  },
3456
  "bits": [
3457
  2
@@ -3464,10 +3464,10 @@
3464
  },
3465
  "k_proj": {
3466
  "group_size": {
3467
- "4": 128
3468
  },
3469
  "bits": [
3470
- 4
3471
  ],
3472
  "bits_prop": [
3473
  1
@@ -3490,10 +3490,10 @@
3490
  },
3491
  "gate_proj": {
3492
  "group_size": {
3493
- "4": 128
3494
  },
3495
  "bits": [
3496
- 4
3497
  ],
3498
  "bits_prop": [
3499
  1
@@ -3516,8 +3516,8 @@
3516
  }
3517
  },
3518
  "model.layers.37": {
3519
- "accuracy": 0.9876541621342767,
3520
- "total_bits": 1907834880.0,
3521
  "o_proj": {
3522
  "group_size": {
3523
  "4": 128
@@ -3546,7 +3546,7 @@
3546
  },
3547
  "q_proj": {
3548
  "group_size": {
3549
- "2": 64
3550
  },
3551
  "bits": [
3552
  2
@@ -3559,10 +3559,10 @@
3559
  },
3560
  "k_proj": {
3561
  "group_size": {
3562
- "4": 128
3563
  },
3564
  "bits": [
3565
- 4
3566
  ],
3567
  "bits_prop": [
3568
  1
@@ -3585,10 +3585,10 @@
3585
  },
3586
  "gate_proj": {
3587
  "group_size": {
3588
- "4": 128
3589
  },
3590
  "bits": [
3591
- 4
3592
  ],
3593
  "bits_prop": [
3594
  1
@@ -3611,8 +3611,8 @@
3611
  }
3612
  },
3613
  "model.layers.38": {
3614
- "accuracy": 0.9870018090005033,
3615
- "total_bits": 1907834880.0,
3616
  "o_proj": {
3617
  "group_size": {
3618
  "4": 128
@@ -3641,7 +3641,7 @@
3641
  },
3642
  "q_proj": {
3643
  "group_size": {
3644
- "2": 64
3645
  },
3646
  "bits": [
3647
  2
@@ -3654,10 +3654,10 @@
3654
  },
3655
  "k_proj": {
3656
  "group_size": {
3657
- "4": 128
3658
  },
3659
  "bits": [
3660
- 4
3661
  ],
3662
  "bits_prop": [
3663
  1
@@ -3680,10 +3680,10 @@
3680
  },
3681
  "gate_proj": {
3682
  "group_size": {
3683
- "4": 128
3684
  },
3685
  "bits": [
3686
- 4
3687
  ],
3688
  "bits_prop": [
3689
  1
@@ -3706,8 +3706,8 @@
3706
  }
3707
  },
3708
  "model.layers.39": {
3709
- "accuracy": 0.986489765055012,
3710
- "total_bits": 1907834880.0,
3711
  "o_proj": {
3712
  "group_size": {
3713
  "4": 128
@@ -3736,7 +3736,7 @@
3736
  },
3737
  "q_proj": {
3738
  "group_size": {
3739
- "2": 64
3740
  },
3741
  "bits": [
3742
  2
@@ -3749,10 +3749,10 @@
3749
  },
3750
  "k_proj": {
3751
  "group_size": {
3752
- "4": 128
3753
  },
3754
  "bits": [
3755
- 4
3756
  ],
3757
  "bits_prop": [
3758
  1
@@ -3775,10 +3775,10 @@
3775
  },
3776
  "gate_proj": {
3777
  "group_size": {
3778
- "4": 128
3779
  },
3780
  "bits": [
3781
- 4
3782
  ],
3783
  "bits_prop": [
3784
  1
@@ -4181,8 +4181,8 @@
4181
  }
4182
  },
4183
  "model.layers.44": {
4184
- "accuracy": 0.9865467178460676,
4185
- "total_bits": 1907834880.0,
4186
  "o_proj": {
4187
  "group_size": {
4188
  "4": 128
@@ -4211,7 +4211,7 @@
4211
  },
4212
  "q_proj": {
4213
  "group_size": {
4214
- "2": 64
4215
  },
4216
  "bits": [
4217
  2
@@ -4224,10 +4224,10 @@
4224
  },
4225
  "k_proj": {
4226
  "group_size": {
4227
- "4": 128
4228
  },
4229
  "bits": [
4230
- 4
4231
  ],
4232
  "bits_prop": [
4233
  1
@@ -4250,10 +4250,10 @@
4250
  },
4251
  "gate_proj": {
4252
  "group_size": {
4253
- "4": 128
4254
  },
4255
  "bits": [
4256
- 4
4257
  ],
4258
  "bits_prop": [
4259
  1
@@ -4276,8 +4276,8 @@
4276
  }
4277
  },
4278
  "model.layers.45": {
4279
- "accuracy": 0.9846922820433974,
4280
- "total_bits": 1907834880.0,
4281
  "o_proj": {
4282
  "group_size": {
4283
  "4": 128
@@ -4306,7 +4306,7 @@
4306
  },
4307
  "q_proj": {
4308
  "group_size": {
4309
- "2": 64
4310
  },
4311
  "bits": [
4312
  2
@@ -4319,10 +4319,10 @@
4319
  },
4320
  "k_proj": {
4321
  "group_size": {
4322
- "4": 128
4323
  },
4324
  "bits": [
4325
- 4
4326
  ],
4327
  "bits_prop": [
4328
  1
@@ -4345,10 +4345,10 @@
4345
  },
4346
  "gate_proj": {
4347
  "group_size": {
4348
- "4": 128
4349
  },
4350
  "bits": [
4351
- 4
4352
  ],
4353
  "bits_prop": [
4354
  1
@@ -4371,8 +4371,8 @@
4371
  }
4372
  },
4373
  "model.layers.46": {
4374
- "accuracy": 0.9846796012716368,
4375
- "total_bits": 1907834880.0,
4376
  "o_proj": {
4377
  "group_size": {
4378
  "4": 128
@@ -4401,10 +4401,10 @@
4401
  },
4402
  "q_proj": {
4403
  "group_size": {
4404
- "2": 64
4405
  },
4406
  "bits": [
4407
- 2
4408
  ],
4409
  "bits_prop": [
4410
  1
@@ -4414,10 +4414,10 @@
4414
  },
4415
  "k_proj": {
4416
  "group_size": {
4417
- "4": 128
4418
  },
4419
  "bits": [
4420
- 4
4421
  ],
4422
  "bits_prop": [
4423
  1
@@ -4440,10 +4440,10 @@
4440
  },
4441
  "gate_proj": {
4442
  "group_size": {
4443
- "4": 128
4444
  },
4445
  "bits": [
4446
- 4
4447
  ],
4448
  "bits_prop": [
4449
  1
@@ -5036,11 +5036,11 @@
5036
  }
5037
  },
5038
  "model.layers.53": {
5039
- "accuracy": 0.985947548673721,
5040
- "total_bits": 2102722560.0,
5041
  "o_proj": {
5042
  "group_size": {
5043
- "4": 32
5044
  },
5045
  "bits": [
5046
  4
@@ -5053,7 +5053,7 @@
5053
  },
5054
  "down_proj": {
5055
  "group_size": {
5056
- "4": 32
5057
  },
5058
  "bits": [
5059
  4
@@ -5066,7 +5066,7 @@
5066
  },
5067
  "q_proj": {
5068
  "group_size": {
5069
- "4": 32
5070
  },
5071
  "bits": [
5072
  4
@@ -5079,7 +5079,7 @@
5079
  },
5080
  "k_proj": {
5081
  "group_size": {
5082
- "4": 32
5083
  },
5084
  "bits": [
5085
  4
@@ -5092,7 +5092,7 @@
5092
  },
5093
  "v_proj": {
5094
  "group_size": {
5095
- "4": 32
5096
  },
5097
  "bits": [
5098
  4
@@ -5105,7 +5105,7 @@
5105
  },
5106
  "gate_proj": {
5107
  "group_size": {
5108
- "4": 32
5109
  },
5110
  "bits": [
5111
  4
@@ -5118,7 +5118,7 @@
5118
  },
5119
  "up_proj": {
5120
  "group_size": {
5121
- "4": 32
5122
  },
5123
  "bits": [
5124
  4
@@ -5606,14 +5606,14 @@
5606
  }
5607
  },
5608
  "model.layers.59": {
5609
- "accuracy": 0.989684437867254,
5610
- "total_bits": 1061437440.0,
5611
  "o_proj": {
5612
  "group_size": {
5613
- "2": 64
5614
  },
5615
  "bits": [
5616
- 2
5617
  ],
5618
  "bits_prop": [
5619
  1
@@ -5623,10 +5623,10 @@
5623
  },
5624
  "down_proj": {
5625
  "group_size": {
5626
- "2": 64
5627
  },
5628
  "bits": [
5629
- 2
5630
  ],
5631
  "bits_prop": [
5632
  1
@@ -5636,7 +5636,7 @@
5636
  },
5637
  "q_proj": {
5638
  "group_size": {
5639
- "2": 64
5640
  },
5641
  "bits": [
5642
  2
@@ -5649,7 +5649,7 @@
5649
  },
5650
  "k_proj": {
5651
  "group_size": {
5652
- "2": 64
5653
  },
5654
  "bits": [
5655
  2
@@ -5675,7 +5675,7 @@
5675
  },
5676
  "gate_proj": {
5677
  "group_size": {
5678
- "2": 64
5679
  },
5680
  "bits": [
5681
  2
@@ -5688,7 +5688,7 @@
5688
  },
5689
  "up_proj": {
5690
  "group_size": {
5691
- "2": 64
5692
  },
5693
  "bits": [
5694
  2
@@ -5701,14 +5701,14 @@
5701
  }
5702
  },
5703
  "model.layers.60": {
5704
- "accuracy": 0.9913747301325202,
5705
- "total_bits": 1061437440.0,
5706
  "o_proj": {
5707
  "group_size": {
5708
- "2": 64
5709
  },
5710
  "bits": [
5711
- 2
5712
  ],
5713
  "bits_prop": [
5714
  1
@@ -5718,10 +5718,10 @@
5718
  },
5719
  "down_proj": {
5720
  "group_size": {
5721
- "2": 64
5722
  },
5723
  "bits": [
5724
- 2
5725
  ],
5726
  "bits_prop": [
5727
  1
@@ -5731,7 +5731,7 @@
5731
  },
5732
  "q_proj": {
5733
  "group_size": {
5734
- "2": 64
5735
  },
5736
  "bits": [
5737
  2
@@ -5744,10 +5744,10 @@
5744
  },
5745
  "k_proj": {
5746
  "group_size": {
5747
- "2": 64
5748
  },
5749
  "bits": [
5750
- 2
5751
  ],
5752
  "bits_prop": [
5753
  1
@@ -5770,7 +5770,7 @@
5770
  },
5771
  "gate_proj": {
5772
  "group_size": {
5773
- "2": 64
5774
  },
5775
  "bits": [
5776
  2
@@ -5783,7 +5783,7 @@
5783
  },
5784
  "up_proj": {
5785
  "group_size": {
5786
- "2": 64
5787
  },
5788
  "bits": [
5789
  2
@@ -5796,14 +5796,14 @@
5796
  }
5797
  },
5798
  "model.layers.61": {
5799
- "accuracy": 0.9891216587275267,
5800
- "total_bits": 1061437440.0,
5801
  "o_proj": {
5802
  "group_size": {
5803
- "2": 64
5804
  },
5805
  "bits": [
5806
- 2
5807
  ],
5808
  "bits_prop": [
5809
  1
@@ -5813,10 +5813,10 @@
5813
  },
5814
  "down_proj": {
5815
  "group_size": {
5816
- "2": 64
5817
  },
5818
  "bits": [
5819
- 2
5820
  ],
5821
  "bits_prop": [
5822
  1
@@ -5826,7 +5826,7 @@
5826
  },
5827
  "q_proj": {
5828
  "group_size": {
5829
- "2": 64
5830
  },
5831
  "bits": [
5832
  2
@@ -5839,7 +5839,7 @@
5839
  },
5840
  "k_proj": {
5841
  "group_size": {
5842
- "2": 64
5843
  },
5844
  "bits": [
5845
  2
@@ -5865,7 +5865,7 @@
5865
  },
5866
  "gate_proj": {
5867
  "group_size": {
5868
- "2": 64
5869
  },
5870
  "bits": [
5871
  2
@@ -5878,7 +5878,7 @@
5878
  },
5879
  "up_proj": {
5880
  "group_size": {
5881
- "2": 64
5882
  },
5883
  "bits": [
5884
  2
@@ -5891,14 +5891,14 @@
5891
  }
5892
  },
5893
  "model.layers.62": {
5894
- "accuracy": 0.9859952349215746,
5895
- "total_bits": 1061437440.0,
5896
  "o_proj": {
5897
  "group_size": {
5898
- "2": 64
5899
  },
5900
  "bits": [
5901
- 2
5902
  ],
5903
  "bits_prop": [
5904
  1
@@ -5908,10 +5908,10 @@
5908
  },
5909
  "down_proj": {
5910
  "group_size": {
5911
- "2": 64
5912
  },
5913
  "bits": [
5914
- 2
5915
  ],
5916
  "bits_prop": [
5917
  1
@@ -5921,7 +5921,7 @@
5921
  },
5922
  "q_proj": {
5923
  "group_size": {
5924
- "2": 64
5925
  },
5926
  "bits": [
5927
  2
@@ -5934,7 +5934,7 @@
5934
  },
5935
  "k_proj": {
5936
  "group_size": {
5937
- "2": 64
5938
  },
5939
  "bits": [
5940
  2
@@ -5960,7 +5960,7 @@
5960
  },
5961
  "gate_proj": {
5962
  "group_size": {
5963
- "2": 64
5964
  },
5965
  "bits": [
5966
  2
@@ -5973,7 +5973,7 @@
5973
  },
5974
  "up_proj": {
5975
  "group_size": {
5976
- "2": 64
5977
  },
5978
  "bits": [
5979
  2
@@ -5986,14 +5986,14 @@
5986
  }
5987
  },
5988
  "model.layers.63": {
5989
- "accuracy": 0.987260795198381,
5990
- "total_bits": 1389690880.0,
5991
  "o_proj": {
5992
  "group_size": {
5993
- "2": 32
5994
  },
5995
  "bits": [
5996
- 2
5997
  ],
5998
  "bits_prop": [
5999
  1
@@ -6003,7 +6003,7 @@
6003
  },
6004
  "down_proj": {
6005
  "group_size": {
6006
- "4": 32
6007
  },
6008
  "bits": [
6009
  4
@@ -6042,10 +6042,10 @@
6042
  },
6043
  "v_proj": {
6044
  "group_size": {
6045
- "2": 32
6046
  },
6047
  "bits": [
6048
- 2
6049
  ],
6050
  "bits_prop": [
6051
  1
 
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
+ "accuracy": 0.9774280267301947,
5
+ "total_bits": 1988444160.0,
6
  "o_proj": {
7
  "group_size": {
8
+ "4": 128
9
  },
10
  "bits": [
11
  4
 
18
  },
19
  "down_proj": {
20
  "group_size": {
21
+ "4": 128
22
  },
23
  "bits": [
24
  4
 
31
  },
32
  "q_proj": {
33
  "group_size": {
34
+ "4": 128
35
  },
36
  "bits": [
37
  4
 
44
  },
45
  "k_proj": {
46
  "group_size": {
47
+ "4": 128
48
  },
49
  "bits": [
50
  4
 
57
  },
58
  "v_proj": {
59
  "group_size": {
60
+ "4": 128
61
  },
62
  "bits": [
63
  4
 
70
  },
71
  "gate_proj": {
72
  "group_size": {
73
+ "4": 128
74
  },
75
  "bits": [
76
  4
 
83
  },
84
  "up_proj": {
85
  "group_size": {
86
+ "4": 128
87
  },
88
  "bits": [
89
  4
 
96
  }
97
  },
98
  "model.layers.1": {
99
+ "accuracy": 0.979808229021728,
100
+ "total_bits": 1988444160.0,
101
  "o_proj": {
102
  "group_size": {
103
+ "4": 128
104
  },
105
  "bits": [
106
  4
 
113
  },
114
  "down_proj": {
115
  "group_size": {
116
+ "4": 128
117
  },
118
  "bits": [
119
  4
 
126
  },
127
  "q_proj": {
128
  "group_size": {
129
+ "4": 128
130
  },
131
  "bits": [
132
  4
 
139
  },
140
  "k_proj": {
141
  "group_size": {
142
+ "4": 128
143
  },
144
  "bits": [
145
  4
 
152
  },
153
  "v_proj": {
154
  "group_size": {
155
+ "4": 128
156
  },
157
  "bits": [
158
  4
 
165
  },
166
  "gate_proj": {
167
  "group_size": {
168
+ "4": 128
169
  },
170
  "bits": [
171
  4
 
178
  },
179
  "up_proj": {
180
  "group_size": {
181
+ "4": 128
182
  },
183
  "bits": [
184
  4
 
286
  }
287
  },
288
  "model.layers.3": {
289
+ "accuracy": 0.9813712932809722,
290
+ "total_bits": 1907834880.0,
291
  "o_proj": {
292
  "group_size": {
293
  "4": 128
 
316
  },
317
  "q_proj": {
318
  "group_size": {
319
+ "2": 64
320
  },
321
  "bits": [
322
+ 2
323
  ],
324
  "bits_prop": [
325
  1
 
476
  }
477
  },
478
  "model.layers.5": {
479
+ "accuracy": 0.9806460069958121,
480
+ "total_bits": 1988444160.0,
481
  "o_proj": {
482
  "group_size": {
483
+ "4": 128
484
  },
485
  "bits": [
486
  4
 
493
  },
494
  "down_proj": {
495
  "group_size": {
496
+ "4": 128
497
  },
498
  "bits": [
499
  4
 
506
  },
507
  "q_proj": {
508
  "group_size": {
509
+ "4": 128
510
  },
511
  "bits": [
512
  4
 
519
  },
520
  "k_proj": {
521
  "group_size": {
522
+ "4": 128
523
  },
524
  "bits": [
525
  4
 
532
  },
533
  "v_proj": {
534
  "group_size": {
535
+ "4": 128
536
  },
537
  "bits": [
538
  4
 
545
  },
546
  "gate_proj": {
547
  "group_size": {
548
+ "4": 128
549
  },
550
  "bits": [
551
  4
 
558
  },
559
  "up_proj": {
560
  "group_size": {
561
+ "4": 128
562
  },
563
  "bits": [
564
  4
 
856
  }
857
  },
858
  "model.layers.9": {
859
+ "accuracy": 0.9862758388189832,
860
+ "total_bits": 1442283520.0,
861
  "o_proj": {
862
  "group_size": {
863
+ "4": 128
864
  },
865
  "bits": [
866
+ 4
867
  ],
868
  "bits_prop": [
869
  1
 
951
  }
952
  },
953
  "model.layers.10": {
954
+ "accuracy": 0.9866171181201935,
955
+ "total_bits": 1442283520.0,
956
  "o_proj": {
957
  "group_size": {
958
  "4": 128
 
968
  },
969
  "down_proj": {
970
  "group_size": {
971
+ "4": 128
972
  },
973
  "bits": [
974
+ 4
975
  ],
976
  "bits_prop": [
977
  1
 
1046
  }
1047
  },
1048
  "model.layers.11": {
1049
+ "accuracy": 0.9859971911937464,
1050
+ "total_bits": 1451540480.0,
1051
  "o_proj": {
1052
  "group_size": {
1053
+ "4": 128
1054
  },
1055
  "bits": [
1056
+ 4
1057
  ],
1058
  "bits_prop": [
1059
  1
 
1089
  },
1090
  "k_proj": {
1091
  "group_size": {
1092
+ "4": 128
1093
  },
1094
  "bits": [
1095
+ 4
1096
  ],
1097
  "bits_prop": [
1098
  1
 
1141
  }
1142
  },
1143
  "model.layers.12": {
1144
+ "accuracy": 0.985051059658872,
1145
+ "total_bits": 1442283520.0,
1146
  "o_proj": {
1147
  "group_size": {
1148
+ "4": 128
1149
  },
1150
  "bits": [
1151
+ 4
1152
  ],
1153
  "bits_prop": [
1154
  1
 
1236
  }
1237
  },
1238
  "model.layers.13": {
1239
+ "accuracy": 0.9854843073990196,
1240
+ "total_bits": 1451540480.0,
1241
  "o_proj": {
1242
  "group_size": {
1243
+ "4": 128
1244
  },
1245
  "bits": [
1246
+ 4
1247
  ],
1248
  "bits_prop": [
1249
  1
 
1279
  },
1280
  "k_proj": {
1281
  "group_size": {
1282
+ "4": 128
1283
  },
1284
  "bits": [
1285
+ 4
1286
  ],
1287
  "bits_prop": [
1288
  1
 
1331
  }
1332
  },
1333
  "model.layers.14": {
1334
+ "accuracy": 0.9862457206763793,
1335
+ "total_bits": 1451540480.0,
1336
  "o_proj": {
1337
  "group_size": {
1338
+ "4": 128
1339
  },
1340
  "bits": [
1341
+ 4
1342
  ],
1343
  "bits_prop": [
1344
  1
 
1374
  },
1375
  "k_proj": {
1376
  "group_size": {
1377
+ "4": 128
1378
  },
1379
  "bits": [
1380
+ 4
1381
  ],
1382
  "bits_prop": [
1383
  1
 
1426
  }
1427
  },
1428
  "model.layers.15": {
1429
+ "accuracy": 0.9865853489609435,
1430
+ "total_bits": 1451540480.0,
1431
  "o_proj": {
1432
  "group_size": {
1433
+ "4": 128
1434
  },
1435
  "bits": [
1436
+ 4
1437
  ],
1438
  "bits_prop": [
1439
  1
 
1469
  },
1470
  "k_proj": {
1471
  "group_size": {
1472
+ "4": 128
1473
  },
1474
  "bits": [
1475
+ 4
1476
  ],
1477
  "bits_prop": [
1478
  1
 
1521
  }
1522
  },
1523
  "model.layers.16": {
1524
+ "accuracy": 0.987276211992139,
1525
+ "total_bits": 1451540480.0,
1526
  "o_proj": {
1527
  "group_size": {
1528
  "4": 128
 
1538
  },
1539
  "down_proj": {
1540
  "group_size": {
1541
+ "4": 128
1542
  },
1543
  "bits": [
1544
+ 4
1545
  ],
1546
  "bits_prop": [
1547
  1
 
1564
  },
1565
  "k_proj": {
1566
  "group_size": {
1567
+ "4": 128
1568
  },
1569
  "bits": [
1570
+ 4
1571
  ],
1572
  "bits_prop": [
1573
  1
 
1616
  }
1617
  },
1618
  "model.layers.17": {
1619
+ "accuracy": 0.987157837691484,
1620
+ "total_bits": 1451540480.0,
1621
  "o_proj": {
1622
  "group_size": {
1623
  "4": 128
 
1633
  },
1634
  "down_proj": {
1635
  "group_size": {
1636
+ "4": 128
1637
  },
1638
  "bits": [
1639
+ 4
1640
  ],
1641
  "bits_prop": [
1642
  1
 
1659
  },
1660
  "k_proj": {
1661
  "group_size": {
1662
+ "4": 128
1663
  },
1664
  "bits": [
1665
+ 4
1666
  ],
1667
  "bits_prop": [
1668
  1
 
1711
  }
1712
  },
1713
  "model.layers.18": {
1714
+ "accuracy": 0.98646844382165,
1715
+ "total_bits": 1451540480.0,
1716
  "o_proj": {
1717
  "group_size": {
1718
+ "4": 128
1719
  },
1720
  "bits": [
1721
+ 4
1722
  ],
1723
  "bits_prop": [
1724
  1
 
1754
  },
1755
  "k_proj": {
1756
  "group_size": {
1757
+ "4": 128
1758
  },
1759
  "bits": [
1760
+ 4
1761
  ],
1762
  "bits_prop": [
1763
  1
 
1901
  }
1902
  },
1903
  "model.layers.20": {
1904
+ "accuracy": 0.9845815218286589,
1905
+ "total_bits": 1451540480.0,
1906
  "o_proj": {
1907
  "group_size": {
1908
+ "4": 128
1909
  },
1910
  "bits": [
1911
+ 4
1912
  ],
1913
  "bits_prop": [
1914
  1
 
1944
  },
1945
  "k_proj": {
1946
  "group_size": {
1947
+ "4": 128
1948
  },
1949
  "bits": [
1950
+ 4
1951
  ],
1952
  "bits_prop": [
1953
  1
 
2091
  }
2092
  },
2093
  "model.layers.22": {
2094
+ "accuracy": 0.9820500311907381,
2095
+ "total_bits": 1516339200.0,
2096
  "o_proj": {
2097
  "group_size": {
2098
  "4": 128
 
2134
  },
2135
  "k_proj": {
2136
  "group_size": {
2137
+ "2": 32
2138
  },
2139
  "bits": [
2140
+ 2
2141
  ],
2142
  "bits_prop": [
2143
  1
 
2186
  }
2187
  },
2188
  "model.layers.23": {
2189
+ "accuracy": 0.9808617235685233,
2190
+ "total_bits": 1451540480.0,
2191
  "o_proj": {
2192
  "group_size": {
2193
  "4": 128
 
2216
  },
2217
  "q_proj": {
2218
  "group_size": {
2219
+ "2": 32
2220
  },
2221
  "bits": [
2222
+ 2
2223
  ],
2224
  "bits_prop": [
2225
  1
 
2281
  }
2282
  },
2283
  "model.layers.24": {
2284
+ "accuracy": 0.9812766579561867,
2285
+ "total_bits": 1653227520.0,
2286
  "o_proj": {
2287
  "group_size": {
2288
  "4": 128
 
2350
  },
2351
  "gate_proj": {
2352
  "group_size": {
2353
+ "4": 128
2354
  },
2355
  "bits": [
2356
+ 4
2357
  ],
2358
  "bits_prop": [
2359
  1
 
2363
  },
2364
  "up_proj": {
2365
  "group_size": {
2366
+ "2": 64
2367
  },
2368
  "bits": [
2369
+ 2
2370
  ],
2371
  "bits_prop": [
2372
  1
 
2566
  }
2567
  },
2568
  "model.layers.27": {
2569
+ "accuracy": 0.9812095816305373,
2570
+ "total_bits": 1673707520.0,
2571
  "o_proj": {
2572
  "group_size": {
2573
  "4": 128
 
2596
  },
2597
  "q_proj": {
2598
  "group_size": {
2599
+ "2": 32
2600
  },
2601
  "bits": [
2602
  2
 
2609
  },
2610
  "k_proj": {
2611
  "group_size": {
2612
+ "2": 32
2613
  },
2614
  "bits": [
2615
+ 2
2616
  ],
2617
  "bits_prop": [
2618
  1
 
2635
  },
2636
  "gate_proj": {
2637
  "group_size": {
2638
+ "2": 32
2639
  },
2640
  "bits": [
2641
+ 2
2642
  ],
2643
  "bits_prop": [
2644
  1
 
2661
  }
2662
  },
2663
  "model.layers.28": {
2664
+ "accuracy": 0.9807323368440848,
2665
+ "total_bits": 1673707520.0,
2666
  "o_proj": {
2667
  "group_size": {
2668
  "4": 128
 
2691
  },
2692
  "q_proj": {
2693
  "group_size": {
2694
+ "2": 32
2695
  },
2696
  "bits": [
2697
  2
 
2704
  },
2705
  "k_proj": {
2706
  "group_size": {
2707
+ "2": 32
2708
  },
2709
  "bits": [
2710
+ 2
2711
  ],
2712
  "bits_prop": [
2713
  1
 
2730
  },
2731
  "gate_proj": {
2732
  "group_size": {
2733
+ "2": 32
2734
  },
2735
  "bits": [
2736
+ 2
2737
  ],
2738
  "bits_prop": [
2739
  1
 
2756
  }
2757
  },
2758
  "model.layers.29": {
2759
+ "accuracy": 0.980840009462554,
2760
+ "total_bits": 1673707520.0,
2761
  "o_proj": {
2762
  "group_size": {
2763
  "4": 128
 
2786
  },
2787
  "q_proj": {
2788
  "group_size": {
2789
+ "2": 32
2790
  },
2791
  "bits": [
2792
  2
 
2799
  },
2800
  "k_proj": {
2801
  "group_size": {
2802
+ "2": 32
2803
  },
2804
  "bits": [
2805
+ 2
2806
  ],
2807
  "bits_prop": [
2808
  1
 
2825
  },
2826
  "gate_proj": {
2827
  "group_size": {
2828
+ "2": 32
2829
  },
2830
  "bits": [
2831
+ 2
2832
  ],
2833
  "bits_prop": [
2834
  1
 
3421
  }
3422
  },
3423
  "model.layers.36": {
3424
+ "accuracy": 0.9810442902962677,
3425
+ "total_bits": 1673707520.0,
3426
  "o_proj": {
3427
  "group_size": {
3428
  "4": 128
 
3451
  },
3452
  "q_proj": {
3453
  "group_size": {
3454
+ "2": 32
3455
  },
3456
  "bits": [
3457
  2
 
3464
  },
3465
  "k_proj": {
3466
  "group_size": {
3467
+ "2": 32
3468
  },
3469
  "bits": [
3470
+ 2
3471
  ],
3472
  "bits_prop": [
3473
  1
 
3490
  },
3491
  "gate_proj": {
3492
  "group_size": {
3493
+ "2": 32
3494
  },
3495
  "bits": [
3496
+ 2
3497
  ],
3498
  "bits_prop": [
3499
  1
 
3516
  }
3517
  },
3518
  "model.layers.37": {
3519
+ "accuracy": 0.9805779054295272,
3520
+ "total_bits": 1673707520.0,
3521
  "o_proj": {
3522
  "group_size": {
3523
  "4": 128
 
3546
  },
3547
  "q_proj": {
3548
  "group_size": {
3549
+ "2": 32
3550
  },
3551
  "bits": [
3552
  2
 
3559
  },
3560
  "k_proj": {
3561
  "group_size": {
3562
+ "2": 32
3563
  },
3564
  "bits": [
3565
+ 2
3566
  ],
3567
  "bits_prop": [
3568
  1
 
3585
  },
3586
  "gate_proj": {
3587
  "group_size": {
3588
+ "2": 32
3589
  },
3590
  "bits": [
3591
+ 2
3592
  ],
3593
  "bits_prop": [
3594
  1
 
3611
  }
3612
  },
3613
  "model.layers.38": {
3614
+ "accuracy": 0.9795972040155903,
3615
+ "total_bits": 1673707520.0,
3616
  "o_proj": {
3617
  "group_size": {
3618
  "4": 128
 
3641
  },
3642
  "q_proj": {
3643
  "group_size": {
3644
+ "2": 32
3645
  },
3646
  "bits": [
3647
  2
 
3654
  },
3655
  "k_proj": {
3656
  "group_size": {
3657
+ "2": 32
3658
  },
3659
  "bits": [
3660
+ 2
3661
  ],
3662
  "bits_prop": [
3663
  1
 
3680
  },
3681
  "gate_proj": {
3682
  "group_size": {
3683
+ "2": 32
3684
  },
3685
  "bits": [
3686
+ 2
3687
  ],
3688
  "bits_prop": [
3689
  1
 
3706
  }
3707
  },
3708
  "model.layers.39": {
3709
+ "accuracy": 0.979535614955239,
3710
+ "total_bits": 1673707520.0,
3711
  "o_proj": {
3712
  "group_size": {
3713
  "4": 128
 
3736
  },
3737
  "q_proj": {
3738
  "group_size": {
3739
+ "2": 32
3740
  },
3741
  "bits": [
3742
  2
 
3749
  },
3750
  "k_proj": {
3751
  "group_size": {
3752
+ "2": 32
3753
  },
3754
  "bits": [
3755
+ 2
3756
  ],
3757
  "bits_prop": [
3758
  1
 
3775
  },
3776
  "gate_proj": {
3777
  "group_size": {
3778
+ "2": 32
3779
  },
3780
  "bits": [
3781
+ 2
3782
  ],
3783
  "bits_prop": [
3784
  1
 
4181
  }
4182
  },
4183
  "model.layers.44": {
4184
+ "accuracy": 0.9810187924886122,
4185
+ "total_bits": 1673707520.0,
4186
  "o_proj": {
4187
  "group_size": {
4188
  "4": 128
 
4211
  },
4212
  "q_proj": {
4213
  "group_size": {
4214
+ "2": 32
4215
  },
4216
  "bits": [
4217
  2
 
4224
  },
4225
  "k_proj": {
4226
  "group_size": {
4227
+ "2": 32
4228
  },
4229
  "bits": [
4230
+ 2
4231
  ],
4232
  "bits_prop": [
4233
  1
 
4250
  },
4251
  "gate_proj": {
4252
  "group_size": {
4253
+ "2": 32
4254
  },
4255
  "bits": [
4256
+ 2
4257
  ],
4258
  "bits_prop": [
4259
  1
 
4276
  }
4277
  },
4278
  "model.layers.45": {
4279
+ "accuracy": 0.979574806347955,
4280
+ "total_bits": 1673707520.0,
4281
  "o_proj": {
4282
  "group_size": {
4283
  "4": 128
 
4306
  },
4307
  "q_proj": {
4308
  "group_size": {
4309
+ "2": 32
4310
  },
4311
  "bits": [
4312
  2
 
4319
  },
4320
  "k_proj": {
4321
  "group_size": {
4322
+ "2": 32
4323
  },
4324
  "bits": [
4325
+ 2
4326
  ],
4327
  "bits_prop": [
4328
  1
 
4345
  },
4346
  "gate_proj": {
4347
  "group_size": {
4348
+ "2": 32
4349
  },
4350
  "bits": [
4351
+ 2
4352
  ],
4353
  "bits_prop": [
4354
  1
 
4371
  }
4372
  },
4373
  "model.layers.46": {
4374
+ "accuracy": 0.9794538663118146,
4375
+ "total_bits": 1726464000.0,
4376
  "o_proj": {
4377
  "group_size": {
4378
  "4": 128
 
4401
  },
4402
  "q_proj": {
4403
  "group_size": {
4404
+ "4": 128
4405
  },
4406
  "bits": [
4407
+ 4
4408
  ],
4409
  "bits_prop": [
4410
  1
 
4414
  },
4415
  "k_proj": {
4416
  "group_size": {
4417
+ "2": 64
4418
  },
4419
  "bits": [
4420
+ 2
4421
  ],
4422
  "bits_prop": [
4423
  1
 
4440
  },
4441
  "gate_proj": {
4442
  "group_size": {
4443
+ "2": 64
4444
  },
4445
  "bits": [
4446
+ 2
4447
  ],
4448
  "bits_prop": [
4449
  1
 
5036
  }
5037
  },
5038
  "model.layers.53": {
5039
+ "accuracy": 0.9816716767963953,
5040
+ "total_bits": 1988444160.0,
5041
  "o_proj": {
5042
  "group_size": {
5043
+ "4": 128
5044
  },
5045
  "bits": [
5046
  4
 
5053
  },
5054
  "down_proj": {
5055
  "group_size": {
5056
+ "4": 128
5057
  },
5058
  "bits": [
5059
  4
 
5066
  },
5067
  "q_proj": {
5068
  "group_size": {
5069
+ "4": 128
5070
  },
5071
  "bits": [
5072
  4
 
5079
  },
5080
  "k_proj": {
5081
  "group_size": {
5082
+ "4": 128
5083
  },
5084
  "bits": [
5085
  4
 
5092
  },
5093
  "v_proj": {
5094
  "group_size": {
5095
+ "4": 128
5096
  },
5097
  "bits": [
5098
  4
 
5105
  },
5106
  "gate_proj": {
5107
  "group_size": {
5108
+ "4": 128
5109
  },
5110
  "bits": [
5111
  4
 
5118
  },
5119
  "up_proj": {
5120
  "group_size": {
5121
+ "4": 128
5122
  },
5123
  "bits": [
5124
  4
 
5606
  }
5607
  },
5608
  "model.layers.59": {
5609
+ "accuracy": 0.9957845042226836,
5610
+ "total_bits": 1442283520.0,
5611
  "o_proj": {
5612
  "group_size": {
5613
+ "4": 128
5614
  },
5615
  "bits": [
5616
+ 4
5617
  ],
5618
  "bits_prop": [
5619
  1
 
5623
  },
5624
  "down_proj": {
5625
  "group_size": {
5626
+ "4": 128
5627
  },
5628
  "bits": [
5629
+ 4
5630
  ],
5631
  "bits_prop": [
5632
  1
 
5636
  },
5637
  "q_proj": {
5638
  "group_size": {
5639
+ "2": 32
5640
  },
5641
  "bits": [
5642
  2
 
5649
  },
5650
  "k_proj": {
5651
  "group_size": {
5652
+ "2": 32
5653
  },
5654
  "bits": [
5655
  2
 
5675
  },
5676
  "gate_proj": {
5677
  "group_size": {
5678
+ "2": 32
5679
  },
5680
  "bits": [
5681
  2
 
5688
  },
5689
  "up_proj": {
5690
  "group_size": {
5691
+ "2": 32
5692
  },
5693
  "bits": [
5694
  2
 
5701
  }
5702
  },
5703
  "model.layers.60": {
5704
+ "accuracy": 0.9971412243321538,
5705
+ "total_bits": 1451540480.0,
5706
  "o_proj": {
5707
  "group_size": {
5708
+ "4": 128
5709
  },
5710
  "bits": [
5711
+ 4
5712
  ],
5713
  "bits_prop": [
5714
  1
 
5718
  },
5719
  "down_proj": {
5720
  "group_size": {
5721
+ "4": 128
5722
  },
5723
  "bits": [
5724
+ 4
5725
  ],
5726
  "bits_prop": [
5727
  1
 
5731
  },
5732
  "q_proj": {
5733
  "group_size": {
5734
+ "2": 32
5735
  },
5736
  "bits": [
5737
  2
 
5744
  },
5745
  "k_proj": {
5746
  "group_size": {
5747
+ "4": 128
5748
  },
5749
  "bits": [
5750
+ 4
5751
  ],
5752
  "bits_prop": [
5753
  1
 
5770
  },
5771
  "gate_proj": {
5772
  "group_size": {
5773
+ "2": 32
5774
  },
5775
  "bits": [
5776
  2
 
5783
  },
5784
  "up_proj": {
5785
  "group_size": {
5786
+ "2": 32
5787
  },
5788
  "bits": [
5789
  2
 
5796
  }
5797
  },
5798
  "model.layers.61": {
5799
+ "accuracy": 0.9959207735955715,
5800
+ "total_bits": 1442283520.0,
5801
  "o_proj": {
5802
  "group_size": {
5803
+ "4": 128
5804
  },
5805
  "bits": [
5806
+ 4
5807
  ],
5808
  "bits_prop": [
5809
  1
 
5813
  },
5814
  "down_proj": {
5815
  "group_size": {
5816
+ "4": 128
5817
  },
5818
  "bits": [
5819
+ 4
5820
  ],
5821
  "bits_prop": [
5822
  1
 
5826
  },
5827
  "q_proj": {
5828
  "group_size": {
5829
+ "2": 32
5830
  },
5831
  "bits": [
5832
  2
 
5839
  },
5840
  "k_proj": {
5841
  "group_size": {
5842
+ "2": 32
5843
  },
5844
  "bits": [
5845
  2
 
5865
  },
5866
  "gate_proj": {
5867
  "group_size": {
5868
+ "2": 32
5869
  },
5870
  "bits": [
5871
  2
 
5878
  },
5879
  "up_proj": {
5880
  "group_size": {
5881
+ "2": 32
5882
  },
5883
  "bits": [
5884
  2
 
5891
  }
5892
  },
5893
  "model.layers.62": {
5894
+ "accuracy": 0.9954170882701874,
5895
+ "total_bits": 1442283520.0,
5896
  "o_proj": {
5897
  "group_size": {
5898
+ "4": 128
5899
  },
5900
  "bits": [
5901
+ 4
5902
  ],
5903
  "bits_prop": [
5904
  1
 
5908
  },
5909
  "down_proj": {
5910
  "group_size": {
5911
+ "4": 128
5912
  },
5913
  "bits": [
5914
+ 4
5915
  ],
5916
  "bits_prop": [
5917
  1
 
5921
  },
5922
  "q_proj": {
5923
  "group_size": {
5924
+ "2": 32
5925
  },
5926
  "bits": [
5927
  2
 
5934
  },
5935
  "k_proj": {
5936
  "group_size": {
5937
+ "2": 32
5938
  },
5939
  "bits": [
5940
  2
 
5960
  },
5961
  "gate_proj": {
5962
  "group_size": {
5963
+ "2": 32
5964
  },
5965
  "bits": [
5966
  2
 
5973
  },
5974
  "up_proj": {
5975
  "group_size": {
5976
+ "2": 32
5977
  },
5978
  "bits": [
5979
  2
 
5986
  }
5987
  },
5988
  "model.layers.63": {
5989
+ "accuracy": 0.9917106227949262,
5990
+ "total_bits": 1442283520.0,
5991
  "o_proj": {
5992
  "group_size": {
5993
+ "4": 128
5994
  },
5995
  "bits": [
5996
+ 4
5997
  ],
5998
  "bits_prop": [
5999
  1
 
6003
  },
6004
  "down_proj": {
6005
  "group_size": {
6006
+ "4": 128
6007
  },
6008
  "bits": [
6009
  4
 
6042
  },
6043
  "v_proj": {
6044
  "group_size": {
6045
+ "4": 128
6046
  },
6047
  "bits": [
6048
+ 4
6049
  ],
6050
  "bits_prop": [
6051
  1