diff --git a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/model.safetensors.index.json b/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/model.safetensors.index.json deleted file mode 100644 index 5c64f1e87be95160fabc494eebfa0f7e68064af2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/model.safetensors.index.json +++ /dev/null @@ -1,299 +0,0 @@ -{ - "metadata": { - "total_parameters": 8030261248, - "total_size": 16060522496 - }, - "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.embed_tokens.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00004-of-00004.safetensors" - } -} diff --git a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/quantization_config.txt b/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/quantization_config.txt deleted file mode 100644 index fc9f2998b5e8cf6e209447b8fb83520681f0aff7..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/quantization_config.txt +++ /dev/null @@ -1,249 +0,0 @@ -# Model: meta-llama/Llama-3.1-8B-Instruct -# Layer directory: /nfs/scistore19/alistgrp/mhelcig/local/data/search/4_5_6_7_8bit_asym_g128/Llama-3.1-8B-Instruct/6bit/ -# Sensitivity method: shapley -# Estimation method: permutation_separate -# Available bitwidths: [4, 8] -# Bitwidth map: {4: 4.3125, 5: 5.3125, 6: 6.3125, 7: 7.3125, 8: 8.3125} -# -# Mode: binary_search_constraint -# Constraint max_kl: 0.01 -# Weights: nll=0.0, kl=0.0, eap=1.0 -# -# Average bitwidth: 6.7981 -# Total params: 6979321856 -# Total bits: 47445966848 -# Final KL: 0.009849 -# Final EAP: 0.967613 -# Final ETL: 0.032387 -# Satisfies constraints: True -# Solver calls: 6 -# Evaluations: 6 -# -# Bitwidth distribution: -# 8-bit: 179 layers (79.9%) -# 4-bit: 45 layers (20.1%) -# -model.layers.0.self_attn.q_proj: 8 -model.layers.0.self_attn.k_proj: 8 -model.layers.0.self_attn.v_proj: 8 -model.layers.0.self_attn.o_proj: 8 -model.layers.0.mlp.gate_proj: 4 -model.layers.0.mlp.up_proj: 4 -model.layers.0.mlp.down_proj: 8 -model.layers.1.self_attn.q_proj: 8 -model.layers.1.self_attn.k_proj: 8 -model.layers.1.self_attn.v_proj: 8 -model.layers.1.self_attn.o_proj: 8 -model.layers.1.mlp.gate_proj: 8 -model.layers.1.mlp.up_proj: 8 -model.layers.1.mlp.down_proj: 8 -model.layers.2.self_attn.q_proj: 8 -model.layers.2.self_attn.k_proj: 8 -model.layers.2.self_attn.v_proj: 8 -model.layers.2.self_attn.o_proj: 8 -model.layers.2.mlp.gate_proj: 8 -model.layers.2.mlp.up_proj: 8 -model.layers.2.mlp.down_proj: 4 -model.layers.3.self_attn.q_proj: 8 -model.layers.3.self_attn.k_proj: 8 -model.layers.3.self_attn.v_proj: 8 -model.layers.3.self_attn.o_proj: 8 -model.layers.3.mlp.gate_proj: 8 -model.layers.3.mlp.up_proj: 8 -model.layers.3.mlp.down_proj: 4 -model.layers.4.self_attn.q_proj: 8 -model.layers.4.self_attn.k_proj: 8 -model.layers.4.self_attn.v_proj: 8 -model.layers.4.self_attn.o_proj: 8 -model.layers.4.mlp.gate_proj: 8 -model.layers.4.mlp.up_proj: 8 -model.layers.4.mlp.down_proj: 8 -model.layers.5.self_attn.q_proj: 8 -model.layers.5.self_attn.k_proj: 8 -model.layers.5.self_attn.v_proj: 8 -model.layers.5.self_attn.o_proj: 8 -model.layers.5.mlp.gate_proj: 8 -model.layers.5.mlp.up_proj: 8 -model.layers.5.mlp.down_proj: 4 -model.layers.6.self_attn.q_proj: 8 -model.layers.6.self_attn.k_proj: 8 -model.layers.6.self_attn.v_proj: 8 -model.layers.6.self_attn.o_proj: 8 -model.layers.6.mlp.gate_proj: 8 -model.layers.6.mlp.up_proj: 8 -model.layers.6.mlp.down_proj: 4 -model.layers.7.self_attn.q_proj: 8 -model.layers.7.self_attn.k_proj: 8 -model.layers.7.self_attn.v_proj: 8 -model.layers.7.self_attn.o_proj: 8 -model.layers.7.mlp.gate_proj: 8 -model.layers.7.mlp.up_proj: 8 -model.layers.7.mlp.down_proj: 4 -model.layers.8.self_attn.q_proj: 8 -model.layers.8.self_attn.k_proj: 8 -model.layers.8.self_attn.v_proj: 8 -model.layers.8.self_attn.o_proj: 8 -model.layers.8.mlp.gate_proj: 8 -model.layers.8.mlp.up_proj: 8 -model.layers.8.mlp.down_proj: 4 -model.layers.9.self_attn.q_proj: 8 -model.layers.9.self_attn.k_proj: 8 -model.layers.9.self_attn.v_proj: 8 -model.layers.9.self_attn.o_proj: 8 -model.layers.9.mlp.gate_proj: 4 -model.layers.9.mlp.up_proj: 4 -model.layers.9.mlp.down_proj: 4 -model.layers.10.self_attn.q_proj: 8 -model.layers.10.self_attn.k_proj: 8 -model.layers.10.self_attn.v_proj: 8 -model.layers.10.self_attn.o_proj: 8 -model.layers.10.mlp.gate_proj: 8 -model.layers.10.mlp.up_proj: 8 -model.layers.10.mlp.down_proj: 4 -model.layers.11.self_attn.q_proj: 8 -model.layers.11.self_attn.k_proj: 8 -model.layers.11.self_attn.v_proj: 8 -model.layers.11.self_attn.o_proj: 8 -model.layers.11.mlp.gate_proj: 8 -model.layers.11.mlp.up_proj: 8 -model.layers.11.mlp.down_proj: 4 -model.layers.12.self_attn.q_proj: 8 -model.layers.12.self_attn.k_proj: 8 -model.layers.12.self_attn.v_proj: 8 -model.layers.12.self_attn.o_proj: 8 -model.layers.12.mlp.gate_proj: 4 -model.layers.12.mlp.up_proj: 4 -model.layers.12.mlp.down_proj: 4 -model.layers.13.self_attn.q_proj: 8 -model.layers.13.self_attn.k_proj: 8 -model.layers.13.self_attn.v_proj: 8 -model.layers.13.self_attn.o_proj: 8 -model.layers.13.mlp.gate_proj: 4 -model.layers.13.mlp.up_proj: 4 -model.layers.13.mlp.down_proj: 8 -model.layers.14.self_attn.q_proj: 8 -model.layers.14.self_attn.k_proj: 8 -model.layers.14.self_attn.v_proj: 8 -model.layers.14.self_attn.o_proj: 8 -model.layers.14.mlp.gate_proj: 8 -model.layers.14.mlp.up_proj: 8 -model.layers.14.mlp.down_proj: 4 -model.layers.15.self_attn.q_proj: 8 -model.layers.15.self_attn.k_proj: 8 -model.layers.15.self_attn.v_proj: 8 -model.layers.15.self_attn.o_proj: 8 -model.layers.15.mlp.gate_proj: 8 -model.layers.15.mlp.up_proj: 8 -model.layers.15.mlp.down_proj: 8 -model.layers.16.self_attn.q_proj: 8 -model.layers.16.self_attn.k_proj: 8 -model.layers.16.self_attn.v_proj: 8 -model.layers.16.self_attn.o_proj: 8 -model.layers.16.mlp.gate_proj: 8 -model.layers.16.mlp.up_proj: 8 -model.layers.16.mlp.down_proj: 4 -model.layers.17.self_attn.q_proj: 8 -model.layers.17.self_attn.k_proj: 8 -model.layers.17.self_attn.v_proj: 8 -model.layers.17.self_attn.o_proj: 8 -model.layers.17.mlp.gate_proj: 8 -model.layers.17.mlp.up_proj: 8 -model.layers.17.mlp.down_proj: 8 -model.layers.18.self_attn.q_proj: 8 -model.layers.18.self_attn.k_proj: 8 -model.layers.18.self_attn.v_proj: 8 -model.layers.18.self_attn.o_proj: 8 -model.layers.18.mlp.gate_proj: 8 -model.layers.18.mlp.up_proj: 8 -model.layers.18.mlp.down_proj: 4 -model.layers.19.self_attn.q_proj: 8 -model.layers.19.self_attn.k_proj: 8 -model.layers.19.self_attn.v_proj: 8 -model.layers.19.self_attn.o_proj: 8 -model.layers.19.mlp.gate_proj: 8 -model.layers.19.mlp.up_proj: 8 -model.layers.19.mlp.down_proj: 4 -model.layers.20.self_attn.q_proj: 8 -model.layers.20.self_attn.k_proj: 8 -model.layers.20.self_attn.v_proj: 8 -model.layers.20.self_attn.o_proj: 8 -model.layers.20.mlp.gate_proj: 8 -model.layers.20.mlp.up_proj: 8 -model.layers.20.mlp.down_proj: 4 -model.layers.21.self_attn.q_proj: 8 -model.layers.21.self_attn.k_proj: 8 -model.layers.21.self_attn.v_proj: 8 -model.layers.21.self_attn.o_proj: 8 -model.layers.21.mlp.gate_proj: 4 -model.layers.21.mlp.up_proj: 4 -model.layers.21.mlp.down_proj: 4 -model.layers.22.self_attn.q_proj: 8 -model.layers.22.self_attn.k_proj: 8 -model.layers.22.self_attn.v_proj: 8 -model.layers.22.self_attn.o_proj: 8 -model.layers.22.mlp.gate_proj: 8 -model.layers.22.mlp.up_proj: 8 -model.layers.22.mlp.down_proj: 4 -model.layers.23.self_attn.q_proj: 8 -model.layers.23.self_attn.k_proj: 8 -model.layers.23.self_attn.v_proj: 8 -model.layers.23.self_attn.o_proj: 8 -model.layers.23.mlp.gate_proj: 8 -model.layers.23.mlp.up_proj: 8 -model.layers.23.mlp.down_proj: 4 -model.layers.24.self_attn.q_proj: 8 -model.layers.24.self_attn.k_proj: 8 -model.layers.24.self_attn.v_proj: 8 -model.layers.24.self_attn.o_proj: 8 -model.layers.24.mlp.gate_proj: 4 -model.layers.24.mlp.up_proj: 4 -model.layers.24.mlp.down_proj: 4 -model.layers.25.self_attn.q_proj: 8 -model.layers.25.self_attn.k_proj: 8 -model.layers.25.self_attn.v_proj: 8 -model.layers.25.self_attn.o_proj: 8 -model.layers.25.mlp.gate_proj: 4 -model.layers.25.mlp.up_proj: 4 -model.layers.25.mlp.down_proj: 4 -model.layers.26.self_attn.q_proj: 8 -model.layers.26.self_attn.k_proj: 8 -model.layers.26.self_attn.v_proj: 8 -model.layers.26.self_attn.o_proj: 8 -model.layers.26.mlp.gate_proj: 4 -model.layers.26.mlp.up_proj: 4 -model.layers.26.mlp.down_proj: 4 -model.layers.27.self_attn.q_proj: 8 -model.layers.27.self_attn.k_proj: 8 -model.layers.27.self_attn.v_proj: 8 -model.layers.27.self_attn.o_proj: 8 -model.layers.27.mlp.gate_proj: 4 -model.layers.27.mlp.up_proj: 4 -model.layers.27.mlp.down_proj: 4 -model.layers.28.self_attn.q_proj: 8 -model.layers.28.self_attn.k_proj: 8 -model.layers.28.self_attn.v_proj: 8 -model.layers.28.self_attn.o_proj: 8 -model.layers.28.mlp.gate_proj: 4 -model.layers.28.mlp.up_proj: 4 -model.layers.28.mlp.down_proj: 4 -model.layers.29.self_attn.q_proj: 8 -model.layers.29.self_attn.k_proj: 8 -model.layers.29.self_attn.v_proj: 8 -model.layers.29.self_attn.o_proj: 8 -model.layers.29.mlp.gate_proj: 8 -model.layers.29.mlp.up_proj: 8 -model.layers.29.mlp.down_proj: 4 -model.layers.30.self_attn.q_proj: 8 -model.layers.30.self_attn.k_proj: 8 -model.layers.30.self_attn.v_proj: 8 -model.layers.30.self_attn.o_proj: 8 -model.layers.30.mlp.gate_proj: 8 -model.layers.30.mlp.up_proj: 8 -model.layers.30.mlp.down_proj: 4 -model.layers.31.self_attn.q_proj: 8 -model.layers.31.self_attn.k_proj: 8 -model.layers.31.self_attn.v_proj: 8 -model.layers.31.self_attn.o_proj: 8 -model.layers.31.mlp.gate_proj: 8 -model.layers.31.mlp.up_proj: 8 -model.layers.31.mlp.down_proj: 8 diff --git a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/special_tokens_map.json b/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/special_tokens_map.json deleted file mode 100644 index b43be96621d147110fb8a18b5776ec6e38516127..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/special_tokens_map.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "bos_token": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "<|eot_id|>" -} diff --git a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/tokenizer.json b/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/tokenizer_config.json b/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/tokenizer_config.json deleted file mode 100644 index 3beeacc86a6ca3cae14ad3004263ab74a4bac07a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_bsearch_kl0.01_sha_bw6.80_4-8bit/tokenizer_config.json +++ /dev/null @@ -1,2063 +0,0 @@ -{ - "added_tokens_decoder": { - "128000": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128001": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128002": { - "content": "<|reserved_special_token_0|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128003": { - "content": "<|reserved_special_token_1|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128004": { - "content": "<|finetune_right_pad_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128005": { - "content": "<|reserved_special_token_2|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128006": { - "content": "<|start_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128007": { - "content": "<|end_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128008": { - "content": "<|eom_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128009": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128010": { - "content": "<|python_tag|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128011": { - "content": "<|reserved_special_token_3|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128012": { - "content": "<|reserved_special_token_4|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128013": { - "content": "<|reserved_special_token_5|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128014": { - "content": "<|reserved_special_token_6|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128015": { - "content": "<|reserved_special_token_7|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128016": { - "content": "<|reserved_special_token_8|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128017": { - "content": "<|reserved_special_token_9|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128018": { - "content": "<|reserved_special_token_10|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128019": { - "content": "<|reserved_special_token_11|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128020": { - "content": "<|reserved_special_token_12|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128021": { - "content": "<|reserved_special_token_13|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128022": { - "content": "<|reserved_special_token_14|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128023": { - "content": "<|reserved_special_token_15|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128024": { - "content": "<|reserved_special_token_16|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128025": { - "content": "<|reserved_special_token_17|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128026": { - "content": "<|reserved_special_token_18|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128027": { - "content": "<|reserved_special_token_19|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128028": { - "content": "<|reserved_special_token_20|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128029": { - "content": "<|reserved_special_token_21|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128030": { - "content": "<|reserved_special_token_22|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128031": { - "content": "<|reserved_special_token_23|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128032": { - "content": "<|reserved_special_token_24|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128033": { - "content": "<|reserved_special_token_25|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128034": { - "content": "<|reserved_special_token_26|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128035": { - "content": "<|reserved_special_token_27|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128036": { - "content": "<|reserved_special_token_28|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128037": { - "content": "<|reserved_special_token_29|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128038": { - "content": "<|reserved_special_token_30|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128039": { - "content": "<|reserved_special_token_31|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128040": { - "content": "<|reserved_special_token_32|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128041": { - "content": "<|reserved_special_token_33|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128042": { - "content": "<|reserved_special_token_34|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128043": { - "content": "<|reserved_special_token_35|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128044": { - "content": "<|reserved_special_token_36|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128045": { - "content": "<|reserved_special_token_37|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128046": { - "content": "<|reserved_special_token_38|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128047": { - "content": "<|reserved_special_token_39|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128048": { - "content": "<|reserved_special_token_40|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128049": { - "content": "<|reserved_special_token_41|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128050": { - "content": "<|reserved_special_token_42|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128051": { - "content": "<|reserved_special_token_43|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128052": { - "content": "<|reserved_special_token_44|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128053": { - "content": "<|reserved_special_token_45|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128054": { - "content": "<|reserved_special_token_46|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128055": { - "content": "<|reserved_special_token_47|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128056": { - "content": "<|reserved_special_token_48|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128057": { - "content": "<|reserved_special_token_49|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128058": { - "content": "<|reserved_special_token_50|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128059": { - "content": "<|reserved_special_token_51|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128060": { - "content": "<|reserved_special_token_52|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128061": { - "content": "<|reserved_special_token_53|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128062": { - "content": "<|reserved_special_token_54|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128063": { - "content": "<|reserved_special_token_55|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128064": { - "content": "<|reserved_special_token_56|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128065": { - "content": "<|reserved_special_token_57|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128066": { - "content": "<|reserved_special_token_58|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128067": { - "content": "<|reserved_special_token_59|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128068": { - "content": "<|reserved_special_token_60|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128069": { - "content": "<|reserved_special_token_61|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128070": { - "content": "<|reserved_special_token_62|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128071": { - "content": "<|reserved_special_token_63|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128072": { - "content": "<|reserved_special_token_64|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128073": { - "content": "<|reserved_special_token_65|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128074": { - "content": "<|reserved_special_token_66|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128075": { - "content": "<|reserved_special_token_67|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128076": { - "content": "<|reserved_special_token_68|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128077": { - "content": "<|reserved_special_token_69|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128078": { - "content": "<|reserved_special_token_70|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128079": { - "content": "<|reserved_special_token_71|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128080": { - "content": "<|reserved_special_token_72|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128081": { - "content": "<|reserved_special_token_73|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128082": { - "content": "<|reserved_special_token_74|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128083": { - "content": "<|reserved_special_token_75|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128084": { - "content": "<|reserved_special_token_76|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128085": { - "content": "<|reserved_special_token_77|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128086": { - "content": "<|reserved_special_token_78|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128087": { - "content": "<|reserved_special_token_79|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128088": { - "content": "<|reserved_special_token_80|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128089": { - "content": "<|reserved_special_token_81|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128090": { - "content": "<|reserved_special_token_82|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128091": { - "content": "<|reserved_special_token_83|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128092": { - "content": "<|reserved_special_token_84|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128093": { - "content": "<|reserved_special_token_85|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128094": { - "content": "<|reserved_special_token_86|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128095": { - "content": "<|reserved_special_token_87|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128096": { - "content": "<|reserved_special_token_88|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128097": { - "content": "<|reserved_special_token_89|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128098": { - "content": "<|reserved_special_token_90|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128099": { - "content": "<|reserved_special_token_91|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128100": { - "content": "<|reserved_special_token_92|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128101": { - "content": "<|reserved_special_token_93|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128102": { - "content": "<|reserved_special_token_94|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128103": { - "content": "<|reserved_special_token_95|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128104": { - "content": "<|reserved_special_token_96|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128105": { - "content": "<|reserved_special_token_97|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128106": { - "content": "<|reserved_special_token_98|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128107": { - "content": "<|reserved_special_token_99|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128108": { - "content": "<|reserved_special_token_100|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128109": { - "content": "<|reserved_special_token_101|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128110": { - "content": "<|reserved_special_token_102|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128111": { - "content": "<|reserved_special_token_103|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128112": { - "content": "<|reserved_special_token_104|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128113": { - "content": "<|reserved_special_token_105|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128114": { - "content": "<|reserved_special_token_106|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128115": { - "content": "<|reserved_special_token_107|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128116": { - "content": "<|reserved_special_token_108|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128117": { - "content": "<|reserved_special_token_109|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128118": { - "content": "<|reserved_special_token_110|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128119": { - "content": "<|reserved_special_token_111|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128120": { - "content": "<|reserved_special_token_112|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128121": { - "content": "<|reserved_special_token_113|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128122": { - "content": "<|reserved_special_token_114|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128123": { - "content": "<|reserved_special_token_115|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128124": { - "content": "<|reserved_special_token_116|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128125": { - "content": "<|reserved_special_token_117|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128126": { - "content": "<|reserved_special_token_118|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128127": { - "content": "<|reserved_special_token_119|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128128": { - "content": "<|reserved_special_token_120|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128129": { - "content": "<|reserved_special_token_121|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128130": { - "content": "<|reserved_special_token_122|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128131": { - "content": "<|reserved_special_token_123|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128132": { - "content": "<|reserved_special_token_124|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128133": { - "content": "<|reserved_special_token_125|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128134": { - "content": "<|reserved_special_token_126|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128135": { - "content": "<|reserved_special_token_127|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128136": { - "content": "<|reserved_special_token_128|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128137": { - "content": "<|reserved_special_token_129|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128138": { - "content": "<|reserved_special_token_130|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128139": { - "content": "<|reserved_special_token_131|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128140": { - "content": "<|reserved_special_token_132|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128141": { - "content": "<|reserved_special_token_133|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128142": { - "content": "<|reserved_special_token_134|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128143": { - "content": "<|reserved_special_token_135|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128144": { - "content": "<|reserved_special_token_136|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128145": { - "content": "<|reserved_special_token_137|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128146": { - "content": "<|reserved_special_token_138|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128147": { - "content": "<|reserved_special_token_139|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128148": { - "content": "<|reserved_special_token_140|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128149": { - "content": "<|reserved_special_token_141|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128150": { - "content": "<|reserved_special_token_142|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128151": { - "content": "<|reserved_special_token_143|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128152": { - "content": "<|reserved_special_token_144|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128153": { - "content": "<|reserved_special_token_145|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128154": { - "content": "<|reserved_special_token_146|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128155": { - "content": "<|reserved_special_token_147|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128156": { - "content": "<|reserved_special_token_148|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128157": { - "content": "<|reserved_special_token_149|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128158": { - "content": "<|reserved_special_token_150|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128159": { - "content": "<|reserved_special_token_151|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128160": { - "content": "<|reserved_special_token_152|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128161": { - "content": "<|reserved_special_token_153|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128162": { - "content": "<|reserved_special_token_154|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128163": { - "content": "<|reserved_special_token_155|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128164": { - "content": "<|reserved_special_token_156|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128165": { - "content": "<|reserved_special_token_157|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128166": { - "content": "<|reserved_special_token_158|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128167": { - "content": "<|reserved_special_token_159|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128168": { - "content": "<|reserved_special_token_160|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128169": { - "content": "<|reserved_special_token_161|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128170": { - "content": "<|reserved_special_token_162|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128171": { - "content": "<|reserved_special_token_163|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128172": { - "content": "<|reserved_special_token_164|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128173": { - "content": "<|reserved_special_token_165|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128174": { - "content": "<|reserved_special_token_166|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128175": { - "content": "<|reserved_special_token_167|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128176": { - "content": "<|reserved_special_token_168|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128177": { - "content": "<|reserved_special_token_169|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128178": { - "content": "<|reserved_special_token_170|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128179": { - "content": "<|reserved_special_token_171|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128180": { - "content": "<|reserved_special_token_172|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128181": { - "content": "<|reserved_special_token_173|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128182": { - "content": "<|reserved_special_token_174|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128183": { - "content": "<|reserved_special_token_175|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128184": { - "content": "<|reserved_special_token_176|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128185": { - "content": "<|reserved_special_token_177|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128186": { - "content": "<|reserved_special_token_178|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128187": { - "content": "<|reserved_special_token_179|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128188": { - "content": "<|reserved_special_token_180|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128189": { - "content": "<|reserved_special_token_181|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128190": { - "content": "<|reserved_special_token_182|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128191": { - "content": "<|reserved_special_token_183|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128192": { - "content": "<|reserved_special_token_184|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128193": { - "content": "<|reserved_special_token_185|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128194": { - "content": "<|reserved_special_token_186|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128195": { - "content": "<|reserved_special_token_187|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128196": { - "content": "<|reserved_special_token_188|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128197": { - "content": "<|reserved_special_token_189|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128198": { - "content": "<|reserved_special_token_190|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128199": { - "content": "<|reserved_special_token_191|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128200": { - "content": "<|reserved_special_token_192|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128201": { - "content": "<|reserved_special_token_193|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128202": { - "content": "<|reserved_special_token_194|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128203": { - "content": "<|reserved_special_token_195|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128204": { - "content": "<|reserved_special_token_196|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128205": { - "content": "<|reserved_special_token_197|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128206": { - "content": "<|reserved_special_token_198|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128207": { - "content": "<|reserved_special_token_199|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128208": { - "content": "<|reserved_special_token_200|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128209": { - "content": "<|reserved_special_token_201|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128210": { - "content": "<|reserved_special_token_202|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128211": { - "content": "<|reserved_special_token_203|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128212": { - "content": "<|reserved_special_token_204|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128213": { - "content": "<|reserved_special_token_205|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128214": { - "content": "<|reserved_special_token_206|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128215": { - "content": "<|reserved_special_token_207|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128216": { - "content": "<|reserved_special_token_208|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128217": { - "content": "<|reserved_special_token_209|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128218": { - "content": "<|reserved_special_token_210|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128219": { - "content": "<|reserved_special_token_211|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128220": { - "content": "<|reserved_special_token_212|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128221": { - "content": "<|reserved_special_token_213|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128222": { - "content": "<|reserved_special_token_214|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128223": { - "content": "<|reserved_special_token_215|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128224": { - "content": "<|reserved_special_token_216|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128225": { - "content": "<|reserved_special_token_217|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128226": { - "content": "<|reserved_special_token_218|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128227": { - "content": "<|reserved_special_token_219|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128228": { - "content": "<|reserved_special_token_220|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128229": { - "content": "<|reserved_special_token_221|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128230": { - "content": "<|reserved_special_token_222|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128231": { - "content": "<|reserved_special_token_223|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128232": { - "content": "<|reserved_special_token_224|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128233": { - "content": "<|reserved_special_token_225|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128234": { - "content": "<|reserved_special_token_226|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128235": { - "content": "<|reserved_special_token_227|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128236": { - "content": "<|reserved_special_token_228|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128237": { - "content": "<|reserved_special_token_229|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128238": { - "content": "<|reserved_special_token_230|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128239": { - "content": "<|reserved_special_token_231|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128240": { - "content": "<|reserved_special_token_232|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128241": { - "content": "<|reserved_special_token_233|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128242": { - "content": "<|reserved_special_token_234|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128243": { - "content": "<|reserved_special_token_235|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128244": { - "content": "<|reserved_special_token_236|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128245": { - "content": "<|reserved_special_token_237|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128246": { - "content": "<|reserved_special_token_238|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128247": { - "content": "<|reserved_special_token_239|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128248": { - "content": "<|reserved_special_token_240|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128249": { - "content": "<|reserved_special_token_241|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128250": { - "content": "<|reserved_special_token_242|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128251": { - "content": "<|reserved_special_token_243|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128252": { - "content": "<|reserved_special_token_244|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128253": { - "content": "<|reserved_special_token_245|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128254": { - "content": "<|reserved_special_token_246|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128255": { - "content": "<|reserved_special_token_247|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - } - }, - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "extra_special_tokens": {}, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|eot_id|>", - "tokenizer_class": "PreTrainedTokenizerFast" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/README.md b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/README.md deleted file mode 100644 index cf99ee1c240c053b61ce57729523811f63fbd11b..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# Quantized Model Checkpoint - -**Base model:** meta-llama/Llama-3.1-8B-Instruct - -**Average bitwidth:** 6.6454 - -**Sensitivity method:** shapley - -**Constraints:** -- max_kl: 0.01 -- min_eap: 0.99 - -**Metrics:** -- predicted_kl: 0.001530 -- predicted_eap: 0.990005 - -See `quantization_config.txt` for full configuration details. diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/chat_template.jinja b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/chat_template.jinja deleted file mode 100644 index 33089ace1be88f22a10fe861ad49718d5d886090..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/config.json deleted file mode 100644 index 2bf71dab1ee4525127aba58e9446aa0a1dd046e6..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "bos_token_id": 128000, - "dtype": "float16", - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "transformers_version": "4.57.3", - "use_cache": false, - "vocab_size": 128256 -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/generation_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/generation_config.json deleted file mode 100644 index 993459bf55ed73c1390809c2e2a3d7c1c0e0d844..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "bos_token_id": 128000, - "do_sample": true, - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "temperature": 0.6, - "top_p": 0.9, - "transformers_version": "4.57.3" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00001-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00001-of-00004.safetensors deleted file mode 100644 index e2e037287539db905b084d29df5de02145d1dc56..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00001-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87a988f90d2be901de9c23896e8d909980ebf5e7f2ed2a0a5cddd9598caf5188 -size 4976698592 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00002-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00002-of-00004.safetensors deleted file mode 100644 index 0abe53e6edd324fe525b41bc66b90972608d935a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00002-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:59e48041230a2cfe3f447d5d8fc881647a42e27bd910e8bfebebee50797814c8 -size 4999802616 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00003-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00003-of-00004.safetensors deleted file mode 100644 index c19d15f926e12d2d336aee4fe7b92ee08194319d..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00003-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3e4ea4b99fc0060c4cb60868a42fc01bb7f28d6138bc8746f00d0ef780541c36 -size 4915916080 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00004-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00004-of-00004.safetensors deleted file mode 100644 index b09e9cdcb05f5cf77b3ef3a8ffa431eb33ad02b9..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model-00004-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44408391c116c33adf6e43ab53f84d75bee5e2956b293c34dc60509fb0fd825b -size 1168138808 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model.safetensors.index.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model.safetensors.index.json deleted file mode 100644 index 5c64f1e87be95160fabc494eebfa0f7e68064af2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/model.safetensors.index.json +++ /dev/null @@ -1,299 +0,0 @@ -{ - "metadata": { - "total_parameters": 8030261248, - "total_size": 16060522496 - }, - "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.embed_tokens.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00004-of-00004.safetensors" - } -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/quantization_config.txt b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/quantization_config.txt deleted file mode 100644 index 5c883d05c17a0969361372b592934a146829d125..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/quantization_config.txt +++ /dev/null @@ -1,252 +0,0 @@ -# Model: meta-llama/Llama-3.1-8B-Instruct -# Layer directory: /nfs/scistore19/alistgrp/mhelcig/local/data/search/4_5_6_7_8bit_asym_g128/Llama-3.1-8B-Instruct/6bit/ -# Sensitivity method: shapley -# Estimation method: permutation_separate -# Available bitwidths: [4, 5, 6, 7, 8] -# Bitwidth map: {4: 4.3125, 5: 5.3125, 6: 6.3125, 7: 7.3125, 8: 8.3125} -# -# Mode: predicted_constraint_search -# Constraint max_kl: 0.01 -# Constraint min_eap: 0.99 -# Weights: nll=0.0, kl=0.0, eap=1.0 -# -# Average bitwidth: 6.6454 -# Total params: 6979321856 -# Total bits: 46380613632 -# Predicted KL: 0.001530 -# Predicted EAP: 0.990005 -# Predicted ETL: 0.009995 -# Satisfies constraints: True -# Solver calls: 1 -# Evaluations: 0 -# -# Bitwidth distribution: -# 8-bit: 86 layers (38.4%) -# 7-bit: 50 layers (22.3%) -# 6-bit: 83 layers (37.1%) -# 5-bit: 5 layers (2.2%) -# -model.layers.0.self_attn.q_proj: 7 -model.layers.0.self_attn.k_proj: 8 -model.layers.0.self_attn.v_proj: 8 -model.layers.0.self_attn.o_proj: 7 -model.layers.0.mlp.gate_proj: 6 -model.layers.0.mlp.up_proj: 6 -model.layers.0.mlp.down_proj: 6 -model.layers.1.self_attn.q_proj: 8 -model.layers.1.self_attn.k_proj: 8 -model.layers.1.self_attn.v_proj: 8 -model.layers.1.self_attn.o_proj: 7 -model.layers.1.mlp.gate_proj: 6 -model.layers.1.mlp.up_proj: 6 -model.layers.1.mlp.down_proj: 8 -model.layers.2.self_attn.q_proj: 8 -model.layers.2.self_attn.k_proj: 8 -model.layers.2.self_attn.v_proj: 8 -model.layers.2.self_attn.o_proj: 7 -model.layers.2.mlp.gate_proj: 6 -model.layers.2.mlp.up_proj: 6 -model.layers.2.mlp.down_proj: 6 -model.layers.3.self_attn.q_proj: 7 -model.layers.3.self_attn.k_proj: 8 -model.layers.3.self_attn.v_proj: 8 -model.layers.3.self_attn.o_proj: 7 -model.layers.3.mlp.gate_proj: 6 -model.layers.3.mlp.up_proj: 6 -model.layers.3.mlp.down_proj: 6 -model.layers.4.self_attn.q_proj: 7 -model.layers.4.self_attn.k_proj: 8 -model.layers.4.self_attn.v_proj: 8 -model.layers.4.self_attn.o_proj: 7 -model.layers.4.mlp.gate_proj: 6 -model.layers.4.mlp.up_proj: 6 -model.layers.4.mlp.down_proj: 6 -model.layers.5.self_attn.q_proj: 7 -model.layers.5.self_attn.k_proj: 8 -model.layers.5.self_attn.v_proj: 8 -model.layers.5.self_attn.o_proj: 7 -model.layers.5.mlp.gate_proj: 6 -model.layers.5.mlp.up_proj: 6 -model.layers.5.mlp.down_proj: 6 -model.layers.6.self_attn.q_proj: 7 -model.layers.6.self_attn.k_proj: 8 -model.layers.6.self_attn.v_proj: 8 -model.layers.6.self_attn.o_proj: 7 -model.layers.6.mlp.gate_proj: 7 -model.layers.6.mlp.up_proj: 7 -model.layers.6.mlp.down_proj: 6 -model.layers.7.self_attn.q_proj: 8 -model.layers.7.self_attn.k_proj: 8 -model.layers.7.self_attn.v_proj: 8 -model.layers.7.self_attn.o_proj: 7 -model.layers.7.mlp.gate_proj: 6 -model.layers.7.mlp.up_proj: 6 -model.layers.7.mlp.down_proj: 6 -model.layers.8.self_attn.q_proj: 8 -model.layers.8.self_attn.k_proj: 8 -model.layers.8.self_attn.v_proj: 8 -model.layers.8.self_attn.o_proj: 7 -model.layers.8.mlp.gate_proj: 6 -model.layers.8.mlp.up_proj: 6 -model.layers.8.mlp.down_proj: 6 -model.layers.9.self_attn.q_proj: 8 -model.layers.9.self_attn.k_proj: 8 -model.layers.9.self_attn.v_proj: 8 -model.layers.9.self_attn.o_proj: 7 -model.layers.9.mlp.gate_proj: 6 -model.layers.9.mlp.up_proj: 6 -model.layers.9.mlp.down_proj: 6 -model.layers.10.self_attn.q_proj: 8 -model.layers.10.self_attn.k_proj: 8 -model.layers.10.self_attn.v_proj: 8 -model.layers.10.self_attn.o_proj: 7 -model.layers.10.mlp.gate_proj: 6 -model.layers.10.mlp.up_proj: 6 -model.layers.10.mlp.down_proj: 6 -model.layers.11.self_attn.q_proj: 8 -model.layers.11.self_attn.k_proj: 8 -model.layers.11.self_attn.v_proj: 8 -model.layers.11.self_attn.o_proj: 7 -model.layers.11.mlp.gate_proj: 6 -model.layers.11.mlp.up_proj: 6 -model.layers.11.mlp.down_proj: 6 -model.layers.12.self_attn.q_proj: 8 -model.layers.12.self_attn.k_proj: 8 -model.layers.12.self_attn.v_proj: 8 -model.layers.12.self_attn.o_proj: 7 -model.layers.12.mlp.gate_proj: 6 -model.layers.12.mlp.up_proj: 6 -model.layers.12.mlp.down_proj: 6 -model.layers.13.self_attn.q_proj: 8 -model.layers.13.self_attn.k_proj: 8 -model.layers.13.self_attn.v_proj: 8 -model.layers.13.self_attn.o_proj: 7 -model.layers.13.mlp.gate_proj: 6 -model.layers.13.mlp.up_proj: 6 -model.layers.13.mlp.down_proj: 6 -model.layers.14.self_attn.q_proj: 8 -model.layers.14.self_attn.k_proj: 8 -model.layers.14.self_attn.v_proj: 8 -model.layers.14.self_attn.o_proj: 7 -model.layers.14.mlp.gate_proj: 6 -model.layers.14.mlp.up_proj: 6 -model.layers.14.mlp.down_proj: 6 -model.layers.15.self_attn.q_proj: 8 -model.layers.15.self_attn.k_proj: 8 -model.layers.15.self_attn.v_proj: 8 -model.layers.15.self_attn.o_proj: 7 -model.layers.15.mlp.gate_proj: 6 -model.layers.15.mlp.up_proj: 6 -model.layers.15.mlp.down_proj: 6 -model.layers.16.self_attn.q_proj: 8 -model.layers.16.self_attn.k_proj: 8 -model.layers.16.self_attn.v_proj: 8 -model.layers.16.self_attn.o_proj: 7 -model.layers.16.mlp.gate_proj: 6 -model.layers.16.mlp.up_proj: 6 -model.layers.16.mlp.down_proj: 6 -model.layers.17.self_attn.q_proj: 8 -model.layers.17.self_attn.k_proj: 8 -model.layers.17.self_attn.v_proj: 8 -model.layers.17.self_attn.o_proj: 7 -model.layers.17.mlp.gate_proj: 6 -model.layers.17.mlp.up_proj: 6 -model.layers.17.mlp.down_proj: 6 -model.layers.18.self_attn.q_proj: 8 -model.layers.18.self_attn.k_proj: 8 -model.layers.18.self_attn.v_proj: 8 -model.layers.18.self_attn.o_proj: 7 -model.layers.18.mlp.gate_proj: 6 -model.layers.18.mlp.up_proj: 6 -model.layers.18.mlp.down_proj: 6 -model.layers.19.self_attn.q_proj: 8 -model.layers.19.self_attn.k_proj: 8 -model.layers.19.self_attn.v_proj: 8 -model.layers.19.self_attn.o_proj: 7 -model.layers.19.mlp.gate_proj: 6 -model.layers.19.mlp.up_proj: 6 -model.layers.19.mlp.down_proj: 6 -model.layers.20.self_attn.q_proj: 7 -model.layers.20.self_attn.k_proj: 8 -model.layers.20.self_attn.v_proj: 8 -model.layers.20.self_attn.o_proj: 7 -model.layers.20.mlp.gate_proj: 6 -model.layers.20.mlp.up_proj: 6 -model.layers.20.mlp.down_proj: 6 -model.layers.21.self_attn.q_proj: 8 -model.layers.21.self_attn.k_proj: 8 -model.layers.21.self_attn.v_proj: 8 -model.layers.21.self_attn.o_proj: 7 -model.layers.21.mlp.gate_proj: 6 -model.layers.21.mlp.up_proj: 6 -model.layers.21.mlp.down_proj: 6 -model.layers.22.self_attn.q_proj: 7 -model.layers.22.self_attn.k_proj: 8 -model.layers.22.self_attn.v_proj: 8 -model.layers.22.self_attn.o_proj: 7 -model.layers.22.mlp.gate_proj: 6 -model.layers.22.mlp.up_proj: 6 -model.layers.22.mlp.down_proj: 6 -model.layers.23.self_attn.q_proj: 8 -model.layers.23.self_attn.k_proj: 8 -model.layers.23.self_attn.v_proj: 8 -model.layers.23.self_attn.o_proj: 7 -model.layers.23.mlp.gate_proj: 6 -model.layers.23.mlp.up_proj: 6 -model.layers.23.mlp.down_proj: 6 -model.layers.24.self_attn.q_proj: 7 -model.layers.24.self_attn.k_proj: 8 -model.layers.24.self_attn.v_proj: 8 -model.layers.24.self_attn.o_proj: 7 -model.layers.24.mlp.gate_proj: 5 -model.layers.24.mlp.up_proj: 5 -model.layers.24.mlp.down_proj: 5 -model.layers.25.self_attn.q_proj: 8 -model.layers.25.self_attn.k_proj: 8 -model.layers.25.self_attn.v_proj: 8 -model.layers.25.self_attn.o_proj: 7 -model.layers.25.mlp.gate_proj: 6 -model.layers.25.mlp.up_proj: 6 -model.layers.25.mlp.down_proj: 6 -model.layers.26.self_attn.q_proj: 7 -model.layers.26.self_attn.k_proj: 8 -model.layers.26.self_attn.v_proj: 8 -model.layers.26.self_attn.o_proj: 7 -model.layers.26.mlp.gate_proj: 6 -model.layers.26.mlp.up_proj: 6 -model.layers.26.mlp.down_proj: 5 -model.layers.27.self_attn.q_proj: 7 -model.layers.27.self_attn.k_proj: 8 -model.layers.27.self_attn.v_proj: 8 -model.layers.27.self_attn.o_proj: 7 -model.layers.27.mlp.gate_proj: 6 -model.layers.27.mlp.up_proj: 6 -model.layers.27.mlp.down_proj: 5 -model.layers.28.self_attn.q_proj: 7 -model.layers.28.self_attn.k_proj: 8 -model.layers.28.self_attn.v_proj: 8 -model.layers.28.self_attn.o_proj: 7 -model.layers.28.mlp.gate_proj: 6 -model.layers.28.mlp.up_proj: 6 -model.layers.28.mlp.down_proj: 6 -model.layers.29.self_attn.q_proj: 7 -model.layers.29.self_attn.k_proj: 8 -model.layers.29.self_attn.v_proj: 8 -model.layers.29.self_attn.o_proj: 7 -model.layers.29.mlp.gate_proj: 6 -model.layers.29.mlp.up_proj: 6 -model.layers.29.mlp.down_proj: 6 -model.layers.30.self_attn.q_proj: 7 -model.layers.30.self_attn.k_proj: 8 -model.layers.30.self_attn.v_proj: 8 -model.layers.30.self_attn.o_proj: 7 -model.layers.30.mlp.gate_proj: 7 -model.layers.30.mlp.up_proj: 7 -model.layers.30.mlp.down_proj: 6 -model.layers.31.self_attn.q_proj: 7 -model.layers.31.self_attn.k_proj: 8 -model.layers.31.self_attn.v_proj: 8 -model.layers.31.self_attn.o_proj: 7 -model.layers.31.mlp.gate_proj: 8 -model.layers.31.mlp.up_proj: 8 -model.layers.31.mlp.down_proj: 8 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/special_tokens_map.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/special_tokens_map.json deleted file mode 100644 index b43be96621d147110fb8a18b5776ec6e38516127..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/special_tokens_map.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "bos_token": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "<|eot_id|>" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/tokenizer.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/tokenizer_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/tokenizer_config.json deleted file mode 100644 index 3beeacc86a6ca3cae14ad3004263ab74a4bac07a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw6.65_5-6-7-8bit/tokenizer_config.json +++ /dev/null @@ -1,2063 +0,0 @@ -{ - "added_tokens_decoder": { - "128000": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128001": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128002": { - "content": "<|reserved_special_token_0|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128003": { - "content": "<|reserved_special_token_1|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128004": { - "content": "<|finetune_right_pad_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128005": { - "content": "<|reserved_special_token_2|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128006": { - "content": "<|start_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128007": { - "content": "<|end_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128008": { - "content": "<|eom_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128009": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128010": { - "content": "<|python_tag|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128011": { - "content": "<|reserved_special_token_3|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128012": { - "content": "<|reserved_special_token_4|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128013": { - "content": "<|reserved_special_token_5|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128014": { - "content": "<|reserved_special_token_6|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128015": { - "content": "<|reserved_special_token_7|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128016": { - "content": "<|reserved_special_token_8|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128017": { - "content": "<|reserved_special_token_9|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128018": { - "content": "<|reserved_special_token_10|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128019": { - "content": "<|reserved_special_token_11|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128020": { - "content": "<|reserved_special_token_12|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128021": { - "content": "<|reserved_special_token_13|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128022": { - "content": "<|reserved_special_token_14|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128023": { - "content": "<|reserved_special_token_15|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128024": { - "content": "<|reserved_special_token_16|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128025": { - "content": "<|reserved_special_token_17|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128026": { - "content": "<|reserved_special_token_18|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128027": { - "content": "<|reserved_special_token_19|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128028": { - "content": "<|reserved_special_token_20|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128029": { - "content": "<|reserved_special_token_21|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128030": { - "content": "<|reserved_special_token_22|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128031": { - "content": "<|reserved_special_token_23|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128032": { - "content": "<|reserved_special_token_24|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128033": { - "content": "<|reserved_special_token_25|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128034": { - "content": "<|reserved_special_token_26|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128035": { - "content": "<|reserved_special_token_27|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128036": { - "content": "<|reserved_special_token_28|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128037": { - "content": "<|reserved_special_token_29|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128038": { - "content": "<|reserved_special_token_30|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128039": { - "content": "<|reserved_special_token_31|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128040": { - "content": "<|reserved_special_token_32|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128041": { - "content": "<|reserved_special_token_33|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128042": { - "content": "<|reserved_special_token_34|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128043": { - "content": "<|reserved_special_token_35|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128044": { - "content": "<|reserved_special_token_36|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128045": { - "content": "<|reserved_special_token_37|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128046": { - "content": "<|reserved_special_token_38|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128047": { - "content": "<|reserved_special_token_39|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128048": { - "content": "<|reserved_special_token_40|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128049": { - "content": "<|reserved_special_token_41|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128050": { - "content": "<|reserved_special_token_42|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128051": { - "content": "<|reserved_special_token_43|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128052": { - "content": "<|reserved_special_token_44|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128053": { - "content": "<|reserved_special_token_45|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128054": { - "content": "<|reserved_special_token_46|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128055": { - "content": "<|reserved_special_token_47|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128056": { - "content": "<|reserved_special_token_48|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128057": { - "content": "<|reserved_special_token_49|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128058": { - "content": "<|reserved_special_token_50|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128059": { - "content": "<|reserved_special_token_51|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128060": { - "content": "<|reserved_special_token_52|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128061": { - "content": "<|reserved_special_token_53|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128062": { - "content": "<|reserved_special_token_54|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128063": { - "content": "<|reserved_special_token_55|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128064": { - "content": "<|reserved_special_token_56|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128065": { - "content": "<|reserved_special_token_57|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128066": { - "content": "<|reserved_special_token_58|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128067": { - "content": "<|reserved_special_token_59|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128068": { - "content": "<|reserved_special_token_60|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128069": { - "content": "<|reserved_special_token_61|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128070": { - "content": "<|reserved_special_token_62|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128071": { - "content": "<|reserved_special_token_63|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128072": { - "content": "<|reserved_special_token_64|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128073": { - "content": "<|reserved_special_token_65|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128074": { - "content": "<|reserved_special_token_66|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128075": { - "content": "<|reserved_special_token_67|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128076": { - "content": "<|reserved_special_token_68|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128077": { - "content": "<|reserved_special_token_69|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128078": { - "content": "<|reserved_special_token_70|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128079": { - "content": "<|reserved_special_token_71|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128080": { - "content": "<|reserved_special_token_72|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128081": { - "content": "<|reserved_special_token_73|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128082": { - "content": "<|reserved_special_token_74|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128083": { - "content": "<|reserved_special_token_75|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128084": { - "content": "<|reserved_special_token_76|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128085": { - "content": "<|reserved_special_token_77|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128086": { - "content": "<|reserved_special_token_78|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128087": { - "content": "<|reserved_special_token_79|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128088": { - "content": "<|reserved_special_token_80|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128089": { - "content": "<|reserved_special_token_81|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128090": { - "content": "<|reserved_special_token_82|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128091": { - "content": "<|reserved_special_token_83|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128092": { - "content": "<|reserved_special_token_84|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128093": { - "content": "<|reserved_special_token_85|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128094": { - "content": "<|reserved_special_token_86|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128095": { - "content": "<|reserved_special_token_87|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128096": { - "content": "<|reserved_special_token_88|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128097": { - "content": "<|reserved_special_token_89|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128098": { - "content": "<|reserved_special_token_90|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128099": { - "content": "<|reserved_special_token_91|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128100": { - "content": "<|reserved_special_token_92|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128101": { - "content": "<|reserved_special_token_93|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128102": { - "content": "<|reserved_special_token_94|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128103": { - "content": "<|reserved_special_token_95|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128104": { - "content": "<|reserved_special_token_96|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128105": { - "content": "<|reserved_special_token_97|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128106": { - "content": "<|reserved_special_token_98|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128107": { - "content": "<|reserved_special_token_99|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128108": { - "content": "<|reserved_special_token_100|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128109": { - "content": "<|reserved_special_token_101|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128110": { - "content": "<|reserved_special_token_102|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128111": { - "content": "<|reserved_special_token_103|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128112": { - "content": "<|reserved_special_token_104|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128113": { - "content": "<|reserved_special_token_105|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128114": { - "content": "<|reserved_special_token_106|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128115": { - "content": "<|reserved_special_token_107|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128116": { - "content": "<|reserved_special_token_108|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128117": { - "content": "<|reserved_special_token_109|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128118": { - "content": "<|reserved_special_token_110|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128119": { - "content": "<|reserved_special_token_111|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128120": { - "content": "<|reserved_special_token_112|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128121": { - "content": "<|reserved_special_token_113|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128122": { - "content": "<|reserved_special_token_114|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128123": { - "content": "<|reserved_special_token_115|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128124": { - "content": "<|reserved_special_token_116|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128125": { - "content": "<|reserved_special_token_117|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128126": { - "content": "<|reserved_special_token_118|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128127": { - "content": "<|reserved_special_token_119|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128128": { - "content": "<|reserved_special_token_120|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128129": { - "content": "<|reserved_special_token_121|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128130": { - "content": "<|reserved_special_token_122|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128131": { - "content": "<|reserved_special_token_123|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128132": { - "content": "<|reserved_special_token_124|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128133": { - "content": "<|reserved_special_token_125|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128134": { - "content": "<|reserved_special_token_126|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128135": { - "content": "<|reserved_special_token_127|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128136": { - "content": "<|reserved_special_token_128|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128137": { - "content": "<|reserved_special_token_129|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128138": { - "content": "<|reserved_special_token_130|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128139": { - "content": "<|reserved_special_token_131|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128140": { - "content": "<|reserved_special_token_132|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128141": { - "content": "<|reserved_special_token_133|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128142": { - "content": "<|reserved_special_token_134|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128143": { - "content": "<|reserved_special_token_135|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128144": { - "content": "<|reserved_special_token_136|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128145": { - "content": "<|reserved_special_token_137|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128146": { - "content": "<|reserved_special_token_138|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128147": { - "content": "<|reserved_special_token_139|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128148": { - "content": "<|reserved_special_token_140|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128149": { - "content": "<|reserved_special_token_141|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128150": { - "content": "<|reserved_special_token_142|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128151": { - "content": "<|reserved_special_token_143|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128152": { - "content": "<|reserved_special_token_144|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128153": { - "content": "<|reserved_special_token_145|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128154": { - "content": "<|reserved_special_token_146|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128155": { - "content": "<|reserved_special_token_147|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128156": { - "content": "<|reserved_special_token_148|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128157": { - "content": "<|reserved_special_token_149|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128158": { - "content": "<|reserved_special_token_150|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128159": { - "content": "<|reserved_special_token_151|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128160": { - "content": "<|reserved_special_token_152|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128161": { - "content": "<|reserved_special_token_153|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128162": { - "content": "<|reserved_special_token_154|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128163": { - "content": "<|reserved_special_token_155|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128164": { - "content": "<|reserved_special_token_156|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128165": { - "content": "<|reserved_special_token_157|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128166": { - "content": "<|reserved_special_token_158|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128167": { - "content": "<|reserved_special_token_159|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128168": { - "content": "<|reserved_special_token_160|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128169": { - "content": "<|reserved_special_token_161|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128170": { - "content": "<|reserved_special_token_162|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128171": { - "content": "<|reserved_special_token_163|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128172": { - "content": "<|reserved_special_token_164|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128173": { - "content": "<|reserved_special_token_165|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128174": { - "content": "<|reserved_special_token_166|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128175": { - "content": "<|reserved_special_token_167|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128176": { - "content": "<|reserved_special_token_168|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128177": { - "content": "<|reserved_special_token_169|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128178": { - "content": "<|reserved_special_token_170|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128179": { - "content": "<|reserved_special_token_171|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128180": { - "content": "<|reserved_special_token_172|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128181": { - "content": "<|reserved_special_token_173|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128182": { - "content": "<|reserved_special_token_174|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128183": { - "content": "<|reserved_special_token_175|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128184": { - "content": "<|reserved_special_token_176|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128185": { - "content": "<|reserved_special_token_177|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128186": { - "content": "<|reserved_special_token_178|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128187": { - "content": "<|reserved_special_token_179|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128188": { - "content": "<|reserved_special_token_180|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128189": { - "content": "<|reserved_special_token_181|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128190": { - "content": "<|reserved_special_token_182|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128191": { - "content": "<|reserved_special_token_183|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128192": { - "content": "<|reserved_special_token_184|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128193": { - "content": "<|reserved_special_token_185|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128194": { - "content": "<|reserved_special_token_186|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128195": { - "content": "<|reserved_special_token_187|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128196": { - "content": "<|reserved_special_token_188|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128197": { - "content": "<|reserved_special_token_189|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128198": { - "content": "<|reserved_special_token_190|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128199": { - "content": "<|reserved_special_token_191|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128200": { - "content": "<|reserved_special_token_192|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128201": { - "content": "<|reserved_special_token_193|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128202": { - "content": "<|reserved_special_token_194|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128203": { - "content": "<|reserved_special_token_195|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128204": { - "content": "<|reserved_special_token_196|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128205": { - "content": "<|reserved_special_token_197|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128206": { - "content": "<|reserved_special_token_198|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128207": { - "content": "<|reserved_special_token_199|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128208": { - "content": "<|reserved_special_token_200|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128209": { - "content": "<|reserved_special_token_201|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128210": { - "content": "<|reserved_special_token_202|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128211": { - "content": "<|reserved_special_token_203|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128212": { - "content": "<|reserved_special_token_204|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128213": { - "content": "<|reserved_special_token_205|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128214": { - "content": "<|reserved_special_token_206|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128215": { - "content": "<|reserved_special_token_207|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128216": { - "content": "<|reserved_special_token_208|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128217": { - "content": "<|reserved_special_token_209|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128218": { - "content": "<|reserved_special_token_210|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128219": { - "content": "<|reserved_special_token_211|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128220": { - "content": "<|reserved_special_token_212|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128221": { - "content": "<|reserved_special_token_213|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128222": { - "content": "<|reserved_special_token_214|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128223": { - "content": "<|reserved_special_token_215|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128224": { - "content": "<|reserved_special_token_216|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128225": { - "content": "<|reserved_special_token_217|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128226": { - "content": "<|reserved_special_token_218|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128227": { - "content": "<|reserved_special_token_219|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128228": { - "content": "<|reserved_special_token_220|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128229": { - "content": "<|reserved_special_token_221|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128230": { - "content": "<|reserved_special_token_222|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128231": { - "content": "<|reserved_special_token_223|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128232": { - "content": "<|reserved_special_token_224|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128233": { - "content": "<|reserved_special_token_225|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128234": { - "content": "<|reserved_special_token_226|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128235": { - "content": "<|reserved_special_token_227|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128236": { - "content": "<|reserved_special_token_228|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128237": { - "content": "<|reserved_special_token_229|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128238": { - "content": "<|reserved_special_token_230|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128239": { - "content": "<|reserved_special_token_231|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128240": { - "content": "<|reserved_special_token_232|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128241": { - "content": "<|reserved_special_token_233|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128242": { - "content": "<|reserved_special_token_234|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128243": { - "content": "<|reserved_special_token_235|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128244": { - "content": "<|reserved_special_token_236|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128245": { - "content": "<|reserved_special_token_237|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128246": { - "content": "<|reserved_special_token_238|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128247": { - "content": "<|reserved_special_token_239|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128248": { - "content": "<|reserved_special_token_240|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128249": { - "content": "<|reserved_special_token_241|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128250": { - "content": "<|reserved_special_token_242|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128251": { - "content": "<|reserved_special_token_243|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128252": { - "content": "<|reserved_special_token_244|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128253": { - "content": "<|reserved_special_token_245|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128254": { - "content": "<|reserved_special_token_246|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128255": { - "content": "<|reserved_special_token_247|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - } - }, - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "extra_special_tokens": {}, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|eot_id|>", - "tokenizer_class": "PreTrainedTokenizerFast" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/README.md b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/README.md deleted file mode 100644 index 829f7fae66fb636b1a016da919720f68d947b5ba..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# Quantized Model Checkpoint - -**Base model:** meta-llama/Llama-3.1-8B-Instruct - -**Average bitwidth:** 7.5385 - -**Sensitivity method:** shapley - -**Constraints:** -- max_kl: 0.01 -- min_eap: 0.99 - -**Metrics:** -- predicted_kl: 0.005405 -- predicted_eap: 0.990085 - -See `quantization_config.txt` for full configuration details. diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/chat_template.jinja b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/chat_template.jinja deleted file mode 100644 index 33089ace1be88f22a10fe861ad49718d5d886090..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/config.json deleted file mode 100644 index 2bf71dab1ee4525127aba58e9446aa0a1dd046e6..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "bos_token_id": 128000, - "dtype": "float16", - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "transformers_version": "4.57.3", - "use_cache": false, - "vocab_size": 128256 -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/generation_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/generation_config.json deleted file mode 100644 index 993459bf55ed73c1390809c2e2a3d7c1c0e0d844..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "bos_token_id": 128000, - "do_sample": true, - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "temperature": 0.6, - "top_p": 0.9, - "transformers_version": "4.57.3" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00001-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00001-of-00004.safetensors deleted file mode 100644 index df0d353583539d7603192fb456743fd52816e29f..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00001-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:949a717376d70bd7c14cb9f0a04ef3e48fa6c9571a0959dbad1d7816052530b1 -size 4976698592 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00002-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00002-of-00004.safetensors deleted file mode 100644 index 16b135e4bae98042f193fdd7efbf67531d3075f5..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00002-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0f33b8cada7e12ac207e90dafa27ee6b859f29d88c40984da826204a1091e5b3 -size 4999802616 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00003-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00003-of-00004.safetensors deleted file mode 100644 index 25f02231992a456aa28f94a4d3bf9bac5cfe039e..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00003-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7e16163743bc24257c0d41837eeea6ce95d243cfb00ffe7bf0f30f63dc0e6d4 -size 4915916080 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00004-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00004-of-00004.safetensors deleted file mode 100644 index b09e9cdcb05f5cf77b3ef3a8ffa431eb33ad02b9..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model-00004-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44408391c116c33adf6e43ab53f84d75bee5e2956b293c34dc60509fb0fd825b -size 1168138808 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model.safetensors.index.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model.safetensors.index.json deleted file mode 100644 index 5c64f1e87be95160fabc494eebfa0f7e68064af2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/model.safetensors.index.json +++ /dev/null @@ -1,299 +0,0 @@ -{ - "metadata": { - "total_parameters": 8030261248, - "total_size": 16060522496 - }, - "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.embed_tokens.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00004-of-00004.safetensors" - } -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/quantization_config.txt b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/quantization_config.txt deleted file mode 100644 index 56a43a23955777a601e2bee2441a33d050bba58f..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/quantization_config.txt +++ /dev/null @@ -1,250 +0,0 @@ -# Model: meta-llama/Llama-3.1-8B-Instruct -# Layer directory: /nfs/scistore19/alistgrp/mhelcig/local/data/search/4_5_6_7_8bit_asym_g128/Llama-3.1-8B-Instruct/6bit/ -# Sensitivity method: shapley -# Estimation method: permutation_separate -# Available bitwidths: [4, 8] -# Bitwidth map: {4: 4.3125, 5: 5.3125, 6: 6.3125, 7: 7.3125, 8: 8.3125} -# -# Mode: predicted_constraint_search -# Constraint max_kl: 0.01 -# Constraint min_eap: 0.99 -# Weights: nll=0.0, kl=0.0, eap=1.0 -# -# Average bitwidth: 7.5385 -# Total params: 6979321856 -# Total bits: 52613349376 -# Predicted KL: 0.005405 -# Predicted EAP: 0.990085 -# Predicted ETL: 0.009915 -# Satisfies constraints: True -# Solver calls: 1 -# Evaluations: 0 -# -# Bitwidth distribution: -# 8-bit: 201 layers (89.7%) -# 4-bit: 23 layers (10.3%) -# -model.layers.0.self_attn.q_proj: 8 -model.layers.0.self_attn.k_proj: 8 -model.layers.0.self_attn.v_proj: 8 -model.layers.0.self_attn.o_proj: 8 -model.layers.0.mlp.gate_proj: 4 -model.layers.0.mlp.up_proj: 4 -model.layers.0.mlp.down_proj: 8 -model.layers.1.self_attn.q_proj: 8 -model.layers.1.self_attn.k_proj: 8 -model.layers.1.self_attn.v_proj: 8 -model.layers.1.self_attn.o_proj: 8 -model.layers.1.mlp.gate_proj: 8 -model.layers.1.mlp.up_proj: 8 -model.layers.1.mlp.down_proj: 8 -model.layers.2.self_attn.q_proj: 8 -model.layers.2.self_attn.k_proj: 8 -model.layers.2.self_attn.v_proj: 8 -model.layers.2.self_attn.o_proj: 8 -model.layers.2.mlp.gate_proj: 8 -model.layers.2.mlp.up_proj: 8 -model.layers.2.mlp.down_proj: 4 -model.layers.3.self_attn.q_proj: 8 -model.layers.3.self_attn.k_proj: 8 -model.layers.3.self_attn.v_proj: 8 -model.layers.3.self_attn.o_proj: 8 -model.layers.3.mlp.gate_proj: 8 -model.layers.3.mlp.up_proj: 8 -model.layers.3.mlp.down_proj: 8 -model.layers.4.self_attn.q_proj: 8 -model.layers.4.self_attn.k_proj: 8 -model.layers.4.self_attn.v_proj: 8 -model.layers.4.self_attn.o_proj: 8 -model.layers.4.mlp.gate_proj: 8 -model.layers.4.mlp.up_proj: 8 -model.layers.4.mlp.down_proj: 8 -model.layers.5.self_attn.q_proj: 8 -model.layers.5.self_attn.k_proj: 8 -model.layers.5.self_attn.v_proj: 8 -model.layers.5.self_attn.o_proj: 8 -model.layers.5.mlp.gate_proj: 8 -model.layers.5.mlp.up_proj: 8 -model.layers.5.mlp.down_proj: 8 -model.layers.6.self_attn.q_proj: 8 -model.layers.6.self_attn.k_proj: 8 -model.layers.6.self_attn.v_proj: 8 -model.layers.6.self_attn.o_proj: 8 -model.layers.6.mlp.gate_proj: 8 -model.layers.6.mlp.up_proj: 8 -model.layers.6.mlp.down_proj: 8 -model.layers.7.self_attn.q_proj: 8 -model.layers.7.self_attn.k_proj: 8 -model.layers.7.self_attn.v_proj: 8 -model.layers.7.self_attn.o_proj: 8 -model.layers.7.mlp.gate_proj: 8 -model.layers.7.mlp.up_proj: 8 -model.layers.7.mlp.down_proj: 4 -model.layers.8.self_attn.q_proj: 8 -model.layers.8.self_attn.k_proj: 8 -model.layers.8.self_attn.v_proj: 8 -model.layers.8.self_attn.o_proj: 8 -model.layers.8.mlp.gate_proj: 8 -model.layers.8.mlp.up_proj: 8 -model.layers.8.mlp.down_proj: 4 -model.layers.9.self_attn.q_proj: 8 -model.layers.9.self_attn.k_proj: 8 -model.layers.9.self_attn.v_proj: 8 -model.layers.9.self_attn.o_proj: 8 -model.layers.9.mlp.gate_proj: 8 -model.layers.9.mlp.up_proj: 8 -model.layers.9.mlp.down_proj: 4 -model.layers.10.self_attn.q_proj: 8 -model.layers.10.self_attn.k_proj: 8 -model.layers.10.self_attn.v_proj: 8 -model.layers.10.self_attn.o_proj: 8 -model.layers.10.mlp.gate_proj: 8 -model.layers.10.mlp.up_proj: 8 -model.layers.10.mlp.down_proj: 4 -model.layers.11.self_attn.q_proj: 8 -model.layers.11.self_attn.k_proj: 8 -model.layers.11.self_attn.v_proj: 8 -model.layers.11.self_attn.o_proj: 8 -model.layers.11.mlp.gate_proj: 8 -model.layers.11.mlp.up_proj: 8 -model.layers.11.mlp.down_proj: 4 -model.layers.12.self_attn.q_proj: 8 -model.layers.12.self_attn.k_proj: 8 -model.layers.12.self_attn.v_proj: 8 -model.layers.12.self_attn.o_proj: 8 -model.layers.12.mlp.gate_proj: 8 -model.layers.12.mlp.up_proj: 8 -model.layers.12.mlp.down_proj: 4 -model.layers.13.self_attn.q_proj: 8 -model.layers.13.self_attn.k_proj: 8 -model.layers.13.self_attn.v_proj: 8 -model.layers.13.self_attn.o_proj: 8 -model.layers.13.mlp.gate_proj: 8 -model.layers.13.mlp.up_proj: 8 -model.layers.13.mlp.down_proj: 8 -model.layers.14.self_attn.q_proj: 8 -model.layers.14.self_attn.k_proj: 8 -model.layers.14.self_attn.v_proj: 8 -model.layers.14.self_attn.o_proj: 8 -model.layers.14.mlp.gate_proj: 8 -model.layers.14.mlp.up_proj: 8 -model.layers.14.mlp.down_proj: 8 -model.layers.15.self_attn.q_proj: 8 -model.layers.15.self_attn.k_proj: 8 -model.layers.15.self_attn.v_proj: 8 -model.layers.15.self_attn.o_proj: 8 -model.layers.15.mlp.gate_proj: 8 -model.layers.15.mlp.up_proj: 8 -model.layers.15.mlp.down_proj: 8 -model.layers.16.self_attn.q_proj: 8 -model.layers.16.self_attn.k_proj: 8 -model.layers.16.self_attn.v_proj: 8 -model.layers.16.self_attn.o_proj: 8 -model.layers.16.mlp.gate_proj: 8 -model.layers.16.mlp.up_proj: 8 -model.layers.16.mlp.down_proj: 8 -model.layers.17.self_attn.q_proj: 8 -model.layers.17.self_attn.k_proj: 8 -model.layers.17.self_attn.v_proj: 8 -model.layers.17.self_attn.o_proj: 8 -model.layers.17.mlp.gate_proj: 8 -model.layers.17.mlp.up_proj: 8 -model.layers.17.mlp.down_proj: 8 -model.layers.18.self_attn.q_proj: 8 -model.layers.18.self_attn.k_proj: 8 -model.layers.18.self_attn.v_proj: 8 -model.layers.18.self_attn.o_proj: 8 -model.layers.18.mlp.gate_proj: 8 -model.layers.18.mlp.up_proj: 8 -model.layers.18.mlp.down_proj: 8 -model.layers.19.self_attn.q_proj: 8 -model.layers.19.self_attn.k_proj: 8 -model.layers.19.self_attn.v_proj: 8 -model.layers.19.self_attn.o_proj: 8 -model.layers.19.mlp.gate_proj: 8 -model.layers.19.mlp.up_proj: 8 -model.layers.19.mlp.down_proj: 4 -model.layers.20.self_attn.q_proj: 8 -model.layers.20.self_attn.k_proj: 8 -model.layers.20.self_attn.v_proj: 8 -model.layers.20.self_attn.o_proj: 8 -model.layers.20.mlp.gate_proj: 8 -model.layers.20.mlp.up_proj: 8 -model.layers.20.mlp.down_proj: 8 -model.layers.21.self_attn.q_proj: 8 -model.layers.21.self_attn.k_proj: 8 -model.layers.21.self_attn.v_proj: 8 -model.layers.21.self_attn.o_proj: 8 -model.layers.21.mlp.gate_proj: 4 -model.layers.21.mlp.up_proj: 4 -model.layers.21.mlp.down_proj: 8 -model.layers.22.self_attn.q_proj: 8 -model.layers.22.self_attn.k_proj: 8 -model.layers.22.self_attn.v_proj: 8 -model.layers.22.self_attn.o_proj: 8 -model.layers.22.mlp.gate_proj: 8 -model.layers.22.mlp.up_proj: 8 -model.layers.22.mlp.down_proj: 4 -model.layers.23.self_attn.q_proj: 8 -model.layers.23.self_attn.k_proj: 8 -model.layers.23.self_attn.v_proj: 8 -model.layers.23.self_attn.o_proj: 8 -model.layers.23.mlp.gate_proj: 8 -model.layers.23.mlp.up_proj: 8 -model.layers.23.mlp.down_proj: 4 -model.layers.24.self_attn.q_proj: 8 -model.layers.24.self_attn.k_proj: 8 -model.layers.24.self_attn.v_proj: 8 -model.layers.24.self_attn.o_proj: 8 -model.layers.24.mlp.gate_proj: 8 -model.layers.24.mlp.up_proj: 8 -model.layers.24.mlp.down_proj: 4 -model.layers.25.self_attn.q_proj: 8 -model.layers.25.self_attn.k_proj: 8 -model.layers.25.self_attn.v_proj: 8 -model.layers.25.self_attn.o_proj: 8 -model.layers.25.mlp.gate_proj: 8 -model.layers.25.mlp.up_proj: 8 -model.layers.25.mlp.down_proj: 4 -model.layers.26.self_attn.q_proj: 8 -model.layers.26.self_attn.k_proj: 8 -model.layers.26.self_attn.v_proj: 8 -model.layers.26.self_attn.o_proj: 8 -model.layers.26.mlp.gate_proj: 8 -model.layers.26.mlp.up_proj: 4 -model.layers.26.mlp.down_proj: 4 -model.layers.27.self_attn.q_proj: 8 -model.layers.27.self_attn.k_proj: 8 -model.layers.27.self_attn.v_proj: 8 -model.layers.27.self_attn.o_proj: 8 -model.layers.27.mlp.gate_proj: 8 -model.layers.27.mlp.up_proj: 8 -model.layers.27.mlp.down_proj: 4 -model.layers.28.self_attn.q_proj: 8 -model.layers.28.self_attn.k_proj: 8 -model.layers.28.self_attn.v_proj: 8 -model.layers.28.self_attn.o_proj: 8 -model.layers.28.mlp.gate_proj: 4 -model.layers.28.mlp.up_proj: 4 -model.layers.28.mlp.down_proj: 4 -model.layers.29.self_attn.q_proj: 8 -model.layers.29.self_attn.k_proj: 8 -model.layers.29.self_attn.v_proj: 8 -model.layers.29.self_attn.o_proj: 8 -model.layers.29.mlp.gate_proj: 8 -model.layers.29.mlp.up_proj: 8 -model.layers.29.mlp.down_proj: 4 -model.layers.30.self_attn.q_proj: 8 -model.layers.30.self_attn.k_proj: 8 -model.layers.30.self_attn.v_proj: 8 -model.layers.30.self_attn.o_proj: 8 -model.layers.30.mlp.gate_proj: 8 -model.layers.30.mlp.up_proj: 8 -model.layers.30.mlp.down_proj: 8 -model.layers.31.self_attn.q_proj: 8 -model.layers.31.self_attn.k_proj: 8 -model.layers.31.self_attn.v_proj: 8 -model.layers.31.self_attn.o_proj: 8 -model.layers.31.mlp.gate_proj: 8 -model.layers.31.mlp.up_proj: 8 -model.layers.31.mlp.down_proj: 8 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/special_tokens_map.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/special_tokens_map.json deleted file mode 100644 index b43be96621d147110fb8a18b5776ec6e38516127..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/special_tokens_map.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "bos_token": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "<|eot_id|>" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/tokenizer.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/tokenizer_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/tokenizer_config.json deleted file mode 100644 index 3beeacc86a6ca3cae14ad3004263ab74a4bac07a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_eap0.99_sha_bw7.54_4-8bit/tokenizer_config.json +++ /dev/null @@ -1,2063 +0,0 @@ -{ - "added_tokens_decoder": { - "128000": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128001": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128002": { - "content": "<|reserved_special_token_0|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128003": { - "content": "<|reserved_special_token_1|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128004": { - "content": "<|finetune_right_pad_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128005": { - "content": "<|reserved_special_token_2|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128006": { - "content": "<|start_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128007": { - "content": "<|end_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128008": { - "content": "<|eom_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128009": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128010": { - "content": "<|python_tag|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128011": { - "content": "<|reserved_special_token_3|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128012": { - "content": "<|reserved_special_token_4|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128013": { - "content": "<|reserved_special_token_5|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128014": { - "content": "<|reserved_special_token_6|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128015": { - "content": "<|reserved_special_token_7|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128016": { - "content": "<|reserved_special_token_8|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128017": { - "content": "<|reserved_special_token_9|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128018": { - "content": "<|reserved_special_token_10|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128019": { - "content": "<|reserved_special_token_11|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128020": { - "content": "<|reserved_special_token_12|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128021": { - "content": "<|reserved_special_token_13|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128022": { - "content": "<|reserved_special_token_14|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128023": { - "content": "<|reserved_special_token_15|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128024": { - "content": "<|reserved_special_token_16|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128025": { - "content": "<|reserved_special_token_17|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128026": { - "content": "<|reserved_special_token_18|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128027": { - "content": "<|reserved_special_token_19|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128028": { - "content": "<|reserved_special_token_20|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128029": { - "content": "<|reserved_special_token_21|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128030": { - "content": "<|reserved_special_token_22|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128031": { - "content": "<|reserved_special_token_23|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128032": { - "content": "<|reserved_special_token_24|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128033": { - "content": "<|reserved_special_token_25|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128034": { - "content": "<|reserved_special_token_26|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128035": { - "content": "<|reserved_special_token_27|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128036": { - "content": "<|reserved_special_token_28|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128037": { - "content": "<|reserved_special_token_29|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128038": { - "content": "<|reserved_special_token_30|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128039": { - "content": "<|reserved_special_token_31|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128040": { - "content": "<|reserved_special_token_32|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128041": { - "content": "<|reserved_special_token_33|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128042": { - "content": "<|reserved_special_token_34|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128043": { - "content": "<|reserved_special_token_35|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128044": { - "content": "<|reserved_special_token_36|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128045": { - "content": "<|reserved_special_token_37|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128046": { - "content": "<|reserved_special_token_38|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128047": { - "content": "<|reserved_special_token_39|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128048": { - "content": "<|reserved_special_token_40|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128049": { - "content": "<|reserved_special_token_41|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128050": { - "content": "<|reserved_special_token_42|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128051": { - "content": "<|reserved_special_token_43|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128052": { - "content": "<|reserved_special_token_44|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128053": { - "content": "<|reserved_special_token_45|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128054": { - "content": "<|reserved_special_token_46|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128055": { - "content": "<|reserved_special_token_47|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128056": { - "content": "<|reserved_special_token_48|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128057": { - "content": "<|reserved_special_token_49|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128058": { - "content": "<|reserved_special_token_50|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128059": { - "content": "<|reserved_special_token_51|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128060": { - "content": "<|reserved_special_token_52|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128061": { - "content": "<|reserved_special_token_53|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128062": { - "content": "<|reserved_special_token_54|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128063": { - "content": "<|reserved_special_token_55|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128064": { - "content": "<|reserved_special_token_56|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128065": { - "content": "<|reserved_special_token_57|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128066": { - "content": "<|reserved_special_token_58|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128067": { - "content": "<|reserved_special_token_59|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128068": { - "content": "<|reserved_special_token_60|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128069": { - "content": "<|reserved_special_token_61|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128070": { - "content": "<|reserved_special_token_62|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128071": { - "content": "<|reserved_special_token_63|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128072": { - "content": "<|reserved_special_token_64|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128073": { - "content": "<|reserved_special_token_65|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128074": { - "content": "<|reserved_special_token_66|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128075": { - "content": "<|reserved_special_token_67|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128076": { - "content": "<|reserved_special_token_68|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128077": { - "content": "<|reserved_special_token_69|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128078": { - "content": "<|reserved_special_token_70|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128079": { - "content": "<|reserved_special_token_71|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128080": { - "content": "<|reserved_special_token_72|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128081": { - "content": "<|reserved_special_token_73|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128082": { - "content": "<|reserved_special_token_74|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128083": { - "content": "<|reserved_special_token_75|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128084": { - "content": "<|reserved_special_token_76|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128085": { - "content": "<|reserved_special_token_77|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128086": { - "content": "<|reserved_special_token_78|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128087": { - "content": "<|reserved_special_token_79|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128088": { - "content": "<|reserved_special_token_80|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128089": { - "content": "<|reserved_special_token_81|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128090": { - "content": "<|reserved_special_token_82|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128091": { - "content": "<|reserved_special_token_83|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128092": { - "content": "<|reserved_special_token_84|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128093": { - "content": "<|reserved_special_token_85|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128094": { - "content": "<|reserved_special_token_86|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128095": { - "content": "<|reserved_special_token_87|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128096": { - "content": "<|reserved_special_token_88|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128097": { - "content": "<|reserved_special_token_89|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128098": { - "content": "<|reserved_special_token_90|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128099": { - "content": "<|reserved_special_token_91|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128100": { - "content": "<|reserved_special_token_92|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128101": { - "content": "<|reserved_special_token_93|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128102": { - "content": "<|reserved_special_token_94|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128103": { - "content": "<|reserved_special_token_95|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128104": { - "content": "<|reserved_special_token_96|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128105": { - "content": "<|reserved_special_token_97|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128106": { - "content": "<|reserved_special_token_98|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128107": { - "content": "<|reserved_special_token_99|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128108": { - "content": "<|reserved_special_token_100|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128109": { - "content": "<|reserved_special_token_101|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128110": { - "content": "<|reserved_special_token_102|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128111": { - "content": "<|reserved_special_token_103|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128112": { - "content": "<|reserved_special_token_104|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128113": { - "content": "<|reserved_special_token_105|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128114": { - "content": "<|reserved_special_token_106|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128115": { - "content": "<|reserved_special_token_107|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128116": { - "content": "<|reserved_special_token_108|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128117": { - "content": "<|reserved_special_token_109|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128118": { - "content": "<|reserved_special_token_110|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128119": { - "content": "<|reserved_special_token_111|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128120": { - "content": "<|reserved_special_token_112|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128121": { - "content": "<|reserved_special_token_113|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128122": { - "content": "<|reserved_special_token_114|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128123": { - "content": "<|reserved_special_token_115|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128124": { - "content": "<|reserved_special_token_116|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128125": { - "content": "<|reserved_special_token_117|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128126": { - "content": "<|reserved_special_token_118|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128127": { - "content": "<|reserved_special_token_119|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128128": { - "content": "<|reserved_special_token_120|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128129": { - "content": "<|reserved_special_token_121|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128130": { - "content": "<|reserved_special_token_122|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128131": { - "content": "<|reserved_special_token_123|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128132": { - "content": "<|reserved_special_token_124|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128133": { - "content": "<|reserved_special_token_125|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128134": { - "content": "<|reserved_special_token_126|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128135": { - "content": "<|reserved_special_token_127|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128136": { - "content": "<|reserved_special_token_128|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128137": { - "content": "<|reserved_special_token_129|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128138": { - "content": "<|reserved_special_token_130|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128139": { - "content": "<|reserved_special_token_131|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128140": { - "content": "<|reserved_special_token_132|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128141": { - "content": "<|reserved_special_token_133|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128142": { - "content": "<|reserved_special_token_134|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128143": { - "content": "<|reserved_special_token_135|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128144": { - "content": "<|reserved_special_token_136|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128145": { - "content": "<|reserved_special_token_137|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128146": { - "content": "<|reserved_special_token_138|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128147": { - "content": "<|reserved_special_token_139|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128148": { - "content": "<|reserved_special_token_140|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128149": { - "content": "<|reserved_special_token_141|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128150": { - "content": "<|reserved_special_token_142|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128151": { - "content": "<|reserved_special_token_143|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128152": { - "content": "<|reserved_special_token_144|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128153": { - "content": "<|reserved_special_token_145|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128154": { - "content": "<|reserved_special_token_146|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128155": { - "content": "<|reserved_special_token_147|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128156": { - "content": "<|reserved_special_token_148|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128157": { - "content": "<|reserved_special_token_149|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128158": { - "content": "<|reserved_special_token_150|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128159": { - "content": "<|reserved_special_token_151|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128160": { - "content": "<|reserved_special_token_152|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128161": { - "content": "<|reserved_special_token_153|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128162": { - "content": "<|reserved_special_token_154|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128163": { - "content": "<|reserved_special_token_155|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128164": { - "content": "<|reserved_special_token_156|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128165": { - "content": "<|reserved_special_token_157|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128166": { - "content": "<|reserved_special_token_158|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128167": { - "content": "<|reserved_special_token_159|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128168": { - "content": "<|reserved_special_token_160|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128169": { - "content": "<|reserved_special_token_161|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128170": { - "content": "<|reserved_special_token_162|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128171": { - "content": "<|reserved_special_token_163|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128172": { - "content": "<|reserved_special_token_164|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128173": { - "content": "<|reserved_special_token_165|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128174": { - "content": "<|reserved_special_token_166|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128175": { - "content": "<|reserved_special_token_167|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128176": { - "content": "<|reserved_special_token_168|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128177": { - "content": "<|reserved_special_token_169|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128178": { - "content": "<|reserved_special_token_170|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128179": { - "content": "<|reserved_special_token_171|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128180": { - "content": "<|reserved_special_token_172|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128181": { - "content": "<|reserved_special_token_173|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128182": { - "content": "<|reserved_special_token_174|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128183": { - "content": "<|reserved_special_token_175|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128184": { - "content": "<|reserved_special_token_176|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128185": { - "content": "<|reserved_special_token_177|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128186": { - "content": "<|reserved_special_token_178|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128187": { - "content": "<|reserved_special_token_179|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128188": { - "content": "<|reserved_special_token_180|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128189": { - "content": "<|reserved_special_token_181|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128190": { - "content": "<|reserved_special_token_182|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128191": { - "content": "<|reserved_special_token_183|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128192": { - "content": "<|reserved_special_token_184|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128193": { - "content": "<|reserved_special_token_185|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128194": { - "content": "<|reserved_special_token_186|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128195": { - "content": "<|reserved_special_token_187|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128196": { - "content": "<|reserved_special_token_188|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128197": { - "content": "<|reserved_special_token_189|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128198": { - "content": "<|reserved_special_token_190|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128199": { - "content": "<|reserved_special_token_191|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128200": { - "content": "<|reserved_special_token_192|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128201": { - "content": "<|reserved_special_token_193|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128202": { - "content": "<|reserved_special_token_194|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128203": { - "content": "<|reserved_special_token_195|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128204": { - "content": "<|reserved_special_token_196|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128205": { - "content": "<|reserved_special_token_197|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128206": { - "content": "<|reserved_special_token_198|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128207": { - "content": "<|reserved_special_token_199|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128208": { - "content": "<|reserved_special_token_200|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128209": { - "content": "<|reserved_special_token_201|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128210": { - "content": "<|reserved_special_token_202|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128211": { - "content": "<|reserved_special_token_203|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128212": { - "content": "<|reserved_special_token_204|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128213": { - "content": "<|reserved_special_token_205|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128214": { - "content": "<|reserved_special_token_206|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128215": { - "content": "<|reserved_special_token_207|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128216": { - "content": "<|reserved_special_token_208|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128217": { - "content": "<|reserved_special_token_209|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128218": { - "content": "<|reserved_special_token_210|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128219": { - "content": "<|reserved_special_token_211|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128220": { - "content": "<|reserved_special_token_212|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128221": { - "content": "<|reserved_special_token_213|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128222": { - "content": "<|reserved_special_token_214|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128223": { - "content": "<|reserved_special_token_215|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128224": { - "content": "<|reserved_special_token_216|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128225": { - "content": "<|reserved_special_token_217|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128226": { - "content": "<|reserved_special_token_218|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128227": { - "content": "<|reserved_special_token_219|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128228": { - "content": "<|reserved_special_token_220|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128229": { - "content": "<|reserved_special_token_221|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128230": { - "content": "<|reserved_special_token_222|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128231": { - "content": "<|reserved_special_token_223|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128232": { - "content": "<|reserved_special_token_224|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128233": { - "content": "<|reserved_special_token_225|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128234": { - "content": "<|reserved_special_token_226|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128235": { - "content": "<|reserved_special_token_227|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128236": { - "content": "<|reserved_special_token_228|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128237": { - "content": "<|reserved_special_token_229|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128238": { - "content": "<|reserved_special_token_230|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128239": { - "content": "<|reserved_special_token_231|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128240": { - "content": "<|reserved_special_token_232|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128241": { - "content": "<|reserved_special_token_233|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128242": { - "content": "<|reserved_special_token_234|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128243": { - "content": "<|reserved_special_token_235|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128244": { - "content": "<|reserved_special_token_236|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128245": { - "content": "<|reserved_special_token_237|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128246": { - "content": "<|reserved_special_token_238|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128247": { - "content": "<|reserved_special_token_239|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128248": { - "content": "<|reserved_special_token_240|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128249": { - "content": "<|reserved_special_token_241|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128250": { - "content": "<|reserved_special_token_242|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128251": { - "content": "<|reserved_special_token_243|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128252": { - "content": "<|reserved_special_token_244|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128253": { - "content": "<|reserved_special_token_245|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128254": { - "content": "<|reserved_special_token_246|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128255": { - "content": "<|reserved_special_token_247|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - } - }, - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "extra_special_tokens": {}, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|eot_id|>", - "tokenizer_class": "PreTrainedTokenizerFast" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/README.md b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/README.md deleted file mode 100644 index 9dcded20fbdfcc31c260b00acd305909032b1111..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Quantized Model Checkpoint - -**Base model:** meta-llama/Llama-3.1-8B-Instruct - -**Average bitwidth:** 6.0048 - -**Sensitivity method:** linear - -**Constraints:** -- max_kl: 0.01 - -**Metrics:** -- predicted_kl: 0.009996 -- predicted_eap: 0.610965 - -See `quantization_config.txt` for full configuration details. diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/chat_template.jinja b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/chat_template.jinja deleted file mode 100644 index 33089ace1be88f22a10fe861ad49718d5d886090..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/config.json deleted file mode 100644 index 2bf71dab1ee4525127aba58e9446aa0a1dd046e6..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "bos_token_id": 128000, - "dtype": "float16", - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "transformers_version": "4.57.3", - "use_cache": false, - "vocab_size": 128256 -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/generation_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/generation_config.json deleted file mode 100644 index 993459bf55ed73c1390809c2e2a3d7c1c0e0d844..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "bos_token_id": 128000, - "do_sample": true, - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "temperature": 0.6, - "top_p": 0.9, - "transformers_version": "4.57.3" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00001-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00001-of-00004.safetensors deleted file mode 100644 index 4289216f8defce21d170608ac8173e619c97a35b..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00001-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3985a3e4bcb7e75b508abfc96ecc9115048de1eb13af2279677ffd6668e624da -size 4976698592 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00002-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00002-of-00004.safetensors deleted file mode 100644 index cc90509b416db893fae4c2223b06b9845246dbe7..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00002-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:daeb897ec2af846c6773c09df9b8620424cb10617f1b7a6df335c61b85b97be1 -size 4999802616 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00003-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00003-of-00004.safetensors deleted file mode 100644 index 787c5fdaf0be4e6e87a7cfc0dcef2576afc9b814..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00003-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff8bc5257342fd73a5926861e1964b59e0953d898180078409bb3dd1ac53ba61 -size 4915916080 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00004-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00004-of-00004.safetensors deleted file mode 100644 index 976b1ea5acdcdffad7522222a9bac6b83645c36c..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model-00004-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:41b6804c14c14e5bc8c6d95f57bc69272bacb1000108ec3d76ccdf601f1b08f8 -size 1168138808 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model.safetensors.index.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model.safetensors.index.json deleted file mode 100644 index 5c64f1e87be95160fabc494eebfa0f7e68064af2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/model.safetensors.index.json +++ /dev/null @@ -1,299 +0,0 @@ -{ - "metadata": { - "total_parameters": 8030261248, - "total_size": 16060522496 - }, - "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.embed_tokens.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00004-of-00004.safetensors" - } -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/quantization_config.txt b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/quantization_config.txt deleted file mode 100644 index 12a50a7e88e883cc2f631b0b0994e90813319b2a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/quantization_config.txt +++ /dev/null @@ -1,251 +0,0 @@ -# Model: meta-llama/Llama-3.1-8B-Instruct -# Layer directory: /nfs/scistore19/alistgrp/mhelcig/local/data/search/4_5_6_7_8bit_asym_g128/Llama-3.1-8B-Instruct/6bit/ -# Sensitivity method: linear -# Estimation method: linear -# Available bitwidths: [4, 5, 6, 7, 8] -# Bitwidth map: {4: 4.3125, 5: 5.3125, 6: 6.3125, 7: 7.3125, 8: 8.3125} -# -# Mode: predicted_constraint_search -# Constraint max_kl: 0.01 -# Weights: nll=0.0, kl=0.0, eap=1.0 -# -# Average bitwidth: 6.0048 -# Total params: 6979321856 -# Total bits: 41909485568 -# Predicted KL: 0.009996 -# Predicted EAP: 0.610965 -# Predicted ETL: 0.389035 -# Satisfies constraints: True -# Solver calls: 1 -# Evaluations: 0 -# -# Bitwidth distribution: -# 8-bit: 26 layers (11.6%) -# 7-bit: 55 layers (24.6%) -# 6-bit: 84 layers (37.5%) -# 5-bit: 59 layers (26.3%) -# -model.layers.0.self_attn.q_proj: 8 -model.layers.0.self_attn.k_proj: 8 -model.layers.0.self_attn.v_proj: 8 -model.layers.0.self_attn.o_proj: 6 -model.layers.0.mlp.gate_proj: 5 -model.layers.0.mlp.up_proj: 5 -model.layers.0.mlp.down_proj: 6 -model.layers.1.self_attn.q_proj: 7 -model.layers.1.self_attn.k_proj: 8 -model.layers.1.self_attn.v_proj: 8 -model.layers.1.self_attn.o_proj: 6 -model.layers.1.mlp.gate_proj: 5 -model.layers.1.mlp.up_proj: 5 -model.layers.1.mlp.down_proj: 8 -model.layers.2.self_attn.q_proj: 7 -model.layers.2.self_attn.k_proj: 8 -model.layers.2.self_attn.v_proj: 8 -model.layers.2.self_attn.o_proj: 6 -model.layers.2.mlp.gate_proj: 6 -model.layers.2.mlp.up_proj: 6 -model.layers.2.mlp.down_proj: 5 -model.layers.3.self_attn.q_proj: 7 -model.layers.3.self_attn.k_proj: 8 -model.layers.3.self_attn.v_proj: 8 -model.layers.3.self_attn.o_proj: 6 -model.layers.3.mlp.gate_proj: 6 -model.layers.3.mlp.up_proj: 6 -model.layers.3.mlp.down_proj: 5 -model.layers.4.self_attn.q_proj: 7 -model.layers.4.self_attn.k_proj: 8 -model.layers.4.self_attn.v_proj: 8 -model.layers.4.self_attn.o_proj: 6 -model.layers.4.mlp.gate_proj: 6 -model.layers.4.mlp.up_proj: 6 -model.layers.4.mlp.down_proj: 5 -model.layers.5.self_attn.q_proj: 6 -model.layers.5.self_attn.k_proj: 7 -model.layers.5.self_attn.v_proj: 7 -model.layers.5.self_attn.o_proj: 6 -model.layers.5.mlp.gate_proj: 6 -model.layers.5.mlp.up_proj: 6 -model.layers.5.mlp.down_proj: 5 -model.layers.6.self_attn.q_proj: 6 -model.layers.6.self_attn.k_proj: 7 -model.layers.6.self_attn.v_proj: 7 -model.layers.6.self_attn.o_proj: 6 -model.layers.6.mlp.gate_proj: 6 -model.layers.6.mlp.up_proj: 6 -model.layers.6.mlp.down_proj: 5 -model.layers.7.self_attn.q_proj: 6 -model.layers.7.self_attn.k_proj: 7 -model.layers.7.self_attn.v_proj: 7 -model.layers.7.self_attn.o_proj: 6 -model.layers.7.mlp.gate_proj: 6 -model.layers.7.mlp.up_proj: 6 -model.layers.7.mlp.down_proj: 5 -model.layers.8.self_attn.q_proj: 6 -model.layers.8.self_attn.k_proj: 7 -model.layers.8.self_attn.v_proj: 7 -model.layers.8.self_attn.o_proj: 6 -model.layers.8.mlp.gate_proj: 6 -model.layers.8.mlp.up_proj: 5 -model.layers.8.mlp.down_proj: 5 -model.layers.9.self_attn.q_proj: 7 -model.layers.9.self_attn.k_proj: 8 -model.layers.9.self_attn.v_proj: 8 -model.layers.9.self_attn.o_proj: 6 -model.layers.9.mlp.gate_proj: 6 -model.layers.9.mlp.up_proj: 5 -model.layers.9.mlp.down_proj: 5 -model.layers.10.self_attn.q_proj: 6 -model.layers.10.self_attn.k_proj: 8 -model.layers.10.self_attn.v_proj: 7 -model.layers.10.self_attn.o_proj: 6 -model.layers.10.mlp.gate_proj: 5 -model.layers.10.mlp.up_proj: 5 -model.layers.10.mlp.down_proj: 5 -model.layers.11.self_attn.q_proj: 7 -model.layers.11.self_attn.k_proj: 8 -model.layers.11.self_attn.v_proj: 7 -model.layers.11.self_attn.o_proj: 6 -model.layers.11.mlp.gate_proj: 5 -model.layers.11.mlp.up_proj: 5 -model.layers.11.mlp.down_proj: 5 -model.layers.12.self_attn.q_proj: 7 -model.layers.12.self_attn.k_proj: 7 -model.layers.12.self_attn.v_proj: 7 -model.layers.12.self_attn.o_proj: 6 -model.layers.12.mlp.gate_proj: 6 -model.layers.12.mlp.up_proj: 6 -model.layers.12.mlp.down_proj: 5 -model.layers.13.self_attn.q_proj: 7 -model.layers.13.self_attn.k_proj: 8 -model.layers.13.self_attn.v_proj: 8 -model.layers.13.self_attn.o_proj: 6 -model.layers.13.mlp.gate_proj: 6 -model.layers.13.mlp.up_proj: 6 -model.layers.13.mlp.down_proj: 5 -model.layers.14.self_attn.q_proj: 7 -model.layers.14.self_attn.k_proj: 8 -model.layers.14.self_attn.v_proj: 8 -model.layers.14.self_attn.o_proj: 6 -model.layers.14.mlp.gate_proj: 6 -model.layers.14.mlp.up_proj: 6 -model.layers.14.mlp.down_proj: 5 -model.layers.15.self_attn.q_proj: 7 -model.layers.15.self_attn.k_proj: 8 -model.layers.15.self_attn.v_proj: 8 -model.layers.15.self_attn.o_proj: 6 -model.layers.15.mlp.gate_proj: 6 -model.layers.15.mlp.up_proj: 6 -model.layers.15.mlp.down_proj: 5 -model.layers.16.self_attn.q_proj: 7 -model.layers.16.self_attn.k_proj: 8 -model.layers.16.self_attn.v_proj: 8 -model.layers.16.self_attn.o_proj: 6 -model.layers.16.mlp.gate_proj: 6 -model.layers.16.mlp.up_proj: 6 -model.layers.16.mlp.down_proj: 5 -model.layers.17.self_attn.q_proj: 6 -model.layers.17.self_attn.k_proj: 7 -model.layers.17.self_attn.v_proj: 7 -model.layers.17.self_attn.o_proj: 6 -model.layers.17.mlp.gate_proj: 6 -model.layers.17.mlp.up_proj: 6 -model.layers.17.mlp.down_proj: 6 -model.layers.18.self_attn.q_proj: 6 -model.layers.18.self_attn.k_proj: 7 -model.layers.18.self_attn.v_proj: 7 -model.layers.18.self_attn.o_proj: 6 -model.layers.18.mlp.gate_proj: 6 -model.layers.18.mlp.up_proj: 6 -model.layers.18.mlp.down_proj: 5 -model.layers.19.self_attn.q_proj: 6 -model.layers.19.self_attn.k_proj: 7 -model.layers.19.self_attn.v_proj: 7 -model.layers.19.self_attn.o_proj: 6 -model.layers.19.mlp.gate_proj: 6 -model.layers.19.mlp.up_proj: 6 -model.layers.19.mlp.down_proj: 5 -model.layers.20.self_attn.q_proj: 6 -model.layers.20.self_attn.k_proj: 7 -model.layers.20.self_attn.v_proj: 7 -model.layers.20.self_attn.o_proj: 5 -model.layers.20.mlp.gate_proj: 6 -model.layers.20.mlp.up_proj: 6 -model.layers.20.mlp.down_proj: 5 -model.layers.21.self_attn.q_proj: 6 -model.layers.21.self_attn.k_proj: 7 -model.layers.21.self_attn.v_proj: 7 -model.layers.21.self_attn.o_proj: 6 -model.layers.21.mlp.gate_proj: 6 -model.layers.21.mlp.up_proj: 6 -model.layers.21.mlp.down_proj: 5 -model.layers.22.self_attn.q_proj: 6 -model.layers.22.self_attn.k_proj: 7 -model.layers.22.self_attn.v_proj: 7 -model.layers.22.self_attn.o_proj: 5 -model.layers.22.mlp.gate_proj: 6 -model.layers.22.mlp.up_proj: 6 -model.layers.22.mlp.down_proj: 5 -model.layers.23.self_attn.q_proj: 6 -model.layers.23.self_attn.k_proj: 7 -model.layers.23.self_attn.v_proj: 7 -model.layers.23.self_attn.o_proj: 5 -model.layers.23.mlp.gate_proj: 6 -model.layers.23.mlp.up_proj: 6 -model.layers.23.mlp.down_proj: 5 -model.layers.24.self_attn.q_proj: 6 -model.layers.24.self_attn.k_proj: 7 -model.layers.24.self_attn.v_proj: 7 -model.layers.24.self_attn.o_proj: 5 -model.layers.24.mlp.gate_proj: 5 -model.layers.24.mlp.up_proj: 5 -model.layers.24.mlp.down_proj: 5 -model.layers.25.self_attn.q_proj: 6 -model.layers.25.self_attn.k_proj: 7 -model.layers.25.self_attn.v_proj: 7 -model.layers.25.self_attn.o_proj: 5 -model.layers.25.mlp.gate_proj: 5 -model.layers.25.mlp.up_proj: 5 -model.layers.25.mlp.down_proj: 5 -model.layers.26.self_attn.q_proj: 6 -model.layers.26.self_attn.k_proj: 7 -model.layers.26.self_attn.v_proj: 7 -model.layers.26.self_attn.o_proj: 5 -model.layers.26.mlp.gate_proj: 5 -model.layers.26.mlp.up_proj: 5 -model.layers.26.mlp.down_proj: 5 -model.layers.27.self_attn.q_proj: 6 -model.layers.27.self_attn.k_proj: 7 -model.layers.27.self_attn.v_proj: 7 -model.layers.27.self_attn.o_proj: 5 -model.layers.27.mlp.gate_proj: 5 -model.layers.27.mlp.up_proj: 5 -model.layers.27.mlp.down_proj: 5 -model.layers.28.self_attn.q_proj: 6 -model.layers.28.self_attn.k_proj: 7 -model.layers.28.self_attn.v_proj: 7 -model.layers.28.self_attn.o_proj: 5 -model.layers.28.mlp.gate_proj: 5 -model.layers.28.mlp.up_proj: 5 -model.layers.28.mlp.down_proj: 5 -model.layers.29.self_attn.q_proj: 7 -model.layers.29.self_attn.k_proj: 8 -model.layers.29.self_attn.v_proj: 8 -model.layers.29.self_attn.o_proj: 5 -model.layers.29.mlp.gate_proj: 5 -model.layers.29.mlp.up_proj: 5 -model.layers.29.mlp.down_proj: 5 -model.layers.30.self_attn.q_proj: 6 -model.layers.30.self_attn.k_proj: 7 -model.layers.30.self_attn.v_proj: 7 -model.layers.30.self_attn.o_proj: 5 -model.layers.30.mlp.gate_proj: 6 -model.layers.30.mlp.up_proj: 6 -model.layers.30.mlp.down_proj: 6 -model.layers.31.self_attn.q_proj: 6 -model.layers.31.self_attn.k_proj: 7 -model.layers.31.self_attn.v_proj: 7 -model.layers.31.self_attn.o_proj: 6 -model.layers.31.mlp.gate_proj: 7 -model.layers.31.mlp.up_proj: 7 -model.layers.31.mlp.down_proj: 7 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/special_tokens_map.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/special_tokens_map.json deleted file mode 100644 index b43be96621d147110fb8a18b5776ec6e38516127..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/special_tokens_map.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "bos_token": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "<|eot_id|>" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/tokenizer.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/tokenizer_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/tokenizer_config.json deleted file mode 100644 index 3beeacc86a6ca3cae14ad3004263ab74a4bac07a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw6.00_5-6-7-8bit/tokenizer_config.json +++ /dev/null @@ -1,2063 +0,0 @@ -{ - "added_tokens_decoder": { - "128000": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128001": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128002": { - "content": "<|reserved_special_token_0|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128003": { - "content": "<|reserved_special_token_1|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128004": { - "content": "<|finetune_right_pad_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128005": { - "content": "<|reserved_special_token_2|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128006": { - "content": "<|start_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128007": { - "content": "<|end_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128008": { - "content": "<|eom_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128009": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128010": { - "content": "<|python_tag|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128011": { - "content": "<|reserved_special_token_3|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128012": { - "content": "<|reserved_special_token_4|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128013": { - "content": "<|reserved_special_token_5|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128014": { - "content": "<|reserved_special_token_6|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128015": { - "content": "<|reserved_special_token_7|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128016": { - "content": "<|reserved_special_token_8|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128017": { - "content": "<|reserved_special_token_9|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128018": { - "content": "<|reserved_special_token_10|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128019": { - "content": "<|reserved_special_token_11|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128020": { - "content": "<|reserved_special_token_12|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128021": { - "content": "<|reserved_special_token_13|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128022": { - "content": "<|reserved_special_token_14|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128023": { - "content": "<|reserved_special_token_15|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128024": { - "content": "<|reserved_special_token_16|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128025": { - "content": "<|reserved_special_token_17|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128026": { - "content": "<|reserved_special_token_18|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128027": { - "content": "<|reserved_special_token_19|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128028": { - "content": "<|reserved_special_token_20|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128029": { - "content": "<|reserved_special_token_21|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128030": { - "content": "<|reserved_special_token_22|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128031": { - "content": "<|reserved_special_token_23|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128032": { - "content": "<|reserved_special_token_24|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128033": { - "content": "<|reserved_special_token_25|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128034": { - "content": "<|reserved_special_token_26|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128035": { - "content": "<|reserved_special_token_27|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128036": { - "content": "<|reserved_special_token_28|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128037": { - "content": "<|reserved_special_token_29|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128038": { - "content": "<|reserved_special_token_30|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128039": { - "content": "<|reserved_special_token_31|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128040": { - "content": "<|reserved_special_token_32|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128041": { - "content": "<|reserved_special_token_33|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128042": { - "content": "<|reserved_special_token_34|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128043": { - "content": "<|reserved_special_token_35|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128044": { - "content": "<|reserved_special_token_36|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128045": { - "content": "<|reserved_special_token_37|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128046": { - "content": "<|reserved_special_token_38|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128047": { - "content": "<|reserved_special_token_39|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128048": { - "content": "<|reserved_special_token_40|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128049": { - "content": "<|reserved_special_token_41|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128050": { - "content": "<|reserved_special_token_42|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128051": { - "content": "<|reserved_special_token_43|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128052": { - "content": "<|reserved_special_token_44|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128053": { - "content": "<|reserved_special_token_45|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128054": { - "content": "<|reserved_special_token_46|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128055": { - "content": "<|reserved_special_token_47|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128056": { - "content": "<|reserved_special_token_48|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128057": { - "content": "<|reserved_special_token_49|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128058": { - "content": "<|reserved_special_token_50|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128059": { - "content": "<|reserved_special_token_51|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128060": { - "content": "<|reserved_special_token_52|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128061": { - "content": "<|reserved_special_token_53|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128062": { - "content": "<|reserved_special_token_54|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128063": { - "content": "<|reserved_special_token_55|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128064": { - "content": "<|reserved_special_token_56|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128065": { - "content": "<|reserved_special_token_57|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128066": { - "content": "<|reserved_special_token_58|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128067": { - "content": "<|reserved_special_token_59|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128068": { - "content": "<|reserved_special_token_60|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128069": { - "content": "<|reserved_special_token_61|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128070": { - "content": "<|reserved_special_token_62|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128071": { - "content": "<|reserved_special_token_63|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128072": { - "content": "<|reserved_special_token_64|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128073": { - "content": "<|reserved_special_token_65|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128074": { - "content": "<|reserved_special_token_66|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128075": { - "content": "<|reserved_special_token_67|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128076": { - "content": "<|reserved_special_token_68|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128077": { - "content": "<|reserved_special_token_69|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128078": { - "content": "<|reserved_special_token_70|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128079": { - "content": "<|reserved_special_token_71|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128080": { - "content": "<|reserved_special_token_72|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128081": { - "content": "<|reserved_special_token_73|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128082": { - "content": "<|reserved_special_token_74|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128083": { - "content": "<|reserved_special_token_75|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128084": { - "content": "<|reserved_special_token_76|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128085": { - "content": "<|reserved_special_token_77|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128086": { - "content": "<|reserved_special_token_78|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128087": { - "content": "<|reserved_special_token_79|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128088": { - "content": "<|reserved_special_token_80|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128089": { - "content": "<|reserved_special_token_81|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128090": { - "content": "<|reserved_special_token_82|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128091": { - "content": "<|reserved_special_token_83|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128092": { - "content": "<|reserved_special_token_84|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128093": { - "content": "<|reserved_special_token_85|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128094": { - "content": "<|reserved_special_token_86|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128095": { - "content": "<|reserved_special_token_87|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128096": { - "content": "<|reserved_special_token_88|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128097": { - "content": "<|reserved_special_token_89|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128098": { - "content": "<|reserved_special_token_90|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128099": { - "content": "<|reserved_special_token_91|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128100": { - "content": "<|reserved_special_token_92|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128101": { - "content": "<|reserved_special_token_93|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128102": { - "content": "<|reserved_special_token_94|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128103": { - "content": "<|reserved_special_token_95|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128104": { - "content": "<|reserved_special_token_96|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128105": { - "content": "<|reserved_special_token_97|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128106": { - "content": "<|reserved_special_token_98|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128107": { - "content": "<|reserved_special_token_99|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128108": { - "content": "<|reserved_special_token_100|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128109": { - "content": "<|reserved_special_token_101|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128110": { - "content": "<|reserved_special_token_102|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128111": { - "content": "<|reserved_special_token_103|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128112": { - "content": "<|reserved_special_token_104|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128113": { - "content": "<|reserved_special_token_105|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128114": { - "content": "<|reserved_special_token_106|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128115": { - "content": "<|reserved_special_token_107|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128116": { - "content": "<|reserved_special_token_108|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128117": { - "content": "<|reserved_special_token_109|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128118": { - "content": "<|reserved_special_token_110|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128119": { - "content": "<|reserved_special_token_111|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128120": { - "content": "<|reserved_special_token_112|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128121": { - "content": "<|reserved_special_token_113|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128122": { - "content": "<|reserved_special_token_114|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128123": { - "content": "<|reserved_special_token_115|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128124": { - "content": "<|reserved_special_token_116|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128125": { - "content": "<|reserved_special_token_117|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128126": { - "content": "<|reserved_special_token_118|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128127": { - "content": "<|reserved_special_token_119|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128128": { - "content": "<|reserved_special_token_120|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128129": { - "content": "<|reserved_special_token_121|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128130": { - "content": "<|reserved_special_token_122|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128131": { - "content": "<|reserved_special_token_123|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128132": { - "content": "<|reserved_special_token_124|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128133": { - "content": "<|reserved_special_token_125|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128134": { - "content": "<|reserved_special_token_126|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128135": { - "content": "<|reserved_special_token_127|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128136": { - "content": "<|reserved_special_token_128|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128137": { - "content": "<|reserved_special_token_129|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128138": { - "content": "<|reserved_special_token_130|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128139": { - "content": "<|reserved_special_token_131|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128140": { - "content": "<|reserved_special_token_132|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128141": { - "content": "<|reserved_special_token_133|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128142": { - "content": "<|reserved_special_token_134|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128143": { - "content": "<|reserved_special_token_135|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128144": { - "content": "<|reserved_special_token_136|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128145": { - "content": "<|reserved_special_token_137|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128146": { - "content": "<|reserved_special_token_138|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128147": { - "content": "<|reserved_special_token_139|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128148": { - "content": "<|reserved_special_token_140|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128149": { - "content": "<|reserved_special_token_141|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128150": { - "content": "<|reserved_special_token_142|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128151": { - "content": "<|reserved_special_token_143|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128152": { - "content": "<|reserved_special_token_144|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128153": { - "content": "<|reserved_special_token_145|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128154": { - "content": "<|reserved_special_token_146|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128155": { - "content": "<|reserved_special_token_147|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128156": { - "content": "<|reserved_special_token_148|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128157": { - "content": "<|reserved_special_token_149|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128158": { - "content": "<|reserved_special_token_150|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128159": { - "content": "<|reserved_special_token_151|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128160": { - "content": "<|reserved_special_token_152|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128161": { - "content": "<|reserved_special_token_153|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128162": { - "content": "<|reserved_special_token_154|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128163": { - "content": "<|reserved_special_token_155|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128164": { - "content": "<|reserved_special_token_156|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128165": { - "content": "<|reserved_special_token_157|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128166": { - "content": "<|reserved_special_token_158|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128167": { - "content": "<|reserved_special_token_159|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128168": { - "content": "<|reserved_special_token_160|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128169": { - "content": "<|reserved_special_token_161|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128170": { - "content": "<|reserved_special_token_162|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128171": { - "content": "<|reserved_special_token_163|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128172": { - "content": "<|reserved_special_token_164|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128173": { - "content": "<|reserved_special_token_165|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128174": { - "content": "<|reserved_special_token_166|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128175": { - "content": "<|reserved_special_token_167|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128176": { - "content": "<|reserved_special_token_168|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128177": { - "content": "<|reserved_special_token_169|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128178": { - "content": "<|reserved_special_token_170|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128179": { - "content": "<|reserved_special_token_171|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128180": { - "content": "<|reserved_special_token_172|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128181": { - "content": "<|reserved_special_token_173|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128182": { - "content": "<|reserved_special_token_174|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128183": { - "content": "<|reserved_special_token_175|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128184": { - "content": "<|reserved_special_token_176|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128185": { - "content": "<|reserved_special_token_177|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128186": { - "content": "<|reserved_special_token_178|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128187": { - "content": "<|reserved_special_token_179|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128188": { - "content": "<|reserved_special_token_180|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128189": { - "content": "<|reserved_special_token_181|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128190": { - "content": "<|reserved_special_token_182|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128191": { - "content": "<|reserved_special_token_183|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128192": { - "content": "<|reserved_special_token_184|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128193": { - "content": "<|reserved_special_token_185|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128194": { - "content": "<|reserved_special_token_186|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128195": { - "content": "<|reserved_special_token_187|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128196": { - "content": "<|reserved_special_token_188|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128197": { - "content": "<|reserved_special_token_189|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128198": { - "content": "<|reserved_special_token_190|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128199": { - "content": "<|reserved_special_token_191|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128200": { - "content": "<|reserved_special_token_192|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128201": { - "content": "<|reserved_special_token_193|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128202": { - "content": "<|reserved_special_token_194|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128203": { - "content": "<|reserved_special_token_195|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128204": { - "content": "<|reserved_special_token_196|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128205": { - "content": "<|reserved_special_token_197|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128206": { - "content": "<|reserved_special_token_198|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128207": { - "content": "<|reserved_special_token_199|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128208": { - "content": "<|reserved_special_token_200|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128209": { - "content": "<|reserved_special_token_201|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128210": { - "content": "<|reserved_special_token_202|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128211": { - "content": "<|reserved_special_token_203|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128212": { - "content": "<|reserved_special_token_204|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128213": { - "content": "<|reserved_special_token_205|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128214": { - "content": "<|reserved_special_token_206|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128215": { - "content": "<|reserved_special_token_207|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128216": { - "content": "<|reserved_special_token_208|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128217": { - "content": "<|reserved_special_token_209|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128218": { - "content": "<|reserved_special_token_210|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128219": { - "content": "<|reserved_special_token_211|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128220": { - "content": "<|reserved_special_token_212|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128221": { - "content": "<|reserved_special_token_213|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128222": { - "content": "<|reserved_special_token_214|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128223": { - "content": "<|reserved_special_token_215|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128224": { - "content": "<|reserved_special_token_216|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128225": { - "content": "<|reserved_special_token_217|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128226": { - "content": "<|reserved_special_token_218|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128227": { - "content": "<|reserved_special_token_219|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128228": { - "content": "<|reserved_special_token_220|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128229": { - "content": "<|reserved_special_token_221|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128230": { - "content": "<|reserved_special_token_222|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128231": { - "content": "<|reserved_special_token_223|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128232": { - "content": "<|reserved_special_token_224|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128233": { - "content": "<|reserved_special_token_225|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128234": { - "content": "<|reserved_special_token_226|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128235": { - "content": "<|reserved_special_token_227|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128236": { - "content": "<|reserved_special_token_228|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128237": { - "content": "<|reserved_special_token_229|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128238": { - "content": "<|reserved_special_token_230|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128239": { - "content": "<|reserved_special_token_231|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128240": { - "content": "<|reserved_special_token_232|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128241": { - "content": "<|reserved_special_token_233|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128242": { - "content": "<|reserved_special_token_234|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128243": { - "content": "<|reserved_special_token_235|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128244": { - "content": "<|reserved_special_token_236|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128245": { - "content": "<|reserved_special_token_237|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128246": { - "content": "<|reserved_special_token_238|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128247": { - "content": "<|reserved_special_token_239|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128248": { - "content": "<|reserved_special_token_240|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128249": { - "content": "<|reserved_special_token_241|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128250": { - "content": "<|reserved_special_token_242|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128251": { - "content": "<|reserved_special_token_243|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128252": { - "content": "<|reserved_special_token_244|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128253": { - "content": "<|reserved_special_token_245|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128254": { - "content": "<|reserved_special_token_246|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128255": { - "content": "<|reserved_special_token_247|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - } - }, - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "extra_special_tokens": {}, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|eot_id|>", - "tokenizer_class": "PreTrainedTokenizerFast" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/README.md b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/README.md deleted file mode 100644 index 724dfdc497a660621939174b21f7e84e7e5170fc..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Quantized Model Checkpoint - -**Base model:** meta-llama/Llama-3.1-8B-Instruct - -**Average bitwidth:** 7.5625 - -**Sensitivity method:** linear - -**Constraints:** -- max_kl: 0.01 - -**Metrics:** -- predicted_kl: 0.009987 -- predicted_eap: 0.554600 - -See `quantization_config.txt` for full configuration details. diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/chat_template.jinja b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/chat_template.jinja deleted file mode 100644 index 33089ace1be88f22a10fe861ad49718d5d886090..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/config.json deleted file mode 100644 index 2bf71dab1ee4525127aba58e9446aa0a1dd046e6..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "bos_token_id": 128000, - "dtype": "float16", - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "transformers_version": "4.57.3", - "use_cache": false, - "vocab_size": 128256 -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/generation_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/generation_config.json deleted file mode 100644 index 993459bf55ed73c1390809c2e2a3d7c1c0e0d844..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "bos_token_id": 128000, - "do_sample": true, - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "temperature": 0.6, - "top_p": 0.9, - "transformers_version": "4.57.3" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00001-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00001-of-00004.safetensors deleted file mode 100644 index a63907e45edabb16bf201c41ec1d62ae4e64c38d..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00001-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a5f62139dec457e820c871de03dcf8cd6cb7b3dff4340dd6b4da8e8307e922b -size 4976698592 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00002-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00002-of-00004.safetensors deleted file mode 100644 index c7bfdb009325b0e7ea8f34071629834b7031d70e..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00002-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7038b8a751f0563d8e730dee3d8d6a71340f26191fff5aaadc68324c9ed6bb5 -size 4999802616 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00003-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00003-of-00004.safetensors deleted file mode 100644 index d4191344260a2e835575571ec0df6b729d34e055..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00003-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fbd67550b41307bd8c7e47fc3222dd099af2f73fa2a89921c3a2ee06cb9bc958 -size 4915916080 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00004-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00004-of-00004.safetensors deleted file mode 100644 index b09e9cdcb05f5cf77b3ef3a8ffa431eb33ad02b9..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model-00004-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44408391c116c33adf6e43ab53f84d75bee5e2956b293c34dc60509fb0fd825b -size 1168138808 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model.safetensors.index.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model.safetensors.index.json deleted file mode 100644 index 5c64f1e87be95160fabc494eebfa0f7e68064af2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/model.safetensors.index.json +++ /dev/null @@ -1,299 +0,0 @@ -{ - "metadata": { - "total_parameters": 8030261248, - "total_size": 16060522496 - }, - "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.embed_tokens.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00004-of-00004.safetensors" - } -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/quantization_config.txt b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/quantization_config.txt deleted file mode 100644 index 29cb12a8c17bbcdf49656674a48e82d890f340ea..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/quantization_config.txt +++ /dev/null @@ -1,249 +0,0 @@ -# Model: meta-llama/Llama-3.1-8B-Instruct -# Layer directory: /nfs/scistore19/alistgrp/mhelcig/local/data/search/4_5_6_7_8bit_asym_g128/Llama-3.1-8B-Instruct/6bit/ -# Sensitivity method: linear -# Estimation method: linear -# Available bitwidths: [4, 8] -# Bitwidth map: {4: 4.3125, 5: 5.3125, 6: 6.3125, 7: 7.3125, 8: 8.3125} -# -# Mode: predicted_constraint_search -# Constraint max_kl: 0.01 -# Weights: nll=0.0, kl=0.0, eap=1.0 -# -# Average bitwidth: 7.5625 -# Total params: 6979321856 -# Total bits: 52781121536 -# Predicted KL: 0.009987 -# Predicted EAP: 0.554600 -# Predicted ETL: 0.445400 -# Satisfies constraints: True -# Solver calls: 1 -# Evaluations: 0 -# -# Bitwidth distribution: -# 8-bit: 201 layers (89.7%) -# 4-bit: 23 layers (10.3%) -# -model.layers.0.self_attn.q_proj: 8 -model.layers.0.self_attn.k_proj: 8 -model.layers.0.self_attn.v_proj: 8 -model.layers.0.self_attn.o_proj: 8 -model.layers.0.mlp.gate_proj: 4 -model.layers.0.mlp.up_proj: 4 -model.layers.0.mlp.down_proj: 8 -model.layers.1.self_attn.q_proj: 8 -model.layers.1.self_attn.k_proj: 8 -model.layers.1.self_attn.v_proj: 8 -model.layers.1.self_attn.o_proj: 8 -model.layers.1.mlp.gate_proj: 8 -model.layers.1.mlp.up_proj: 8 -model.layers.1.mlp.down_proj: 8 -model.layers.2.self_attn.q_proj: 8 -model.layers.2.self_attn.k_proj: 8 -model.layers.2.self_attn.v_proj: 8 -model.layers.2.self_attn.o_proj: 8 -model.layers.2.mlp.gate_proj: 8 -model.layers.2.mlp.up_proj: 8 -model.layers.2.mlp.down_proj: 4 -model.layers.3.self_attn.q_proj: 8 -model.layers.3.self_attn.k_proj: 8 -model.layers.3.self_attn.v_proj: 8 -model.layers.3.self_attn.o_proj: 8 -model.layers.3.mlp.gate_proj: 8 -model.layers.3.mlp.up_proj: 8 -model.layers.3.mlp.down_proj: 4 -model.layers.4.self_attn.q_proj: 8 -model.layers.4.self_attn.k_proj: 8 -model.layers.4.self_attn.v_proj: 8 -model.layers.4.self_attn.o_proj: 8 -model.layers.4.mlp.gate_proj: 8 -model.layers.4.mlp.up_proj: 8 -model.layers.4.mlp.down_proj: 4 -model.layers.5.self_attn.q_proj: 8 -model.layers.5.self_attn.k_proj: 8 -model.layers.5.self_attn.v_proj: 8 -model.layers.5.self_attn.o_proj: 8 -model.layers.5.mlp.gate_proj: 8 -model.layers.5.mlp.up_proj: 8 -model.layers.5.mlp.down_proj: 4 -model.layers.6.self_attn.q_proj: 8 -model.layers.6.self_attn.k_proj: 8 -model.layers.6.self_attn.v_proj: 8 -model.layers.6.self_attn.o_proj: 8 -model.layers.6.mlp.gate_proj: 8 -model.layers.6.mlp.up_proj: 8 -model.layers.6.mlp.down_proj: 4 -model.layers.7.self_attn.q_proj: 8 -model.layers.7.self_attn.k_proj: 8 -model.layers.7.self_attn.v_proj: 8 -model.layers.7.self_attn.o_proj: 8 -model.layers.7.mlp.gate_proj: 8 -model.layers.7.mlp.up_proj: 8 -model.layers.7.mlp.down_proj: 4 -model.layers.8.self_attn.q_proj: 8 -model.layers.8.self_attn.k_proj: 8 -model.layers.8.self_attn.v_proj: 8 -model.layers.8.self_attn.o_proj: 8 -model.layers.8.mlp.gate_proj: 8 -model.layers.8.mlp.up_proj: 8 -model.layers.8.mlp.down_proj: 4 -model.layers.9.self_attn.q_proj: 8 -model.layers.9.self_attn.k_proj: 8 -model.layers.9.self_attn.v_proj: 8 -model.layers.9.self_attn.o_proj: 8 -model.layers.9.mlp.gate_proj: 8 -model.layers.9.mlp.up_proj: 8 -model.layers.9.mlp.down_proj: 4 -model.layers.10.self_attn.q_proj: 8 -model.layers.10.self_attn.k_proj: 8 -model.layers.10.self_attn.v_proj: 8 -model.layers.10.self_attn.o_proj: 8 -model.layers.10.mlp.gate_proj: 8 -model.layers.10.mlp.up_proj: 8 -model.layers.10.mlp.down_proj: 4 -model.layers.11.self_attn.q_proj: 8 -model.layers.11.self_attn.k_proj: 8 -model.layers.11.self_attn.v_proj: 8 -model.layers.11.self_attn.o_proj: 8 -model.layers.11.mlp.gate_proj: 8 -model.layers.11.mlp.up_proj: 8 -model.layers.11.mlp.down_proj: 4 -model.layers.12.self_attn.q_proj: 8 -model.layers.12.self_attn.k_proj: 8 -model.layers.12.self_attn.v_proj: 8 -model.layers.12.self_attn.o_proj: 8 -model.layers.12.mlp.gate_proj: 8 -model.layers.12.mlp.up_proj: 8 -model.layers.12.mlp.down_proj: 4 -model.layers.13.self_attn.q_proj: 8 -model.layers.13.self_attn.k_proj: 8 -model.layers.13.self_attn.v_proj: 8 -model.layers.13.self_attn.o_proj: 8 -model.layers.13.mlp.gate_proj: 8 -model.layers.13.mlp.up_proj: 8 -model.layers.13.mlp.down_proj: 4 -model.layers.14.self_attn.q_proj: 8 -model.layers.14.self_attn.k_proj: 8 -model.layers.14.self_attn.v_proj: 8 -model.layers.14.self_attn.o_proj: 8 -model.layers.14.mlp.gate_proj: 8 -model.layers.14.mlp.up_proj: 8 -model.layers.14.mlp.down_proj: 8 -model.layers.15.self_attn.q_proj: 8 -model.layers.15.self_attn.k_proj: 8 -model.layers.15.self_attn.v_proj: 8 -model.layers.15.self_attn.o_proj: 8 -model.layers.15.mlp.gate_proj: 8 -model.layers.15.mlp.up_proj: 8 -model.layers.15.mlp.down_proj: 8 -model.layers.16.self_attn.q_proj: 8 -model.layers.16.self_attn.k_proj: 8 -model.layers.16.self_attn.v_proj: 8 -model.layers.16.self_attn.o_proj: 8 -model.layers.16.mlp.gate_proj: 8 -model.layers.16.mlp.up_proj: 8 -model.layers.16.mlp.down_proj: 8 -model.layers.17.self_attn.q_proj: 8 -model.layers.17.self_attn.k_proj: 8 -model.layers.17.self_attn.v_proj: 8 -model.layers.17.self_attn.o_proj: 8 -model.layers.17.mlp.gate_proj: 8 -model.layers.17.mlp.up_proj: 8 -model.layers.17.mlp.down_proj: 8 -model.layers.18.self_attn.q_proj: 8 -model.layers.18.self_attn.k_proj: 8 -model.layers.18.self_attn.v_proj: 8 -model.layers.18.self_attn.o_proj: 8 -model.layers.18.mlp.gate_proj: 8 -model.layers.18.mlp.up_proj: 8 -model.layers.18.mlp.down_proj: 8 -model.layers.19.self_attn.q_proj: 8 -model.layers.19.self_attn.k_proj: 8 -model.layers.19.self_attn.v_proj: 8 -model.layers.19.self_attn.o_proj: 8 -model.layers.19.mlp.gate_proj: 8 -model.layers.19.mlp.up_proj: 8 -model.layers.19.mlp.down_proj: 8 -model.layers.20.self_attn.q_proj: 8 -model.layers.20.self_attn.k_proj: 8 -model.layers.20.self_attn.v_proj: 8 -model.layers.20.self_attn.o_proj: 8 -model.layers.20.mlp.gate_proj: 8 -model.layers.20.mlp.up_proj: 8 -model.layers.20.mlp.down_proj: 8 -model.layers.21.self_attn.q_proj: 8 -model.layers.21.self_attn.k_proj: 8 -model.layers.21.self_attn.v_proj: 8 -model.layers.21.self_attn.o_proj: 8 -model.layers.21.mlp.gate_proj: 8 -model.layers.21.mlp.up_proj: 8 -model.layers.21.mlp.down_proj: 8 -model.layers.22.self_attn.q_proj: 8 -model.layers.22.self_attn.k_proj: 8 -model.layers.22.self_attn.v_proj: 8 -model.layers.22.self_attn.o_proj: 8 -model.layers.22.mlp.gate_proj: 8 -model.layers.22.mlp.up_proj: 8 -model.layers.22.mlp.down_proj: 4 -model.layers.23.self_attn.q_proj: 8 -model.layers.23.self_attn.k_proj: 8 -model.layers.23.self_attn.v_proj: 8 -model.layers.23.self_attn.o_proj: 8 -model.layers.23.mlp.gate_proj: 8 -model.layers.23.mlp.up_proj: 8 -model.layers.23.mlp.down_proj: 4 -model.layers.24.self_attn.q_proj: 8 -model.layers.24.self_attn.k_proj: 8 -model.layers.24.self_attn.v_proj: 8 -model.layers.24.self_attn.o_proj: 8 -model.layers.24.mlp.gate_proj: 8 -model.layers.24.mlp.up_proj: 8 -model.layers.24.mlp.down_proj: 4 -model.layers.25.self_attn.q_proj: 8 -model.layers.25.self_attn.k_proj: 8 -model.layers.25.self_attn.v_proj: 8 -model.layers.25.self_attn.o_proj: 8 -model.layers.25.mlp.gate_proj: 8 -model.layers.25.mlp.up_proj: 8 -model.layers.25.mlp.down_proj: 4 -model.layers.26.self_attn.q_proj: 8 -model.layers.26.self_attn.k_proj: 8 -model.layers.26.self_attn.v_proj: 8 -model.layers.26.self_attn.o_proj: 8 -model.layers.26.mlp.gate_proj: 8 -model.layers.26.mlp.up_proj: 8 -model.layers.26.mlp.down_proj: 4 -model.layers.27.self_attn.q_proj: 8 -model.layers.27.self_attn.k_proj: 8 -model.layers.27.self_attn.v_proj: 8 -model.layers.27.self_attn.o_proj: 8 -model.layers.27.mlp.gate_proj: 8 -model.layers.27.mlp.up_proj: 8 -model.layers.27.mlp.down_proj: 4 -model.layers.28.self_attn.q_proj: 8 -model.layers.28.self_attn.k_proj: 8 -model.layers.28.self_attn.v_proj: 8 -model.layers.28.self_attn.o_proj: 8 -model.layers.28.mlp.gate_proj: 8 -model.layers.28.mlp.up_proj: 8 -model.layers.28.mlp.down_proj: 4 -model.layers.29.self_attn.q_proj: 8 -model.layers.29.self_attn.k_proj: 8 -model.layers.29.self_attn.v_proj: 8 -model.layers.29.self_attn.o_proj: 4 -model.layers.29.mlp.gate_proj: 8 -model.layers.29.mlp.up_proj: 8 -model.layers.29.mlp.down_proj: 4 -model.layers.30.self_attn.q_proj: 8 -model.layers.30.self_attn.k_proj: 8 -model.layers.30.self_attn.v_proj: 8 -model.layers.30.self_attn.o_proj: 8 -model.layers.30.mlp.gate_proj: 8 -model.layers.30.mlp.up_proj: 8 -model.layers.30.mlp.down_proj: 8 -model.layers.31.self_attn.q_proj: 8 -model.layers.31.self_attn.k_proj: 8 -model.layers.31.self_attn.v_proj: 8 -model.layers.31.self_attn.o_proj: 8 -model.layers.31.mlp.gate_proj: 8 -model.layers.31.mlp.up_proj: 8 -model.layers.31.mlp.down_proj: 8 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/special_tokens_map.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/special_tokens_map.json deleted file mode 100644 index b43be96621d147110fb8a18b5776ec6e38516127..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/special_tokens_map.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "bos_token": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "<|eot_id|>" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/tokenizer.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/tokenizer_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/tokenizer_config.json deleted file mode 100644 index 3beeacc86a6ca3cae14ad3004263ab74a4bac07a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_lin_bw7.56_4-8bit/tokenizer_config.json +++ /dev/null @@ -1,2063 +0,0 @@ -{ - "added_tokens_decoder": { - "128000": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128001": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128002": { - "content": "<|reserved_special_token_0|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128003": { - "content": "<|reserved_special_token_1|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128004": { - "content": "<|finetune_right_pad_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128005": { - "content": "<|reserved_special_token_2|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128006": { - "content": "<|start_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128007": { - "content": "<|end_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128008": { - "content": "<|eom_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128009": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128010": { - "content": "<|python_tag|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128011": { - "content": "<|reserved_special_token_3|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128012": { - "content": "<|reserved_special_token_4|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128013": { - "content": "<|reserved_special_token_5|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128014": { - "content": "<|reserved_special_token_6|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128015": { - "content": "<|reserved_special_token_7|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128016": { - "content": "<|reserved_special_token_8|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128017": { - "content": "<|reserved_special_token_9|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128018": { - "content": "<|reserved_special_token_10|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128019": { - "content": "<|reserved_special_token_11|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128020": { - "content": "<|reserved_special_token_12|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128021": { - "content": "<|reserved_special_token_13|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128022": { - "content": "<|reserved_special_token_14|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128023": { - "content": "<|reserved_special_token_15|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128024": { - "content": "<|reserved_special_token_16|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128025": { - "content": "<|reserved_special_token_17|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128026": { - "content": "<|reserved_special_token_18|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128027": { - "content": "<|reserved_special_token_19|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128028": { - "content": "<|reserved_special_token_20|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128029": { - "content": "<|reserved_special_token_21|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128030": { - "content": "<|reserved_special_token_22|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128031": { - "content": "<|reserved_special_token_23|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128032": { - "content": "<|reserved_special_token_24|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128033": { - "content": "<|reserved_special_token_25|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128034": { - "content": "<|reserved_special_token_26|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128035": { - "content": "<|reserved_special_token_27|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128036": { - "content": "<|reserved_special_token_28|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128037": { - "content": "<|reserved_special_token_29|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128038": { - "content": "<|reserved_special_token_30|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128039": { - "content": "<|reserved_special_token_31|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128040": { - "content": "<|reserved_special_token_32|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128041": { - "content": "<|reserved_special_token_33|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128042": { - "content": "<|reserved_special_token_34|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128043": { - "content": "<|reserved_special_token_35|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128044": { - "content": "<|reserved_special_token_36|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128045": { - "content": "<|reserved_special_token_37|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128046": { - "content": "<|reserved_special_token_38|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128047": { - "content": "<|reserved_special_token_39|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128048": { - "content": "<|reserved_special_token_40|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128049": { - "content": "<|reserved_special_token_41|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128050": { - "content": "<|reserved_special_token_42|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128051": { - "content": "<|reserved_special_token_43|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128052": { - "content": "<|reserved_special_token_44|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128053": { - "content": "<|reserved_special_token_45|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128054": { - "content": "<|reserved_special_token_46|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128055": { - "content": "<|reserved_special_token_47|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128056": { - "content": "<|reserved_special_token_48|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128057": { - "content": "<|reserved_special_token_49|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128058": { - "content": "<|reserved_special_token_50|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128059": { - "content": "<|reserved_special_token_51|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128060": { - "content": "<|reserved_special_token_52|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128061": { - "content": "<|reserved_special_token_53|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128062": { - "content": "<|reserved_special_token_54|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128063": { - "content": "<|reserved_special_token_55|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128064": { - "content": "<|reserved_special_token_56|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128065": { - "content": "<|reserved_special_token_57|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128066": { - "content": "<|reserved_special_token_58|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128067": { - "content": "<|reserved_special_token_59|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128068": { - "content": "<|reserved_special_token_60|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128069": { - "content": "<|reserved_special_token_61|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128070": { - "content": "<|reserved_special_token_62|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128071": { - "content": "<|reserved_special_token_63|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128072": { - "content": "<|reserved_special_token_64|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128073": { - "content": "<|reserved_special_token_65|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128074": { - "content": "<|reserved_special_token_66|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128075": { - "content": "<|reserved_special_token_67|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128076": { - "content": "<|reserved_special_token_68|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128077": { - "content": "<|reserved_special_token_69|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128078": { - "content": "<|reserved_special_token_70|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128079": { - "content": "<|reserved_special_token_71|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128080": { - "content": "<|reserved_special_token_72|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128081": { - "content": "<|reserved_special_token_73|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128082": { - "content": "<|reserved_special_token_74|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128083": { - "content": "<|reserved_special_token_75|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128084": { - "content": "<|reserved_special_token_76|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128085": { - "content": "<|reserved_special_token_77|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128086": { - "content": "<|reserved_special_token_78|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128087": { - "content": "<|reserved_special_token_79|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128088": { - "content": "<|reserved_special_token_80|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128089": { - "content": "<|reserved_special_token_81|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128090": { - "content": "<|reserved_special_token_82|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128091": { - "content": "<|reserved_special_token_83|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128092": { - "content": "<|reserved_special_token_84|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128093": { - "content": "<|reserved_special_token_85|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128094": { - "content": "<|reserved_special_token_86|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128095": { - "content": "<|reserved_special_token_87|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128096": { - "content": "<|reserved_special_token_88|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128097": { - "content": "<|reserved_special_token_89|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128098": { - "content": "<|reserved_special_token_90|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128099": { - "content": "<|reserved_special_token_91|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128100": { - "content": "<|reserved_special_token_92|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128101": { - "content": "<|reserved_special_token_93|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128102": { - "content": "<|reserved_special_token_94|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128103": { - "content": "<|reserved_special_token_95|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128104": { - "content": "<|reserved_special_token_96|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128105": { - "content": "<|reserved_special_token_97|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128106": { - "content": "<|reserved_special_token_98|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128107": { - "content": "<|reserved_special_token_99|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128108": { - "content": "<|reserved_special_token_100|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128109": { - "content": "<|reserved_special_token_101|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128110": { - "content": "<|reserved_special_token_102|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128111": { - "content": "<|reserved_special_token_103|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128112": { - "content": "<|reserved_special_token_104|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128113": { - "content": "<|reserved_special_token_105|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128114": { - "content": "<|reserved_special_token_106|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128115": { - "content": "<|reserved_special_token_107|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128116": { - "content": "<|reserved_special_token_108|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128117": { - "content": "<|reserved_special_token_109|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128118": { - "content": "<|reserved_special_token_110|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128119": { - "content": "<|reserved_special_token_111|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128120": { - "content": "<|reserved_special_token_112|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128121": { - "content": "<|reserved_special_token_113|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128122": { - "content": "<|reserved_special_token_114|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128123": { - "content": "<|reserved_special_token_115|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128124": { - "content": "<|reserved_special_token_116|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128125": { - "content": "<|reserved_special_token_117|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128126": { - "content": "<|reserved_special_token_118|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128127": { - "content": "<|reserved_special_token_119|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128128": { - "content": "<|reserved_special_token_120|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128129": { - "content": "<|reserved_special_token_121|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128130": { - "content": "<|reserved_special_token_122|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128131": { - "content": "<|reserved_special_token_123|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128132": { - "content": "<|reserved_special_token_124|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128133": { - "content": "<|reserved_special_token_125|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128134": { - "content": "<|reserved_special_token_126|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128135": { - "content": "<|reserved_special_token_127|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128136": { - "content": "<|reserved_special_token_128|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128137": { - "content": "<|reserved_special_token_129|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128138": { - "content": "<|reserved_special_token_130|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128139": { - "content": "<|reserved_special_token_131|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128140": { - "content": "<|reserved_special_token_132|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128141": { - "content": "<|reserved_special_token_133|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128142": { - "content": "<|reserved_special_token_134|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128143": { - "content": "<|reserved_special_token_135|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128144": { - "content": "<|reserved_special_token_136|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128145": { - "content": "<|reserved_special_token_137|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128146": { - "content": "<|reserved_special_token_138|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128147": { - "content": "<|reserved_special_token_139|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128148": { - "content": "<|reserved_special_token_140|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128149": { - "content": "<|reserved_special_token_141|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128150": { - "content": "<|reserved_special_token_142|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128151": { - "content": "<|reserved_special_token_143|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128152": { - "content": "<|reserved_special_token_144|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128153": { - "content": "<|reserved_special_token_145|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128154": { - "content": "<|reserved_special_token_146|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128155": { - "content": "<|reserved_special_token_147|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128156": { - "content": "<|reserved_special_token_148|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128157": { - "content": "<|reserved_special_token_149|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128158": { - "content": "<|reserved_special_token_150|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128159": { - "content": "<|reserved_special_token_151|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128160": { - "content": "<|reserved_special_token_152|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128161": { - "content": "<|reserved_special_token_153|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128162": { - "content": "<|reserved_special_token_154|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128163": { - "content": "<|reserved_special_token_155|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128164": { - "content": "<|reserved_special_token_156|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128165": { - "content": "<|reserved_special_token_157|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128166": { - "content": "<|reserved_special_token_158|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128167": { - "content": "<|reserved_special_token_159|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128168": { - "content": "<|reserved_special_token_160|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128169": { - "content": "<|reserved_special_token_161|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128170": { - "content": "<|reserved_special_token_162|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128171": { - "content": "<|reserved_special_token_163|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128172": { - "content": "<|reserved_special_token_164|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128173": { - "content": "<|reserved_special_token_165|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128174": { - "content": "<|reserved_special_token_166|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128175": { - "content": "<|reserved_special_token_167|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128176": { - "content": "<|reserved_special_token_168|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128177": { - "content": "<|reserved_special_token_169|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128178": { - "content": "<|reserved_special_token_170|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128179": { - "content": "<|reserved_special_token_171|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128180": { - "content": "<|reserved_special_token_172|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128181": { - "content": "<|reserved_special_token_173|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128182": { - "content": "<|reserved_special_token_174|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128183": { - "content": "<|reserved_special_token_175|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128184": { - "content": "<|reserved_special_token_176|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128185": { - "content": "<|reserved_special_token_177|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128186": { - "content": "<|reserved_special_token_178|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128187": { - "content": "<|reserved_special_token_179|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128188": { - "content": "<|reserved_special_token_180|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128189": { - "content": "<|reserved_special_token_181|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128190": { - "content": "<|reserved_special_token_182|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128191": { - "content": "<|reserved_special_token_183|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128192": { - "content": "<|reserved_special_token_184|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128193": { - "content": "<|reserved_special_token_185|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128194": { - "content": "<|reserved_special_token_186|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128195": { - "content": "<|reserved_special_token_187|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128196": { - "content": "<|reserved_special_token_188|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128197": { - "content": "<|reserved_special_token_189|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128198": { - "content": "<|reserved_special_token_190|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128199": { - "content": "<|reserved_special_token_191|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128200": { - "content": "<|reserved_special_token_192|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128201": { - "content": "<|reserved_special_token_193|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128202": { - "content": "<|reserved_special_token_194|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128203": { - "content": "<|reserved_special_token_195|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128204": { - "content": "<|reserved_special_token_196|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128205": { - "content": "<|reserved_special_token_197|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128206": { - "content": "<|reserved_special_token_198|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128207": { - "content": "<|reserved_special_token_199|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128208": { - "content": "<|reserved_special_token_200|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128209": { - "content": "<|reserved_special_token_201|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128210": { - "content": "<|reserved_special_token_202|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128211": { - "content": "<|reserved_special_token_203|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128212": { - "content": "<|reserved_special_token_204|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128213": { - "content": "<|reserved_special_token_205|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128214": { - "content": "<|reserved_special_token_206|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128215": { - "content": "<|reserved_special_token_207|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128216": { - "content": "<|reserved_special_token_208|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128217": { - "content": "<|reserved_special_token_209|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128218": { - "content": "<|reserved_special_token_210|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128219": { - "content": "<|reserved_special_token_211|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128220": { - "content": "<|reserved_special_token_212|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128221": { - "content": "<|reserved_special_token_213|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128222": { - "content": "<|reserved_special_token_214|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128223": { - "content": "<|reserved_special_token_215|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128224": { - "content": "<|reserved_special_token_216|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128225": { - "content": "<|reserved_special_token_217|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128226": { - "content": "<|reserved_special_token_218|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128227": { - "content": "<|reserved_special_token_219|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128228": { - "content": "<|reserved_special_token_220|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128229": { - "content": "<|reserved_special_token_221|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128230": { - "content": "<|reserved_special_token_222|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128231": { - "content": "<|reserved_special_token_223|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128232": { - "content": "<|reserved_special_token_224|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128233": { - "content": "<|reserved_special_token_225|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128234": { - "content": "<|reserved_special_token_226|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128235": { - "content": "<|reserved_special_token_227|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128236": { - "content": "<|reserved_special_token_228|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128237": { - "content": "<|reserved_special_token_229|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128238": { - "content": "<|reserved_special_token_230|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128239": { - "content": "<|reserved_special_token_231|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128240": { - "content": "<|reserved_special_token_232|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128241": { - "content": "<|reserved_special_token_233|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128242": { - "content": "<|reserved_special_token_234|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128243": { - "content": "<|reserved_special_token_235|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128244": { - "content": "<|reserved_special_token_236|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128245": { - "content": "<|reserved_special_token_237|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128246": { - "content": "<|reserved_special_token_238|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128247": { - "content": "<|reserved_special_token_239|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128248": { - "content": "<|reserved_special_token_240|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128249": { - "content": "<|reserved_special_token_241|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128250": { - "content": "<|reserved_special_token_242|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128251": { - "content": "<|reserved_special_token_243|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128252": { - "content": "<|reserved_special_token_244|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128253": { - "content": "<|reserved_special_token_245|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128254": { - "content": "<|reserved_special_token_246|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128255": { - "content": "<|reserved_special_token_247|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - } - }, - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "extra_special_tokens": {}, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|eot_id|>", - "tokenizer_class": "PreTrainedTokenizerFast" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/README.md b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/README.md deleted file mode 100644 index a7b1845736d57ab6d0c2069f0011dac9cbd654eb..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Quantized Model Checkpoint - -**Base model:** meta-llama/Llama-3.1-8B-Instruct - -**Average bitwidth:** 5.3486 - -**Sensitivity method:** shapley - -**Constraints:** -- max_kl: 0.01 - -**Metrics:** -- predicted_kl: 0.009999 -- predicted_eap: 0.961336 - -See `quantization_config.txt` for full configuration details. diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/chat_template.jinja b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/chat_template.jinja deleted file mode 100644 index 33089ace1be88f22a10fe861ad49718d5d886090..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/config.json deleted file mode 100644 index 2bf71dab1ee4525127aba58e9446aa0a1dd046e6..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "bos_token_id": 128000, - "dtype": "float16", - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "transformers_version": "4.57.3", - "use_cache": false, - "vocab_size": 128256 -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/generation_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/generation_config.json deleted file mode 100644 index 993459bf55ed73c1390809c2e2a3d7c1c0e0d844..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "bos_token_id": 128000, - "do_sample": true, - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "temperature": 0.6, - "top_p": 0.9, - "transformers_version": "4.57.3" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00001-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00001-of-00004.safetensors deleted file mode 100644 index 667b9580e3294a2a8b34cfe321b9c8f87168350a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00001-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:337abd4707d98c53d4e566f79474b6e5d4f4475c33149e59e5d095790e3d62dc -size 4976698592 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00002-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00002-of-00004.safetensors deleted file mode 100644 index d21b15b2a7eac123dae48e5be14f90e5a80ad6e3..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00002-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:088532c31698ede2d759698b7fb781da27a9affe6454973c22cd59f7b97852d6 -size 4999802616 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00003-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00003-of-00004.safetensors deleted file mode 100644 index 48a3e7ade09af9461bcceca335f0b386d6be5d63..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00003-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:83407311ee1e62d98dd8b4d90504f9e57c94f8581d335cc5571edcbcb9972723 -size 4915916080 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00004-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00004-of-00004.safetensors deleted file mode 100644 index 6e6670704266d0fec2b96d74c46c9122bb614f94..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model-00004-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38b69c0b458651dc79acbd142315cfa6285742af0146ce1fa0df241d9dcffebd -size 1168138808 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model.safetensors.index.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model.safetensors.index.json deleted file mode 100644 index 5c64f1e87be95160fabc494eebfa0f7e68064af2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/model.safetensors.index.json +++ /dev/null @@ -1,299 +0,0 @@ -{ - "metadata": { - "total_parameters": 8030261248, - "total_size": 16060522496 - }, - "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.embed_tokens.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00004-of-00004.safetensors" - } -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/quantization_config.txt b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/quantization_config.txt deleted file mode 100644 index 3938b37a0c0ce61875d480b5780d533514211689..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/quantization_config.txt +++ /dev/null @@ -1,251 +0,0 @@ -# Model: meta-llama/Llama-3.1-8B-Instruct -# Layer directory: /nfs/scistore19/alistgrp/mhelcig/local/data/search/4_5_6_7_8bit_asym_g128/Llama-3.1-8B-Instruct/6bit/ -# Sensitivity method: shapley -# Estimation method: permutation_separate -# Available bitwidths: [4, 5, 6, 7, 8] -# Bitwidth map: {4: 4.3125, 5: 5.3125, 6: 6.3125, 7: 7.3125, 8: 8.3125} -# -# Mode: predicted_constraint_search -# Constraint max_kl: 0.01 -# Weights: nll=0.0, kl=0.0, eap=1.0 -# -# Average bitwidth: 5.3486 -# Total params: 6979321856 -# Total bits: 37329305600 -# Predicted KL: 0.009999 -# Predicted EAP: 0.961336 -# Predicted ETL: 0.038664 -# Satisfies constraints: True -# Solver calls: 1 -# Evaluations: 0 -# -# Bitwidth distribution: -# 7-bit: 30 layers (13.4%) -# 6-bit: 51 layers (22.8%) -# 5-bit: 135 layers (60.3%) -# 4-bit: 8 layers (3.6%) -# -model.layers.0.self_attn.q_proj: 5 -model.layers.0.self_attn.k_proj: 6 -model.layers.0.self_attn.v_proj: 6 -model.layers.0.self_attn.o_proj: 5 -model.layers.0.mlp.gate_proj: 5 -model.layers.0.mlp.up_proj: 5 -model.layers.0.mlp.down_proj: 5 -model.layers.1.self_attn.q_proj: 6 -model.layers.1.self_attn.k_proj: 7 -model.layers.1.self_attn.v_proj: 7 -model.layers.1.self_attn.o_proj: 5 -model.layers.1.mlp.gate_proj: 5 -model.layers.1.mlp.up_proj: 5 -model.layers.1.mlp.down_proj: 6 -model.layers.2.self_attn.q_proj: 6 -model.layers.2.self_attn.k_proj: 7 -model.layers.2.self_attn.v_proj: 7 -model.layers.2.self_attn.o_proj: 5 -model.layers.2.mlp.gate_proj: 5 -model.layers.2.mlp.up_proj: 5 -model.layers.2.mlp.down_proj: 5 -model.layers.3.self_attn.q_proj: 5 -model.layers.3.self_attn.k_proj: 7 -model.layers.3.self_attn.v_proj: 7 -model.layers.3.self_attn.o_proj: 5 -model.layers.3.mlp.gate_proj: 5 -model.layers.3.mlp.up_proj: 5 -model.layers.3.mlp.down_proj: 5 -model.layers.4.self_attn.q_proj: 5 -model.layers.4.self_attn.k_proj: 7 -model.layers.4.self_attn.v_proj: 7 -model.layers.4.self_attn.o_proj: 5 -model.layers.4.mlp.gate_proj: 5 -model.layers.4.mlp.up_proj: 5 -model.layers.4.mlp.down_proj: 5 -model.layers.5.self_attn.q_proj: 6 -model.layers.5.self_attn.k_proj: 6 -model.layers.5.self_attn.v_proj: 6 -model.layers.5.self_attn.o_proj: 5 -model.layers.5.mlp.gate_proj: 5 -model.layers.5.mlp.up_proj: 5 -model.layers.5.mlp.down_proj: 5 -model.layers.6.self_attn.q_proj: 6 -model.layers.6.self_attn.k_proj: 6 -model.layers.6.self_attn.v_proj: 6 -model.layers.6.self_attn.o_proj: 5 -model.layers.6.mlp.gate_proj: 5 -model.layers.6.mlp.up_proj: 5 -model.layers.6.mlp.down_proj: 5 -model.layers.7.self_attn.q_proj: 6 -model.layers.7.self_attn.k_proj: 6 -model.layers.7.self_attn.v_proj: 6 -model.layers.7.self_attn.o_proj: 5 -model.layers.7.mlp.gate_proj: 5 -model.layers.7.mlp.up_proj: 5 -model.layers.7.mlp.down_proj: 5 -model.layers.8.self_attn.q_proj: 5 -model.layers.8.self_attn.k_proj: 6 -model.layers.8.self_attn.v_proj: 6 -model.layers.8.self_attn.o_proj: 5 -model.layers.8.mlp.gate_proj: 5 -model.layers.8.mlp.up_proj: 5 -model.layers.8.mlp.down_proj: 4 -model.layers.9.self_attn.q_proj: 6 -model.layers.9.self_attn.k_proj: 7 -model.layers.9.self_attn.v_proj: 7 -model.layers.9.self_attn.o_proj: 5 -model.layers.9.mlp.gate_proj: 4 -model.layers.9.mlp.up_proj: 4 -model.layers.9.mlp.down_proj: 5 -model.layers.10.self_attn.q_proj: 6 -model.layers.10.self_attn.k_proj: 6 -model.layers.10.self_attn.v_proj: 6 -model.layers.10.self_attn.o_proj: 5 -model.layers.10.mlp.gate_proj: 5 -model.layers.10.mlp.up_proj: 5 -model.layers.10.mlp.down_proj: 5 -model.layers.11.self_attn.q_proj: 5 -model.layers.11.self_attn.k_proj: 6 -model.layers.11.self_attn.v_proj: 6 -model.layers.11.self_attn.o_proj: 5 -model.layers.11.mlp.gate_proj: 5 -model.layers.11.mlp.up_proj: 5 -model.layers.11.mlp.down_proj: 5 -model.layers.12.self_attn.q_proj: 5 -model.layers.12.self_attn.k_proj: 6 -model.layers.12.self_attn.v_proj: 6 -model.layers.12.self_attn.o_proj: 5 -model.layers.12.mlp.gate_proj: 5 -model.layers.12.mlp.up_proj: 5 -model.layers.12.mlp.down_proj: 4 -model.layers.13.self_attn.q_proj: 6 -model.layers.13.self_attn.k_proj: 6 -model.layers.13.self_attn.v_proj: 6 -model.layers.13.self_attn.o_proj: 5 -model.layers.13.mlp.gate_proj: 5 -model.layers.13.mlp.up_proj: 5 -model.layers.13.mlp.down_proj: 5 -model.layers.14.self_attn.q_proj: 6 -model.layers.14.self_attn.k_proj: 7 -model.layers.14.self_attn.v_proj: 7 -model.layers.14.self_attn.o_proj: 5 -model.layers.14.mlp.gate_proj: 5 -model.layers.14.mlp.up_proj: 5 -model.layers.14.mlp.down_proj: 5 -model.layers.15.self_attn.q_proj: 6 -model.layers.15.self_attn.k_proj: 7 -model.layers.15.self_attn.v_proj: 7 -model.layers.15.self_attn.o_proj: 5 -model.layers.15.mlp.gate_proj: 5 -model.layers.15.mlp.up_proj: 5 -model.layers.15.mlp.down_proj: 5 -model.layers.16.self_attn.q_proj: 5 -model.layers.16.self_attn.k_proj: 7 -model.layers.16.self_attn.v_proj: 7 -model.layers.16.self_attn.o_proj: 5 -model.layers.16.mlp.gate_proj: 5 -model.layers.16.mlp.up_proj: 5 -model.layers.16.mlp.down_proj: 5 -model.layers.17.self_attn.q_proj: 6 -model.layers.17.self_attn.k_proj: 7 -model.layers.17.self_attn.v_proj: 7 -model.layers.17.self_attn.o_proj: 5 -model.layers.17.mlp.gate_proj: 5 -model.layers.17.mlp.up_proj: 5 -model.layers.17.mlp.down_proj: 5 -model.layers.18.self_attn.q_proj: 6 -model.layers.18.self_attn.k_proj: 6 -model.layers.18.self_attn.v_proj: 6 -model.layers.18.self_attn.o_proj: 5 -model.layers.18.mlp.gate_proj: 5 -model.layers.18.mlp.up_proj: 5 -model.layers.18.mlp.down_proj: 5 -model.layers.19.self_attn.q_proj: 5 -model.layers.19.self_attn.k_proj: 7 -model.layers.19.self_attn.v_proj: 7 -model.layers.19.self_attn.o_proj: 5 -model.layers.19.mlp.gate_proj: 5 -model.layers.19.mlp.up_proj: 5 -model.layers.19.mlp.down_proj: 5 -model.layers.20.self_attn.q_proj: 5 -model.layers.20.self_attn.k_proj: 7 -model.layers.20.self_attn.v_proj: 7 -model.layers.20.self_attn.o_proj: 5 -model.layers.20.mlp.gate_proj: 5 -model.layers.20.mlp.up_proj: 5 -model.layers.20.mlp.down_proj: 5 -model.layers.21.self_attn.q_proj: 6 -model.layers.21.self_attn.k_proj: 7 -model.layers.21.self_attn.v_proj: 7 -model.layers.21.self_attn.o_proj: 5 -model.layers.21.mlp.gate_proj: 4 -model.layers.21.mlp.up_proj: 4 -model.layers.21.mlp.down_proj: 5 -model.layers.22.self_attn.q_proj: 5 -model.layers.22.self_attn.k_proj: 7 -model.layers.22.self_attn.v_proj: 7 -model.layers.22.self_attn.o_proj: 5 -model.layers.22.mlp.gate_proj: 5 -model.layers.22.mlp.up_proj: 5 -model.layers.22.mlp.down_proj: 5 -model.layers.23.self_attn.q_proj: 6 -model.layers.23.self_attn.k_proj: 6 -model.layers.23.self_attn.v_proj: 6 -model.layers.23.self_attn.o_proj: 5 -model.layers.23.mlp.gate_proj: 5 -model.layers.23.mlp.up_proj: 5 -model.layers.23.mlp.down_proj: 5 -model.layers.24.self_attn.q_proj: 5 -model.layers.24.self_attn.k_proj: 6 -model.layers.24.self_attn.v_proj: 6 -model.layers.24.self_attn.o_proj: 5 -model.layers.24.mlp.gate_proj: 5 -model.layers.24.mlp.up_proj: 5 -model.layers.24.mlp.down_proj: 5 -model.layers.25.self_attn.q_proj: 5 -model.layers.25.self_attn.k_proj: 6 -model.layers.25.self_attn.v_proj: 6 -model.layers.25.self_attn.o_proj: 5 -model.layers.25.mlp.gate_proj: 5 -model.layers.25.mlp.up_proj: 5 -model.layers.25.mlp.down_proj: 5 -model.layers.26.self_attn.q_proj: 5 -model.layers.26.self_attn.k_proj: 7 -model.layers.26.self_attn.v_proj: 7 -model.layers.26.self_attn.o_proj: 5 -model.layers.26.mlp.gate_proj: 5 -model.layers.26.mlp.up_proj: 5 -model.layers.26.mlp.down_proj: 4 -model.layers.27.self_attn.q_proj: 5 -model.layers.27.self_attn.k_proj: 6 -model.layers.27.self_attn.v_proj: 6 -model.layers.27.self_attn.o_proj: 5 -model.layers.27.mlp.gate_proj: 5 -model.layers.27.mlp.up_proj: 5 -model.layers.27.mlp.down_proj: 4 -model.layers.28.self_attn.q_proj: 6 -model.layers.28.self_attn.k_proj: 6 -model.layers.28.self_attn.v_proj: 6 -model.layers.28.self_attn.o_proj: 5 -model.layers.28.mlp.gate_proj: 5 -model.layers.28.mlp.up_proj: 5 -model.layers.28.mlp.down_proj: 5 -model.layers.29.self_attn.q_proj: 5 -model.layers.29.self_attn.k_proj: 7 -model.layers.29.self_attn.v_proj: 7 -model.layers.29.self_attn.o_proj: 5 -model.layers.29.mlp.gate_proj: 5 -model.layers.29.mlp.up_proj: 5 -model.layers.29.mlp.down_proj: 5 -model.layers.30.self_attn.q_proj: 5 -model.layers.30.self_attn.k_proj: 6 -model.layers.30.self_attn.v_proj: 6 -model.layers.30.self_attn.o_proj: 5 -model.layers.30.mlp.gate_proj: 5 -model.layers.30.mlp.up_proj: 5 -model.layers.30.mlp.down_proj: 5 -model.layers.31.self_attn.q_proj: 6 -model.layers.31.self_attn.k_proj: 6 -model.layers.31.self_attn.v_proj: 6 -model.layers.31.self_attn.o_proj: 5 -model.layers.31.mlp.gate_proj: 5 -model.layers.31.mlp.up_proj: 5 -model.layers.31.mlp.down_proj: 5 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/special_tokens_map.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/special_tokens_map.json deleted file mode 100644 index b43be96621d147110fb8a18b5776ec6e38516127..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/special_tokens_map.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "bos_token": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "<|eot_id|>" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/tokenizer.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/tokenizer_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/tokenizer_config.json deleted file mode 100644 index 3beeacc86a6ca3cae14ad3004263ab74a4bac07a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw5.35_4-5-6-7bit/tokenizer_config.json +++ /dev/null @@ -1,2063 +0,0 @@ -{ - "added_tokens_decoder": { - "128000": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128001": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128002": { - "content": "<|reserved_special_token_0|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128003": { - "content": "<|reserved_special_token_1|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128004": { - "content": "<|finetune_right_pad_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128005": { - "content": "<|reserved_special_token_2|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128006": { - "content": "<|start_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128007": { - "content": "<|end_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128008": { - "content": "<|eom_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128009": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128010": { - "content": "<|python_tag|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128011": { - "content": "<|reserved_special_token_3|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128012": { - "content": "<|reserved_special_token_4|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128013": { - "content": "<|reserved_special_token_5|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128014": { - "content": "<|reserved_special_token_6|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128015": { - "content": "<|reserved_special_token_7|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128016": { - "content": "<|reserved_special_token_8|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128017": { - "content": "<|reserved_special_token_9|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128018": { - "content": "<|reserved_special_token_10|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128019": { - "content": "<|reserved_special_token_11|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128020": { - "content": "<|reserved_special_token_12|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128021": { - "content": "<|reserved_special_token_13|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128022": { - "content": "<|reserved_special_token_14|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128023": { - "content": "<|reserved_special_token_15|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128024": { - "content": "<|reserved_special_token_16|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128025": { - "content": "<|reserved_special_token_17|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128026": { - "content": "<|reserved_special_token_18|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128027": { - "content": "<|reserved_special_token_19|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128028": { - "content": "<|reserved_special_token_20|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128029": { - "content": "<|reserved_special_token_21|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128030": { - "content": "<|reserved_special_token_22|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128031": { - "content": "<|reserved_special_token_23|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128032": { - "content": "<|reserved_special_token_24|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128033": { - "content": "<|reserved_special_token_25|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128034": { - "content": "<|reserved_special_token_26|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128035": { - "content": "<|reserved_special_token_27|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128036": { - "content": "<|reserved_special_token_28|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128037": { - "content": "<|reserved_special_token_29|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128038": { - "content": "<|reserved_special_token_30|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128039": { - "content": "<|reserved_special_token_31|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128040": { - "content": "<|reserved_special_token_32|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128041": { - "content": "<|reserved_special_token_33|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128042": { - "content": "<|reserved_special_token_34|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128043": { - "content": "<|reserved_special_token_35|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128044": { - "content": "<|reserved_special_token_36|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128045": { - "content": "<|reserved_special_token_37|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128046": { - "content": "<|reserved_special_token_38|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128047": { - "content": "<|reserved_special_token_39|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128048": { - "content": "<|reserved_special_token_40|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128049": { - "content": "<|reserved_special_token_41|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128050": { - "content": "<|reserved_special_token_42|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128051": { - "content": "<|reserved_special_token_43|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128052": { - "content": "<|reserved_special_token_44|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128053": { - "content": "<|reserved_special_token_45|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128054": { - "content": "<|reserved_special_token_46|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128055": { - "content": "<|reserved_special_token_47|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128056": { - "content": "<|reserved_special_token_48|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128057": { - "content": "<|reserved_special_token_49|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128058": { - "content": "<|reserved_special_token_50|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128059": { - "content": "<|reserved_special_token_51|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128060": { - "content": "<|reserved_special_token_52|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128061": { - "content": "<|reserved_special_token_53|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128062": { - "content": "<|reserved_special_token_54|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128063": { - "content": "<|reserved_special_token_55|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128064": { - "content": "<|reserved_special_token_56|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128065": { - "content": "<|reserved_special_token_57|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128066": { - "content": "<|reserved_special_token_58|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128067": { - "content": "<|reserved_special_token_59|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128068": { - "content": "<|reserved_special_token_60|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128069": { - "content": "<|reserved_special_token_61|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128070": { - "content": "<|reserved_special_token_62|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128071": { - "content": "<|reserved_special_token_63|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128072": { - "content": "<|reserved_special_token_64|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128073": { - "content": "<|reserved_special_token_65|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128074": { - "content": "<|reserved_special_token_66|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128075": { - "content": "<|reserved_special_token_67|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128076": { - "content": "<|reserved_special_token_68|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128077": { - "content": "<|reserved_special_token_69|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128078": { - "content": "<|reserved_special_token_70|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128079": { - "content": "<|reserved_special_token_71|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128080": { - "content": "<|reserved_special_token_72|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128081": { - "content": "<|reserved_special_token_73|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128082": { - "content": "<|reserved_special_token_74|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128083": { - "content": "<|reserved_special_token_75|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128084": { - "content": "<|reserved_special_token_76|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128085": { - "content": "<|reserved_special_token_77|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128086": { - "content": "<|reserved_special_token_78|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128087": { - "content": "<|reserved_special_token_79|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128088": { - "content": "<|reserved_special_token_80|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128089": { - "content": "<|reserved_special_token_81|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128090": { - "content": "<|reserved_special_token_82|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128091": { - "content": "<|reserved_special_token_83|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128092": { - "content": "<|reserved_special_token_84|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128093": { - "content": "<|reserved_special_token_85|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128094": { - "content": "<|reserved_special_token_86|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128095": { - "content": "<|reserved_special_token_87|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128096": { - "content": "<|reserved_special_token_88|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128097": { - "content": "<|reserved_special_token_89|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128098": { - "content": "<|reserved_special_token_90|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128099": { - "content": "<|reserved_special_token_91|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128100": { - "content": "<|reserved_special_token_92|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128101": { - "content": "<|reserved_special_token_93|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128102": { - "content": "<|reserved_special_token_94|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128103": { - "content": "<|reserved_special_token_95|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128104": { - "content": "<|reserved_special_token_96|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128105": { - "content": "<|reserved_special_token_97|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128106": { - "content": "<|reserved_special_token_98|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128107": { - "content": "<|reserved_special_token_99|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128108": { - "content": "<|reserved_special_token_100|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128109": { - "content": "<|reserved_special_token_101|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128110": { - "content": "<|reserved_special_token_102|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128111": { - "content": "<|reserved_special_token_103|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128112": { - "content": "<|reserved_special_token_104|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128113": { - "content": "<|reserved_special_token_105|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128114": { - "content": "<|reserved_special_token_106|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128115": { - "content": "<|reserved_special_token_107|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128116": { - "content": "<|reserved_special_token_108|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128117": { - "content": "<|reserved_special_token_109|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128118": { - "content": "<|reserved_special_token_110|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128119": { - "content": "<|reserved_special_token_111|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128120": { - "content": "<|reserved_special_token_112|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128121": { - "content": "<|reserved_special_token_113|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128122": { - "content": "<|reserved_special_token_114|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128123": { - "content": "<|reserved_special_token_115|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128124": { - "content": "<|reserved_special_token_116|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128125": { - "content": "<|reserved_special_token_117|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128126": { - "content": "<|reserved_special_token_118|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128127": { - "content": "<|reserved_special_token_119|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128128": { - "content": "<|reserved_special_token_120|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128129": { - "content": "<|reserved_special_token_121|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128130": { - "content": "<|reserved_special_token_122|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128131": { - "content": "<|reserved_special_token_123|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128132": { - "content": "<|reserved_special_token_124|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128133": { - "content": "<|reserved_special_token_125|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128134": { - "content": "<|reserved_special_token_126|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128135": { - "content": "<|reserved_special_token_127|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128136": { - "content": "<|reserved_special_token_128|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128137": { - "content": "<|reserved_special_token_129|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128138": { - "content": "<|reserved_special_token_130|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128139": { - "content": "<|reserved_special_token_131|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128140": { - "content": "<|reserved_special_token_132|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128141": { - "content": "<|reserved_special_token_133|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128142": { - "content": "<|reserved_special_token_134|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128143": { - "content": "<|reserved_special_token_135|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128144": { - "content": "<|reserved_special_token_136|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128145": { - "content": "<|reserved_special_token_137|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128146": { - "content": "<|reserved_special_token_138|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128147": { - "content": "<|reserved_special_token_139|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128148": { - "content": "<|reserved_special_token_140|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128149": { - "content": "<|reserved_special_token_141|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128150": { - "content": "<|reserved_special_token_142|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128151": { - "content": "<|reserved_special_token_143|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128152": { - "content": "<|reserved_special_token_144|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128153": { - "content": "<|reserved_special_token_145|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128154": { - "content": "<|reserved_special_token_146|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128155": { - "content": "<|reserved_special_token_147|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128156": { - "content": "<|reserved_special_token_148|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128157": { - "content": "<|reserved_special_token_149|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128158": { - "content": "<|reserved_special_token_150|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128159": { - "content": "<|reserved_special_token_151|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128160": { - "content": "<|reserved_special_token_152|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128161": { - "content": "<|reserved_special_token_153|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128162": { - "content": "<|reserved_special_token_154|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128163": { - "content": "<|reserved_special_token_155|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128164": { - "content": "<|reserved_special_token_156|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128165": { - "content": "<|reserved_special_token_157|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128166": { - "content": "<|reserved_special_token_158|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128167": { - "content": "<|reserved_special_token_159|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128168": { - "content": "<|reserved_special_token_160|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128169": { - "content": "<|reserved_special_token_161|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128170": { - "content": "<|reserved_special_token_162|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128171": { - "content": "<|reserved_special_token_163|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128172": { - "content": "<|reserved_special_token_164|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128173": { - "content": "<|reserved_special_token_165|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128174": { - "content": "<|reserved_special_token_166|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128175": { - "content": "<|reserved_special_token_167|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128176": { - "content": "<|reserved_special_token_168|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128177": { - "content": "<|reserved_special_token_169|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128178": { - "content": "<|reserved_special_token_170|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128179": { - "content": "<|reserved_special_token_171|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128180": { - "content": "<|reserved_special_token_172|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128181": { - "content": "<|reserved_special_token_173|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128182": { - "content": "<|reserved_special_token_174|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128183": { - "content": "<|reserved_special_token_175|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128184": { - "content": "<|reserved_special_token_176|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128185": { - "content": "<|reserved_special_token_177|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128186": { - "content": "<|reserved_special_token_178|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128187": { - "content": "<|reserved_special_token_179|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128188": { - "content": "<|reserved_special_token_180|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128189": { - "content": "<|reserved_special_token_181|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128190": { - "content": "<|reserved_special_token_182|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128191": { - "content": "<|reserved_special_token_183|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128192": { - "content": "<|reserved_special_token_184|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128193": { - "content": "<|reserved_special_token_185|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128194": { - "content": "<|reserved_special_token_186|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128195": { - "content": "<|reserved_special_token_187|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128196": { - "content": "<|reserved_special_token_188|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128197": { - "content": "<|reserved_special_token_189|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128198": { - "content": "<|reserved_special_token_190|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128199": { - "content": "<|reserved_special_token_191|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128200": { - "content": "<|reserved_special_token_192|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128201": { - "content": "<|reserved_special_token_193|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128202": { - "content": "<|reserved_special_token_194|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128203": { - "content": "<|reserved_special_token_195|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128204": { - "content": "<|reserved_special_token_196|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128205": { - "content": "<|reserved_special_token_197|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128206": { - "content": "<|reserved_special_token_198|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128207": { - "content": "<|reserved_special_token_199|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128208": { - "content": "<|reserved_special_token_200|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128209": { - "content": "<|reserved_special_token_201|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128210": { - "content": "<|reserved_special_token_202|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128211": { - "content": "<|reserved_special_token_203|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128212": { - "content": "<|reserved_special_token_204|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128213": { - "content": "<|reserved_special_token_205|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128214": { - "content": "<|reserved_special_token_206|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128215": { - "content": "<|reserved_special_token_207|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128216": { - "content": "<|reserved_special_token_208|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128217": { - "content": "<|reserved_special_token_209|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128218": { - "content": "<|reserved_special_token_210|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128219": { - "content": "<|reserved_special_token_211|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128220": { - "content": "<|reserved_special_token_212|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128221": { - "content": "<|reserved_special_token_213|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128222": { - "content": "<|reserved_special_token_214|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128223": { - "content": "<|reserved_special_token_215|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128224": { - "content": "<|reserved_special_token_216|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128225": { - "content": "<|reserved_special_token_217|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128226": { - "content": "<|reserved_special_token_218|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128227": { - "content": "<|reserved_special_token_219|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128228": { - "content": "<|reserved_special_token_220|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128229": { - "content": "<|reserved_special_token_221|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128230": { - "content": "<|reserved_special_token_222|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128231": { - "content": "<|reserved_special_token_223|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128232": { - "content": "<|reserved_special_token_224|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128233": { - "content": "<|reserved_special_token_225|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128234": { - "content": "<|reserved_special_token_226|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128235": { - "content": "<|reserved_special_token_227|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128236": { - "content": "<|reserved_special_token_228|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128237": { - "content": "<|reserved_special_token_229|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128238": { - "content": "<|reserved_special_token_230|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128239": { - "content": "<|reserved_special_token_231|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128240": { - "content": "<|reserved_special_token_232|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128241": { - "content": "<|reserved_special_token_233|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128242": { - "content": "<|reserved_special_token_234|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128243": { - "content": "<|reserved_special_token_235|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128244": { - "content": "<|reserved_special_token_236|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128245": { - "content": "<|reserved_special_token_237|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128246": { - "content": "<|reserved_special_token_238|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128247": { - "content": "<|reserved_special_token_239|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128248": { - "content": "<|reserved_special_token_240|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128249": { - "content": "<|reserved_special_token_241|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128250": { - "content": "<|reserved_special_token_242|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128251": { - "content": "<|reserved_special_token_243|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128252": { - "content": "<|reserved_special_token_244|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128253": { - "content": "<|reserved_special_token_245|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128254": { - "content": "<|reserved_special_token_246|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128255": { - "content": "<|reserved_special_token_247|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - } - }, - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "extra_special_tokens": {}, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|eot_id|>", - "tokenizer_class": "PreTrainedTokenizerFast" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/README.md b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/README.md deleted file mode 100644 index e03c6947586c8988d514182616070833297e4e47..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Quantized Model Checkpoint - -**Base model:** meta-llama/Llama-3.1-8B-Instruct - -**Average bitwidth:** 6.8798 - -**Sensitivity method:** shapley - -**Constraints:** -- max_kl: 0.01 - -**Metrics:** -- predicted_kl: 0.009999 -- predicted_eap: 0.979133 - -See `quantization_config.txt` for full configuration details. diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/chat_template.jinja b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/chat_template.jinja deleted file mode 100644 index 33089ace1be88f22a10fe861ad49718d5d886090..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/config.json deleted file mode 100644 index 2bf71dab1ee4525127aba58e9446aa0a1dd046e6..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "architectures": [ - "LlamaForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "bos_token_id": 128000, - "dtype": "float16", - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "head_dim": 128, - "hidden_act": "silu", - "hidden_size": 4096, - "initializer_range": 0.02, - "intermediate_size": 14336, - "max_position_embeddings": 131072, - "mlp_bias": false, - "model_type": "llama", - "num_attention_heads": 32, - "num_hidden_layers": 32, - "num_key_value_heads": 8, - "pretraining_tp": 1, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "factor": 8.0, - "high_freq_factor": 4.0, - "low_freq_factor": 1.0, - "original_max_position_embeddings": 8192, - "rope_type": "llama3" - }, - "rope_theta": 500000.0, - "tie_word_embeddings": false, - "transformers_version": "4.57.3", - "use_cache": false, - "vocab_size": 128256 -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/generation_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/generation_config.json deleted file mode 100644 index 993459bf55ed73c1390809c2e2a3d7c1c0e0d844..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "bos_token_id": 128000, - "do_sample": true, - "eos_token_id": [ - 128001, - 128008, - 128009 - ], - "temperature": 0.6, - "top_p": 0.9, - "transformers_version": "4.57.3" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00001-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00001-of-00004.safetensors deleted file mode 100644 index 8dd035b8db49648b40d72962741c1c58f6311448..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00001-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43ccd05418065e5afea0fbebc62ceff90c0c6427c09984e35e1db322ab04030c -size 4976698592 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00002-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00002-of-00004.safetensors deleted file mode 100644 index 92df7c2459bc94e4e08eaada3a1457feaed44a8e..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00002-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:022116f33211e960b2fcff1d546bcadc127ecca67fafc65dc9d6ebc4a70bc4a0 -size 4999802616 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00003-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00003-of-00004.safetensors deleted file mode 100644 index 5cbf7b1f8ef39819d45f5a8b897ae3bf7094705a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00003-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1dda5a94237559c0fa90135fa4b63aa10e9492aec1f1b47205bcdc3ca200b374 -size 4915916080 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00004-of-00004.safetensors b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00004-of-00004.safetensors deleted file mode 100644 index b09e9cdcb05f5cf77b3ef3a8ffa431eb33ad02b9..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model-00004-of-00004.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44408391c116c33adf6e43ab53f84d75bee5e2956b293c34dc60509fb0fd825b -size 1168138808 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model.safetensors.index.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model.safetensors.index.json deleted file mode 100644 index 5c64f1e87be95160fabc494eebfa0f7e68064af2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/model.safetensors.index.json +++ /dev/null @@ -1,299 +0,0 @@ -{ - "metadata": { - "total_parameters": 8030261248, - "total_size": 16060522496 - }, - "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.embed_tokens.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", - "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00004-of-00004.safetensors" - } -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/quantization_config.txt b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/quantization_config.txt deleted file mode 100644 index 04eafe2e51b1920a7b350b6c297eda7396fbdfe0..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/quantization_config.txt +++ /dev/null @@ -1,249 +0,0 @@ -# Model: meta-llama/Llama-3.1-8B-Instruct -# Layer directory: /nfs/scistore19/alistgrp/mhelcig/local/data/search/4_5_6_7_8bit_asym_g128/Llama-3.1-8B-Instruct/6bit/ -# Sensitivity method: shapley -# Estimation method: permutation_separate -# Available bitwidths: [4, 8] -# Bitwidth map: {4: 4.3125, 5: 5.3125, 6: 6.3125, 7: 7.3125, 8: 8.3125} -# -# Mode: predicted_constraint_search -# Constraint max_kl: 0.01 -# Weights: nll=0.0, kl=0.0, eap=1.0 -# -# Average bitwidth: 6.8798 -# Total params: 6979321856 -# Total bits: 48016392192 -# Predicted KL: 0.009999 -# Predicted EAP: 0.979133 -# Predicted ETL: 0.020867 -# Satisfies constraints: True -# Solver calls: 1 -# Evaluations: 0 -# -# Bitwidth distribution: -# 8-bit: 180 layers (80.4%) -# 4-bit: 44 layers (19.6%) -# -model.layers.0.self_attn.q_proj: 8 -model.layers.0.self_attn.k_proj: 8 -model.layers.0.self_attn.v_proj: 8 -model.layers.0.self_attn.o_proj: 8 -model.layers.0.mlp.gate_proj: 4 -model.layers.0.mlp.up_proj: 4 -model.layers.0.mlp.down_proj: 4 -model.layers.1.self_attn.q_proj: 8 -model.layers.1.self_attn.k_proj: 8 -model.layers.1.self_attn.v_proj: 8 -model.layers.1.self_attn.o_proj: 8 -model.layers.1.mlp.gate_proj: 4 -model.layers.1.mlp.up_proj: 4 -model.layers.1.mlp.down_proj: 8 -model.layers.2.self_attn.q_proj: 8 -model.layers.2.self_attn.k_proj: 8 -model.layers.2.self_attn.v_proj: 8 -model.layers.2.self_attn.o_proj: 8 -model.layers.2.mlp.gate_proj: 8 -model.layers.2.mlp.up_proj: 8 -model.layers.2.mlp.down_proj: 4 -model.layers.3.self_attn.q_proj: 8 -model.layers.3.self_attn.k_proj: 8 -model.layers.3.self_attn.v_proj: 8 -model.layers.3.self_attn.o_proj: 8 -model.layers.3.mlp.gate_proj: 8 -model.layers.3.mlp.up_proj: 8 -model.layers.3.mlp.down_proj: 4 -model.layers.4.self_attn.q_proj: 8 -model.layers.4.self_attn.k_proj: 8 -model.layers.4.self_attn.v_proj: 8 -model.layers.4.self_attn.o_proj: 8 -model.layers.4.mlp.gate_proj: 8 -model.layers.4.mlp.up_proj: 8 -model.layers.4.mlp.down_proj: 4 -model.layers.5.self_attn.q_proj: 8 -model.layers.5.self_attn.k_proj: 8 -model.layers.5.self_attn.v_proj: 8 -model.layers.5.self_attn.o_proj: 4 -model.layers.5.mlp.gate_proj: 8 -model.layers.5.mlp.up_proj: 8 -model.layers.5.mlp.down_proj: 4 -model.layers.6.self_attn.q_proj: 8 -model.layers.6.self_attn.k_proj: 8 -model.layers.6.self_attn.v_proj: 8 -model.layers.6.self_attn.o_proj: 8 -model.layers.6.mlp.gate_proj: 8 -model.layers.6.mlp.up_proj: 8 -model.layers.6.mlp.down_proj: 4 -model.layers.7.self_attn.q_proj: 8 -model.layers.7.self_attn.k_proj: 8 -model.layers.7.self_attn.v_proj: 8 -model.layers.7.self_attn.o_proj: 8 -model.layers.7.mlp.gate_proj: 8 -model.layers.7.mlp.up_proj: 8 -model.layers.7.mlp.down_proj: 4 -model.layers.8.self_attn.q_proj: 8 -model.layers.8.self_attn.k_proj: 8 -model.layers.8.self_attn.v_proj: 8 -model.layers.8.self_attn.o_proj: 8 -model.layers.8.mlp.gate_proj: 4 -model.layers.8.mlp.up_proj: 4 -model.layers.8.mlp.down_proj: 4 -model.layers.9.self_attn.q_proj: 8 -model.layers.9.self_attn.k_proj: 8 -model.layers.9.self_attn.v_proj: 8 -model.layers.9.self_attn.o_proj: 8 -model.layers.9.mlp.gate_proj: 4 -model.layers.9.mlp.up_proj: 4 -model.layers.9.mlp.down_proj: 4 -model.layers.10.self_attn.q_proj: 8 -model.layers.10.self_attn.k_proj: 8 -model.layers.10.self_attn.v_proj: 8 -model.layers.10.self_attn.o_proj: 8 -model.layers.10.mlp.gate_proj: 4 -model.layers.10.mlp.up_proj: 4 -model.layers.10.mlp.down_proj: 4 -model.layers.11.self_attn.q_proj: 8 -model.layers.11.self_attn.k_proj: 8 -model.layers.11.self_attn.v_proj: 8 -model.layers.11.self_attn.o_proj: 8 -model.layers.11.mlp.gate_proj: 8 -model.layers.11.mlp.up_proj: 8 -model.layers.11.mlp.down_proj: 4 -model.layers.12.self_attn.q_proj: 8 -model.layers.12.self_attn.k_proj: 8 -model.layers.12.self_attn.v_proj: 8 -model.layers.12.self_attn.o_proj: 8 -model.layers.12.mlp.gate_proj: 4 -model.layers.12.mlp.up_proj: 4 -model.layers.12.mlp.down_proj: 4 -model.layers.13.self_attn.q_proj: 8 -model.layers.13.self_attn.k_proj: 8 -model.layers.13.self_attn.v_proj: 8 -model.layers.13.self_attn.o_proj: 8 -model.layers.13.mlp.gate_proj: 4 -model.layers.13.mlp.up_proj: 4 -model.layers.13.mlp.down_proj: 4 -model.layers.14.self_attn.q_proj: 8 -model.layers.14.self_attn.k_proj: 8 -model.layers.14.self_attn.v_proj: 8 -model.layers.14.self_attn.o_proj: 8 -model.layers.14.mlp.gate_proj: 8 -model.layers.14.mlp.up_proj: 8 -model.layers.14.mlp.down_proj: 4 -model.layers.15.self_attn.q_proj: 8 -model.layers.15.self_attn.k_proj: 8 -model.layers.15.self_attn.v_proj: 8 -model.layers.15.self_attn.o_proj: 8 -model.layers.15.mlp.gate_proj: 4 -model.layers.15.mlp.up_proj: 4 -model.layers.15.mlp.down_proj: 4 -model.layers.16.self_attn.q_proj: 8 -model.layers.16.self_attn.k_proj: 8 -model.layers.16.self_attn.v_proj: 8 -model.layers.16.self_attn.o_proj: 8 -model.layers.16.mlp.gate_proj: 8 -model.layers.16.mlp.up_proj: 8 -model.layers.16.mlp.down_proj: 4 -model.layers.17.self_attn.q_proj: 8 -model.layers.17.self_attn.k_proj: 8 -model.layers.17.self_attn.v_proj: 8 -model.layers.17.self_attn.o_proj: 8 -model.layers.17.mlp.gate_proj: 8 -model.layers.17.mlp.up_proj: 8 -model.layers.17.mlp.down_proj: 8 -model.layers.18.self_attn.q_proj: 8 -model.layers.18.self_attn.k_proj: 8 -model.layers.18.self_attn.v_proj: 8 -model.layers.18.self_attn.o_proj: 8 -model.layers.18.mlp.gate_proj: 8 -model.layers.18.mlp.up_proj: 8 -model.layers.18.mlp.down_proj: 4 -model.layers.19.self_attn.q_proj: 8 -model.layers.19.self_attn.k_proj: 8 -model.layers.19.self_attn.v_proj: 8 -model.layers.19.self_attn.o_proj: 8 -model.layers.19.mlp.gate_proj: 8 -model.layers.19.mlp.up_proj: 8 -model.layers.19.mlp.down_proj: 4 -model.layers.20.self_attn.q_proj: 8 -model.layers.20.self_attn.k_proj: 8 -model.layers.20.self_attn.v_proj: 8 -model.layers.20.self_attn.o_proj: 8 -model.layers.20.mlp.gate_proj: 8 -model.layers.20.mlp.up_proj: 8 -model.layers.20.mlp.down_proj: 8 -model.layers.21.self_attn.q_proj: 8 -model.layers.21.self_attn.k_proj: 8 -model.layers.21.self_attn.v_proj: 8 -model.layers.21.self_attn.o_proj: 8 -model.layers.21.mlp.gate_proj: 4 -model.layers.21.mlp.up_proj: 4 -model.layers.21.mlp.down_proj: 8 -model.layers.22.self_attn.q_proj: 8 -model.layers.22.self_attn.k_proj: 8 -model.layers.22.self_attn.v_proj: 8 -model.layers.22.self_attn.o_proj: 8 -model.layers.22.mlp.gate_proj: 8 -model.layers.22.mlp.up_proj: 8 -model.layers.22.mlp.down_proj: 8 -model.layers.23.self_attn.q_proj: 8 -model.layers.23.self_attn.k_proj: 8 -model.layers.23.self_attn.v_proj: 8 -model.layers.23.self_attn.o_proj: 4 -model.layers.23.mlp.gate_proj: 8 -model.layers.23.mlp.up_proj: 8 -model.layers.23.mlp.down_proj: 8 -model.layers.24.self_attn.q_proj: 8 -model.layers.24.self_attn.k_proj: 8 -model.layers.24.self_attn.v_proj: 8 -model.layers.24.self_attn.o_proj: 8 -model.layers.24.mlp.gate_proj: 8 -model.layers.24.mlp.up_proj: 8 -model.layers.24.mlp.down_proj: 8 -model.layers.25.self_attn.q_proj: 8 -model.layers.25.self_attn.k_proj: 8 -model.layers.25.self_attn.v_proj: 8 -model.layers.25.self_attn.o_proj: 8 -model.layers.25.mlp.gate_proj: 8 -model.layers.25.mlp.up_proj: 8 -model.layers.25.mlp.down_proj: 4 -model.layers.26.self_attn.q_proj: 8 -model.layers.26.self_attn.k_proj: 8 -model.layers.26.self_attn.v_proj: 8 -model.layers.26.self_attn.o_proj: 8 -model.layers.26.mlp.gate_proj: 8 -model.layers.26.mlp.up_proj: 8 -model.layers.26.mlp.down_proj: 4 -model.layers.27.self_attn.q_proj: 8 -model.layers.27.self_attn.k_proj: 8 -model.layers.27.self_attn.v_proj: 8 -model.layers.27.self_attn.o_proj: 8 -model.layers.27.mlp.gate_proj: 8 -model.layers.27.mlp.up_proj: 8 -model.layers.27.mlp.down_proj: 4 -model.layers.28.self_attn.q_proj: 8 -model.layers.28.self_attn.k_proj: 8 -model.layers.28.self_attn.v_proj: 8 -model.layers.28.self_attn.o_proj: 8 -model.layers.28.mlp.gate_proj: 8 -model.layers.28.mlp.up_proj: 8 -model.layers.28.mlp.down_proj: 4 -model.layers.29.self_attn.q_proj: 8 -model.layers.29.self_attn.k_proj: 8 -model.layers.29.self_attn.v_proj: 8 -model.layers.29.self_attn.o_proj: 8 -model.layers.29.mlp.gate_proj: 8 -model.layers.29.mlp.up_proj: 8 -model.layers.29.mlp.down_proj: 4 -model.layers.30.self_attn.q_proj: 8 -model.layers.30.self_attn.k_proj: 8 -model.layers.30.self_attn.v_proj: 8 -model.layers.30.self_attn.o_proj: 8 -model.layers.30.mlp.gate_proj: 8 -model.layers.30.mlp.up_proj: 8 -model.layers.30.mlp.down_proj: 4 -model.layers.31.self_attn.q_proj: 8 -model.layers.31.self_attn.k_proj: 8 -model.layers.31.self_attn.v_proj: 8 -model.layers.31.self_attn.o_proj: 8 -model.layers.31.mlp.gate_proj: 8 -model.layers.31.mlp.up_proj: 8 -model.layers.31.mlp.down_proj: 8 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/special_tokens_map.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/special_tokens_map.json deleted file mode 100644 index b43be96621d147110fb8a18b5776ec6e38516127..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/special_tokens_map.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "bos_token": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "<|eot_id|>" -} diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/tokenizer.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/tokenizer.json deleted file mode 100644 index 1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/tokenizer_config.json b/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/tokenizer_config.json deleted file mode 100644 index 3beeacc86a6ca3cae14ad3004263ab74a4bac07a..0000000000000000000000000000000000000000 --- a/Llama-3.1-8B-Instruct/ll_pred_kl0.01_sha_bw6.88_4-8bit/tokenizer_config.json +++ /dev/null @@ -1,2063 +0,0 @@ -{ - "added_tokens_decoder": { - "128000": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128001": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128002": { - "content": "<|reserved_special_token_0|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128003": { - "content": "<|reserved_special_token_1|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128004": { - "content": "<|finetune_right_pad_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128005": { - "content": "<|reserved_special_token_2|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128006": { - "content": "<|start_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128007": { - "content": "<|end_header_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128008": { - "content": "<|eom_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128009": { - "content": "<|eot_id|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128010": { - "content": "<|python_tag|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128011": { - "content": "<|reserved_special_token_3|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128012": { - "content": "<|reserved_special_token_4|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128013": { - "content": "<|reserved_special_token_5|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128014": { - "content": "<|reserved_special_token_6|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128015": { - "content": "<|reserved_special_token_7|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128016": { - "content": "<|reserved_special_token_8|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128017": { - "content": "<|reserved_special_token_9|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128018": { - "content": "<|reserved_special_token_10|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128019": { - "content": "<|reserved_special_token_11|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128020": { - "content": "<|reserved_special_token_12|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128021": { - "content": "<|reserved_special_token_13|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128022": { - "content": "<|reserved_special_token_14|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128023": { - "content": "<|reserved_special_token_15|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128024": { - "content": "<|reserved_special_token_16|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128025": { - "content": "<|reserved_special_token_17|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128026": { - "content": "<|reserved_special_token_18|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128027": { - "content": "<|reserved_special_token_19|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128028": { - "content": "<|reserved_special_token_20|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128029": { - "content": "<|reserved_special_token_21|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128030": { - "content": "<|reserved_special_token_22|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128031": { - "content": "<|reserved_special_token_23|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128032": { - "content": "<|reserved_special_token_24|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128033": { - "content": "<|reserved_special_token_25|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128034": { - "content": "<|reserved_special_token_26|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128035": { - "content": "<|reserved_special_token_27|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128036": { - "content": "<|reserved_special_token_28|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128037": { - "content": "<|reserved_special_token_29|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128038": { - "content": "<|reserved_special_token_30|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128039": { - "content": "<|reserved_special_token_31|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128040": { - "content": "<|reserved_special_token_32|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128041": { - "content": "<|reserved_special_token_33|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128042": { - "content": "<|reserved_special_token_34|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128043": { - "content": "<|reserved_special_token_35|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128044": { - "content": "<|reserved_special_token_36|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128045": { - "content": "<|reserved_special_token_37|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128046": { - "content": "<|reserved_special_token_38|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128047": { - "content": "<|reserved_special_token_39|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128048": { - "content": "<|reserved_special_token_40|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128049": { - "content": "<|reserved_special_token_41|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128050": { - "content": "<|reserved_special_token_42|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128051": { - "content": "<|reserved_special_token_43|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128052": { - "content": "<|reserved_special_token_44|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128053": { - "content": "<|reserved_special_token_45|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128054": { - "content": "<|reserved_special_token_46|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128055": { - "content": "<|reserved_special_token_47|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128056": { - "content": "<|reserved_special_token_48|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128057": { - "content": "<|reserved_special_token_49|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128058": { - "content": "<|reserved_special_token_50|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128059": { - "content": "<|reserved_special_token_51|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128060": { - "content": "<|reserved_special_token_52|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128061": { - "content": "<|reserved_special_token_53|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128062": { - "content": "<|reserved_special_token_54|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128063": { - "content": "<|reserved_special_token_55|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128064": { - "content": "<|reserved_special_token_56|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128065": { - "content": "<|reserved_special_token_57|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128066": { - "content": "<|reserved_special_token_58|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128067": { - "content": "<|reserved_special_token_59|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128068": { - "content": "<|reserved_special_token_60|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128069": { - "content": "<|reserved_special_token_61|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128070": { - "content": "<|reserved_special_token_62|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128071": { - "content": "<|reserved_special_token_63|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128072": { - "content": "<|reserved_special_token_64|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128073": { - "content": "<|reserved_special_token_65|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128074": { - "content": "<|reserved_special_token_66|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128075": { - "content": "<|reserved_special_token_67|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128076": { - "content": "<|reserved_special_token_68|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128077": { - "content": "<|reserved_special_token_69|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128078": { - "content": "<|reserved_special_token_70|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128079": { - "content": "<|reserved_special_token_71|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128080": { - "content": "<|reserved_special_token_72|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128081": { - "content": "<|reserved_special_token_73|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128082": { - "content": "<|reserved_special_token_74|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128083": { - "content": "<|reserved_special_token_75|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128084": { - "content": "<|reserved_special_token_76|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128085": { - "content": "<|reserved_special_token_77|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128086": { - "content": "<|reserved_special_token_78|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128087": { - "content": "<|reserved_special_token_79|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128088": { - "content": "<|reserved_special_token_80|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128089": { - "content": "<|reserved_special_token_81|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128090": { - "content": "<|reserved_special_token_82|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128091": { - "content": "<|reserved_special_token_83|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128092": { - "content": "<|reserved_special_token_84|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128093": { - "content": "<|reserved_special_token_85|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128094": { - "content": "<|reserved_special_token_86|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128095": { - "content": "<|reserved_special_token_87|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128096": { - "content": "<|reserved_special_token_88|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128097": { - "content": "<|reserved_special_token_89|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128098": { - "content": "<|reserved_special_token_90|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128099": { - "content": "<|reserved_special_token_91|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128100": { - "content": "<|reserved_special_token_92|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128101": { - "content": "<|reserved_special_token_93|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128102": { - "content": "<|reserved_special_token_94|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128103": { - "content": "<|reserved_special_token_95|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128104": { - "content": "<|reserved_special_token_96|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128105": { - "content": "<|reserved_special_token_97|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128106": { - "content": "<|reserved_special_token_98|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128107": { - "content": "<|reserved_special_token_99|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128108": { - "content": "<|reserved_special_token_100|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128109": { - "content": "<|reserved_special_token_101|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128110": { - "content": "<|reserved_special_token_102|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128111": { - "content": "<|reserved_special_token_103|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128112": { - "content": "<|reserved_special_token_104|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128113": { - "content": "<|reserved_special_token_105|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128114": { - "content": "<|reserved_special_token_106|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128115": { - "content": "<|reserved_special_token_107|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128116": { - "content": "<|reserved_special_token_108|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128117": { - "content": "<|reserved_special_token_109|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128118": { - "content": "<|reserved_special_token_110|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128119": { - "content": "<|reserved_special_token_111|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128120": { - "content": "<|reserved_special_token_112|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128121": { - "content": "<|reserved_special_token_113|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128122": { - "content": "<|reserved_special_token_114|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128123": { - "content": "<|reserved_special_token_115|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128124": { - "content": "<|reserved_special_token_116|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128125": { - "content": "<|reserved_special_token_117|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128126": { - "content": "<|reserved_special_token_118|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128127": { - "content": "<|reserved_special_token_119|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128128": { - "content": "<|reserved_special_token_120|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128129": { - "content": "<|reserved_special_token_121|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128130": { - "content": "<|reserved_special_token_122|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128131": { - "content": "<|reserved_special_token_123|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128132": { - "content": "<|reserved_special_token_124|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128133": { - "content": "<|reserved_special_token_125|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128134": { - "content": "<|reserved_special_token_126|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128135": { - "content": "<|reserved_special_token_127|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128136": { - "content": "<|reserved_special_token_128|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128137": { - "content": "<|reserved_special_token_129|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128138": { - "content": "<|reserved_special_token_130|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128139": { - "content": "<|reserved_special_token_131|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128140": { - "content": "<|reserved_special_token_132|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128141": { - "content": "<|reserved_special_token_133|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128142": { - "content": "<|reserved_special_token_134|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128143": { - "content": "<|reserved_special_token_135|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128144": { - "content": "<|reserved_special_token_136|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128145": { - "content": "<|reserved_special_token_137|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128146": { - "content": "<|reserved_special_token_138|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128147": { - "content": "<|reserved_special_token_139|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128148": { - "content": "<|reserved_special_token_140|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128149": { - "content": "<|reserved_special_token_141|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128150": { - "content": "<|reserved_special_token_142|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128151": { - "content": "<|reserved_special_token_143|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128152": { - "content": "<|reserved_special_token_144|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128153": { - "content": "<|reserved_special_token_145|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128154": { - "content": "<|reserved_special_token_146|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128155": { - "content": "<|reserved_special_token_147|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128156": { - "content": "<|reserved_special_token_148|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128157": { - "content": "<|reserved_special_token_149|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128158": { - "content": "<|reserved_special_token_150|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128159": { - "content": "<|reserved_special_token_151|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128160": { - "content": "<|reserved_special_token_152|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128161": { - "content": "<|reserved_special_token_153|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128162": { - "content": "<|reserved_special_token_154|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128163": { - "content": "<|reserved_special_token_155|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128164": { - "content": "<|reserved_special_token_156|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128165": { - "content": "<|reserved_special_token_157|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128166": { - "content": "<|reserved_special_token_158|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128167": { - "content": "<|reserved_special_token_159|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128168": { - "content": "<|reserved_special_token_160|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128169": { - "content": "<|reserved_special_token_161|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128170": { - "content": "<|reserved_special_token_162|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128171": { - "content": "<|reserved_special_token_163|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128172": { - "content": "<|reserved_special_token_164|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128173": { - "content": "<|reserved_special_token_165|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128174": { - "content": "<|reserved_special_token_166|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128175": { - "content": "<|reserved_special_token_167|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128176": { - "content": "<|reserved_special_token_168|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128177": { - "content": "<|reserved_special_token_169|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128178": { - "content": "<|reserved_special_token_170|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128179": { - "content": "<|reserved_special_token_171|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128180": { - "content": "<|reserved_special_token_172|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128181": { - "content": "<|reserved_special_token_173|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128182": { - "content": "<|reserved_special_token_174|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128183": { - "content": "<|reserved_special_token_175|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128184": { - "content": "<|reserved_special_token_176|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128185": { - "content": "<|reserved_special_token_177|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128186": { - "content": "<|reserved_special_token_178|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128187": { - "content": "<|reserved_special_token_179|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128188": { - "content": "<|reserved_special_token_180|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128189": { - "content": "<|reserved_special_token_181|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128190": { - "content": "<|reserved_special_token_182|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128191": { - "content": "<|reserved_special_token_183|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128192": { - "content": "<|reserved_special_token_184|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128193": { - "content": "<|reserved_special_token_185|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128194": { - "content": "<|reserved_special_token_186|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128195": { - "content": "<|reserved_special_token_187|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128196": { - "content": "<|reserved_special_token_188|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128197": { - "content": "<|reserved_special_token_189|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128198": { - "content": "<|reserved_special_token_190|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128199": { - "content": "<|reserved_special_token_191|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128200": { - "content": "<|reserved_special_token_192|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128201": { - "content": "<|reserved_special_token_193|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128202": { - "content": "<|reserved_special_token_194|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128203": { - "content": "<|reserved_special_token_195|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128204": { - "content": "<|reserved_special_token_196|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128205": { - "content": "<|reserved_special_token_197|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128206": { - "content": "<|reserved_special_token_198|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128207": { - "content": "<|reserved_special_token_199|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128208": { - "content": "<|reserved_special_token_200|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128209": { - "content": "<|reserved_special_token_201|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128210": { - "content": "<|reserved_special_token_202|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128211": { - "content": "<|reserved_special_token_203|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128212": { - "content": "<|reserved_special_token_204|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128213": { - "content": "<|reserved_special_token_205|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128214": { - "content": "<|reserved_special_token_206|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128215": { - "content": "<|reserved_special_token_207|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128216": { - "content": "<|reserved_special_token_208|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128217": { - "content": "<|reserved_special_token_209|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128218": { - "content": "<|reserved_special_token_210|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128219": { - "content": "<|reserved_special_token_211|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128220": { - "content": "<|reserved_special_token_212|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128221": { - "content": "<|reserved_special_token_213|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128222": { - "content": "<|reserved_special_token_214|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128223": { - "content": "<|reserved_special_token_215|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128224": { - "content": "<|reserved_special_token_216|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128225": { - "content": "<|reserved_special_token_217|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128226": { - "content": "<|reserved_special_token_218|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128227": { - "content": "<|reserved_special_token_219|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128228": { - "content": "<|reserved_special_token_220|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128229": { - "content": "<|reserved_special_token_221|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128230": { - "content": "<|reserved_special_token_222|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128231": { - "content": "<|reserved_special_token_223|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128232": { - "content": "<|reserved_special_token_224|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128233": { - "content": "<|reserved_special_token_225|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128234": { - "content": "<|reserved_special_token_226|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128235": { - "content": "<|reserved_special_token_227|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128236": { - "content": "<|reserved_special_token_228|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128237": { - "content": "<|reserved_special_token_229|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128238": { - "content": "<|reserved_special_token_230|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128239": { - "content": "<|reserved_special_token_231|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128240": { - "content": "<|reserved_special_token_232|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128241": { - "content": "<|reserved_special_token_233|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128242": { - "content": "<|reserved_special_token_234|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128243": { - "content": "<|reserved_special_token_235|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128244": { - "content": "<|reserved_special_token_236|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128245": { - "content": "<|reserved_special_token_237|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128246": { - "content": "<|reserved_special_token_238|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128247": { - "content": "<|reserved_special_token_239|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128248": { - "content": "<|reserved_special_token_240|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128249": { - "content": "<|reserved_special_token_241|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128250": { - "content": "<|reserved_special_token_242|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128251": { - "content": "<|reserved_special_token_243|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128252": { - "content": "<|reserved_special_token_244|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128253": { - "content": "<|reserved_special_token_245|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128254": { - "content": "<|reserved_special_token_246|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "128255": { - "content": "<|reserved_special_token_247|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - } - }, - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "extra_special_tokens": {}, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|eot_id|>", - "tokenizer_class": "PreTrainedTokenizerFast" -}