diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..aa7aacd0134a92c3c1943fdecc75cd8b7420cce6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..2ab98ef068d62829d17c5ade1827b9f013fa2bbf --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,86 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%} +{{ '' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '' }} +{%- endif -%} +{%- if content.strip() -%} +{{ content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{- '' + tc.name -}} +{% set _args = tc.arguments %}{% for k, v in _args.items() %}{{ k }}{{ v | tojson(ensure_ascii=False) if v is not string else v }}{% endfor %}{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '' }} +{{- m.content }} +{{- '' }} +{%- else -%} +<|observation|>{% for tr in m.content %} +{{ tr.output if tr.output is defined else tr }}{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|>{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..68b5a1596e61ef956c5276d7bfeeb7f622001fb4 --- /dev/null +++ b/config.json @@ -0,0 +1,3585 @@ +{ + "architectures": [ + "GlmMoeDsaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 0, + "dtype": "bfloat16", + "eos_token_id": [ + 154820, + 154827, + 154829 + ], + "ep_size": 1, + "first_k_dense_replace": 3, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 6144, + "index_head_dim": 128, + "index_n_heads": 32, + "index_topk": 2048, + "indexer_rope_interleave": true, + "initializer_range": 0.02, + "intermediate_size": 12288, + "kv_lora_rank": 512, + "max_position_embeddings": 202752, + "mlp_layer_types": [ + "dense", + "dense", + "dense", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse" + ], + "model_type": "glm_moe_dsa", + "moe_intermediate_size": 2048, + "moe_layer_freq": 1, + "n_group": 1, + "n_routed_experts": 256, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 64, + "num_experts_per_tok": 8, + "num_hidden_layers": 78, + "num_key_value_heads": 64, + "num_nextn_predict_layers": 1, + "pad_token_id": 154820, + "pretraining_tp": 1, + "q_lora_rank": 2048, + "qk_head_dim": 256, + "qk_nope_head_dim": 192, + "qk_rope_head_dim": 64, + "quantization_config": { + "autoround_version": "0.12.0", + "bits": 4, + "data_type": "int", + "extra_config": { + "model.layers.0.mlp.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.mlp.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.mlp.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.mlp.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.mlp.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.mlp.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.mlp.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.mlp.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.mlp.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + } + }, + "group_size": 128, + "iters": 0, + "packing_format": "auto_round:auto_awq", + "quant_method": "auto-round", + "sym": false + }, + "rms_norm_eps": 1e-05, + "rope_interleave": true, + "rope_parameters": { + "rope_theta": 1000000, + "rope_type": "default" + }, + "routed_scaling_factor": 2.5, + "scoring_func": "sigmoid", + "tie_word_embeddings": false, + "topk_group": 1, + "topk_method": "noaux_tc", + "transformers_version": "5.2.0", + "use_cache": true, + "v_head_dim": 256, + "vocab_size": 154880, + "torch_dtype": "float16" +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cdf918d86b3f8523b2c90510aee4205bc5cdac35 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "do_sample": true, + "eos_token_id": [ + 154820, + 154827, + 154829 + ], + "pad_token_id": 154820, + "temperature": 1.0, + "top_p": 0.95, + "transformers_version": "5.2.0" +} diff --git a/model-00001-of-00079.safetensors b/model-00001-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2909a02fd867591fa130a2499cdc5e622881375e --- /dev/null +++ b/model-00001-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d503276ff6d45d1ac8531afe8941bd603583f4ae71f2f50dadacbeee6d52302a +size 5362682984 diff --git a/model-00002-of-00079.safetensors b/model-00002-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82c9f21023e489b6b2fb2a51b6b318409ffc1725 --- /dev/null +++ b/model-00002-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f9e21e411b68579fc57eab21aeac9232f42a10f9c60886d44a0ff75bb80c2a +size 5363333544 diff --git a/model-00003-of-00079.safetensors b/model-00003-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94b0ff2abdc4e866d894055e0e1fb84cc0d74daa --- /dev/null +++ b/model-00003-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3acf2210bc30804c39fd742445fbbfda9cb71712065d0d1b1edd96dd4780cc65 +size 5363333552 diff --git a/model-00004-of-00079.safetensors b/model-00004-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f02627a06bf640231759ffdb4ae39395df235884 --- /dev/null +++ b/model-00004-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79acea19f77023040e064267ddddf6f91ba91e5698ff5a4fddeb6341bca0bb73 +size 5363333544 diff --git a/model-00005-of-00079.safetensors b/model-00005-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..693ec257e7ff5fd6474ff0353705864fa5a0df2c --- /dev/null +++ b/model-00005-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb4b5b3e5908816f9c5e68c7f775ffa41c1e2382e3df9c8b3e7ad9ba14903f98 +size 5363333544 diff --git a/model-00006-of-00079.safetensors b/model-00006-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a49c31e6eaf61c40f5c8faf9d4ab3f623cadc66e --- /dev/null +++ b/model-00006-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da3c0714c028e9f39d7f514608595c3b954ddc434ad68428a5f598bcbdaa566d +size 5363333552 diff --git a/model-00007-of-00079.safetensors b/model-00007-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3af9731b33ca6dc00089323f6a15c11ea6e05cf --- /dev/null +++ b/model-00007-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1c0e2097d05f6f1ad986b83c8e311f1be4161532a7d31a0eb5c1d89f271093 +size 5363333544 diff --git a/model-00008-of-00079.safetensors b/model-00008-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9805faa0489cd862f0b52308a50ccad4fadf0de0 --- /dev/null +++ b/model-00008-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52dafb35466732440b3d9fdb9b370fea4453a800f7bb72d7d8e8b857897ddf5f +size 5363334480 diff --git a/model-00009-of-00079.safetensors b/model-00009-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11eb074df5ce37b153d84c72c593c41070658c66 --- /dev/null +++ b/model-00009-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c3c927beee319a7eab7dc8567b40304463dfd70e507dc6ea9f2885c8bbc531 +size 5363335848 diff --git a/model-00010-of-00079.safetensors b/model-00010-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc5235b0d132bb3e2d36f34092353a6ee4f15c33 --- /dev/null +++ b/model-00010-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843071249d02eb8d34cd92e2938c957b3c7008aee9211dc8848ccb3ee1440796 +size 5363335872 diff --git a/model-00011-of-00079.safetensors b/model-00011-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed16923741bebd2d1cefc1bae9cf07ebc7efd802 --- /dev/null +++ b/model-00011-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b5d4a34fb477772a7b4da781b04c1faeacc94ae69a1158bc50fdd90188b2333 +size 5363335872 diff --git a/model-00012-of-00079.safetensors b/model-00012-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f92874189f609345e254600a09693074f12898d0 --- /dev/null +++ b/model-00012-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:685b0f533f070a82aa90870916b260d54ba7f1e2d6fc9d9dc67409890f658091 +size 5363335872 diff --git a/model-00013-of-00079.safetensors b/model-00013-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fac977f997a0c494c7a33f3c2c8b3a134778faf --- /dev/null +++ b/model-00013-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f141d107a01d8cd0d62fa08d35bb8515970807519752c49ac776447963e3a7a +size 5363335872 diff --git a/model-00014-of-00079.safetensors b/model-00014-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6220ef33652a12c92c2ebf8269cce192681c1251 --- /dev/null +++ b/model-00014-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ebf2da295ac32ffdcf56c8afc378725544618a9f605697b6bdbf3d88dae59a1 +size 5363335872 diff --git a/model-00015-of-00079.safetensors b/model-00015-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0eed4a74c3ecffffba8a2134bb716c9ce9568979 --- /dev/null +++ b/model-00015-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c538482066a5c29b9a8bd4a86dc1aceba947af224f69010178416e77dfc3c7cf +size 5363335872 diff --git a/model-00016-of-00079.safetensors b/model-00016-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe2916522b6a62638883f4866001e5a33cba59f6 --- /dev/null +++ b/model-00016-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dfbd2021d2465853a2c70d4a3b2d1b6326ed9f9d0160d929726b2bf490045b0 +size 5363335872 diff --git a/model-00017-of-00079.safetensors b/model-00017-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80d358429ad9dc5a58066759b3752e2b4adfe60f --- /dev/null +++ b/model-00017-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f398f5d436bea896c5182a2fe5cd0b8b524a839f7d46ce7bcf0c04b9ea2bb91a +size 5363335872 diff --git a/model-00018-of-00079.safetensors b/model-00018-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1569e3ff4d694e2d6447dc0dd2f76d3b65ab7f0 --- /dev/null +++ b/model-00018-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972ef16e857b0fb9037488496f16e84880f087b5a2e771ff73ea8d476899edc2 +size 5363335872 diff --git a/model-00019-of-00079.safetensors b/model-00019-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..529f0333d04e2fa68a7cd8eef6e725606bc5944e --- /dev/null +++ b/model-00019-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:553e25b68fb5530b9e317849f94bc4a8e67a4be783e669827b6683c46bf87259 +size 5363335872 diff --git a/model-00020-of-00079.safetensors b/model-00020-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4df4a32b57d00473e4d9ffa3d1767217fbe1a608 --- /dev/null +++ b/model-00020-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1b7e12a081b9d5839570a87c4efa465aa68035e0942f3132243c270dca862f +size 5363335872 diff --git a/model-00021-of-00079.safetensors b/model-00021-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a419d2d079dfd01302a214dffc478591a69c8e2 --- /dev/null +++ b/model-00021-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f60d7db12e5ec54e57542878d43287be1c25eab707179d7eb78b4c7e34a230 +size 5363335872 diff --git a/model-00022-of-00079.safetensors b/model-00022-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86de5ff10442533ce98a39f6018ced70ebc38d6b --- /dev/null +++ b/model-00022-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74800e0df90e9e5b1612cfea4bf2458881dcd9048670ad8eff840f32a76b3ea7 +size 5363335872 diff --git a/model-00023-of-00079.safetensors b/model-00023-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b7be724b532edf599944d68aa4e49ce4af60a69 --- /dev/null +++ b/model-00023-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:becfc4f008454aded18e0f84615d7d5ccb07a4f988c086a66ea4839e054ffad1 +size 5363335872 diff --git a/model-00024-of-00079.safetensors b/model-00024-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e0bb1ed300d043d4603a2e0579f7ae6b4d92f06 --- /dev/null +++ b/model-00024-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc9b3b53e75bc9a9c2844f56d48af93105eb19827830cd7156cf52530dc435c +size 5363335872 diff --git a/model-00025-of-00079.safetensors b/model-00025-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b84b3e6f217266140211bc67f0f69da63148ff13 --- /dev/null +++ b/model-00025-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2947bc54e44885dc3e86ec8c58be75ea8f8c3efe17ebf3b08053eff47f68bd +size 5363335872 diff --git a/model-00026-of-00079.safetensors b/model-00026-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6f47252aa3305accbeb289b8cd6c5bb22645dd8 --- /dev/null +++ b/model-00026-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57c4f39fd0dca044c3f22b3e6e7fac1796cde376c97cfa3eb26cc677dcd4db8f +size 5363335872 diff --git a/model-00027-of-00079.safetensors b/model-00027-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3309560c92c6474e264324c16a9f821d3030d676 --- /dev/null +++ b/model-00027-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a47968e296b6e64f246341c443a93562f5469b252389139d8fd4f88c2caf9931 +size 5363335872 diff --git a/model-00028-of-00079.safetensors b/model-00028-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84198abab0bb8c52016e394409d55f7121acf50a --- /dev/null +++ b/model-00028-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74585a47bd7eab51b57c5198b98b84de22aed72170c50ae58dbed845771c16d6 +size 5363335872 diff --git a/model-00029-of-00079.safetensors b/model-00029-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af02d28f3cc62bef126d2227161691f2cf30cacc --- /dev/null +++ b/model-00029-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf7738171dc771cd84ffa7b2b5b9eb72381da467c4405d051328eaedc1cead7 +size 5363335872 diff --git a/model-00030-of-00079.safetensors b/model-00030-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e91cb487d198c11f9493f7ab8c90950ce06a9b0 --- /dev/null +++ b/model-00030-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88a678110a9a68ae24c2db00b8d41c44bfb0f50c7540b4361b65d2aa33396c19 +size 5363335896 diff --git a/model-00031-of-00079.safetensors b/model-00031-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91f684514589a21e63c003fc31520daf1dab5762 --- /dev/null +++ b/model-00031-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13766f9d11c10b514e97340105aa5ba216860af30e9e7fbd4f87997cc66a8241 +size 5363335904 diff --git a/model-00032-of-00079.safetensors b/model-00032-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c608c023528ca8478073f72d8f75154b74061549 --- /dev/null +++ b/model-00032-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75794c04beaa7366dc3a0e6bdaaf21a6e14230bee288a6c4853b407b48dab14f +size 5169415104 diff --git a/model-00033-of-00079.safetensors b/model-00033-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a9a207dd7ff43eba3f8b1d0547c59efe22c9c8b --- /dev/null +++ b/model-00033-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f94835d83bb3b299947a9d79e89190fba42194ff3e3fba3b629818edf04e306 +size 5344776064 diff --git a/model-00034-of-00079.safetensors b/model-00034-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6bc50682a63d593183493e1179083a1d5cb933c --- /dev/null +++ b/model-00034-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b81d0c15834b3f77ade1b3cb122605d4a6f6ad2882918fa52c3661687193bf24 +size 5347631632 diff --git a/model-00035-of-00079.safetensors b/model-00035-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5dcb138781e73e0aad88c14a401531744a98e5c5 --- /dev/null +++ b/model-00035-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:815f6e9c838b9fd8d30f802916d1d9550362cd9c9ddde1b024a8ef590be3f3a5 +size 5366726448 diff --git a/model-00036-of-00079.safetensors b/model-00036-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..820eeb3427a41c9a3ca58d22c54982947719fe98 --- /dev/null +++ b/model-00036-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540b75cac855ef92244eb4c0a1dab40b3db569434ef230a170bc1b4107bf3aa5 +size 5363335832 diff --git a/model-00037-of-00079.safetensors b/model-00037-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98171c5110d393aad5c96a49799af49d6744e8fe --- /dev/null +++ b/model-00037-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a2c865903988d31975bcf7719f7302860c4dbb1a71055e48e0c00c7d7492ae9 +size 5363335832 diff --git a/model-00038-of-00079.safetensors b/model-00038-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..143d72fede97a9b8764c003b6d6034e6412b9c3c --- /dev/null +++ b/model-00038-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0cbf81be335db6b9c717c8c1d9950b5d0a32ca34722b627c31ce7e1f4c93ab1 +size 5363335832 diff --git a/model-00039-of-00079.safetensors b/model-00039-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6f9a11870e055d455363dc97708ca95b3bd478c --- /dev/null +++ b/model-00039-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d103944e19c6f3ef7a260f2c496ab93f55213f57172841424f7f5a6af39aa0 +size 5363335832 diff --git a/model-00040-of-00079.safetensors b/model-00040-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11e9dd82cbae1b2b96bb2a760eca1799616ef1fa --- /dev/null +++ b/model-00040-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e3049f45b92ae8e5ac35d42bf5b47c04aa158494f236feb38b3aea6699f197 +size 5363335832 diff --git a/model-00041-of-00079.safetensors b/model-00041-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de4a5af086b1675ca292e15ebd13a378c7cd7c7c --- /dev/null +++ b/model-00041-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2285c0659e65fb9b193be221fe33a164a8fbf990c1bbe9d7bb1c25593d8664 +size 5363335832 diff --git a/model-00042-of-00079.safetensors b/model-00042-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..670b8c14e8fd560726b42863c781b05cd945de3d --- /dev/null +++ b/model-00042-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa312393d0185010b765acc2eedb76bcd6a1870cd50d96f7b5dc064fada8d48 +size 5363335832 diff --git a/model-00043-of-00079.safetensors b/model-00043-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57b08db7dd522fb80371ac72a182b27a6e7ea2b1 --- /dev/null +++ b/model-00043-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d97009205ecc46145d9830bc3ec87b48e6e30744054e02fd7fdf7c142863765f +size 5363335832 diff --git a/model-00044-of-00079.safetensors b/model-00044-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfdb32aa490de43ea9594f28e6a8341254233dee --- /dev/null +++ b/model-00044-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2340ef72dfcd971cc857ecb63c9e4d35cf245eecac8dc33cc7e19fe1cf8f6c2 +size 5363335832 diff --git a/model-00045-of-00079.safetensors b/model-00045-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4aa42c908cd77e140e1731063eb1f2deb87e4fba --- /dev/null +++ b/model-00045-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc60f161ef88d35022d151fd9dc08257306e2cf597ef00fc97d80bf17532282f +size 5363335832 diff --git a/model-00046-of-00079.safetensors b/model-00046-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d813eb6c5bc92a4ebb657c7d9f06e8667b01be57 --- /dev/null +++ b/model-00046-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa875dfb566ad4cdbe9baf2871b3fe9173c0b6b27a33e7acba33cb1601b395ee +size 5363335832 diff --git a/model-00047-of-00079.safetensors b/model-00047-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08eb2da479f750ebcd4ffde3fbd09d8860a170ea --- /dev/null +++ b/model-00047-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76ec0387c0852b2dc7a375dbea36df87ec8c84ef1555666b6749029147c2f20 +size 5363335832 diff --git a/model-00048-of-00079.safetensors b/model-00048-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72aef11110f3739443e943159a87e0412b0fed03 --- /dev/null +++ b/model-00048-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58e23a1f1b4a39498224c708d80ee2391c37b5225ebd9abcfe207ec0b2c3402b +size 5363335832 diff --git a/model-00049-of-00079.safetensors b/model-00049-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63fa0f47e39d312e6a77fc5ae4416b8bbef3e07e --- /dev/null +++ b/model-00049-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:573286357eab57d595d694f55922babb05cd1a23de1766fdf1580f69d945f1d1 +size 5363335832 diff --git a/model-00050-of-00079.safetensors b/model-00050-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..199b25c12a1902b6c997a4a664eb1574c731cd0c --- /dev/null +++ b/model-00050-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b8d70710387d15a8377a18fadf994d4ef1ce12c63d3eec74261ddf099f3ebd +size 5363335832 diff --git a/model-00051-of-00079.safetensors b/model-00051-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba731ea957586787a7e395d866e3a4b25a4c7d83 --- /dev/null +++ b/model-00051-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6c1aa62f1fcf9a4b39bd8c0402885147d95e2ffec669e8de9c3b5373a244f46 +size 5363335832 diff --git a/model-00052-of-00079.safetensors b/model-00052-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8837d92e2321190604990220b241857e5c88858 --- /dev/null +++ b/model-00052-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc72416a1da799849f2831c1e5b7ccbbc96fd2f4c9b881eff9a2bc9c2755487 +size 5363335832 diff --git a/model-00053-of-00079.safetensors b/model-00053-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb795940f45a438d7120e6e9a6a43325e12108dd --- /dev/null +++ b/model-00053-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:629799f55e5a246fd83ee979d1aa6d42470fac7d3ac4c05d2be9e24227d8cdd6 +size 5363335832 diff --git a/model-00054-of-00079.safetensors b/model-00054-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..538b01f3618de91674725942c29c709ab40abbc7 --- /dev/null +++ b/model-00054-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5e68d92da8298886a461201ace4b40bcb19e06b1d67748cfab6cab87dc3bb1 +size 5363335832 diff --git a/model-00055-of-00079.safetensors b/model-00055-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7949c297112734b30da0cdf3a8a7945b16fa4450 --- /dev/null +++ b/model-00055-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008e13fd3fc3f8d1d51d846a74793399e934994a9c2bdc2a4700126ee871bf59 +size 5363335832 diff --git a/model-00056-of-00079.safetensors b/model-00056-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a82a30106b0f41f83f629cd032a4fe7c9627a5c1 --- /dev/null +++ b/model-00056-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb0513e5d997ad4ebf0c8c8443813d7cf287548a7d4b182193cbcf451fe67cce +size 5363335832 diff --git a/model-00057-of-00079.safetensors b/model-00057-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ae10fe98f0508deb2b824ad59a4c77e42456db2 --- /dev/null +++ b/model-00057-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41ff7bb6f8b268cdb9f8158bfb636dba79c7f53d9a21313d93794ad0a715d10a +size 5363335832 diff --git a/model-00058-of-00079.safetensors b/model-00058-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..112b62b148f1cd3f59608b0cd4f08028bef9655f --- /dev/null +++ b/model-00058-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d3ee336ac00b8ee084b6884ef7ee6752a593ec36ab179536cf56148c705666 +size 5363335832 diff --git a/model-00059-of-00079.safetensors b/model-00059-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f188cd2c4ba31cbf320086616ca2d56172f59a0 --- /dev/null +++ b/model-00059-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:822991d8c5739c657c6e4a994529c9d897aad41adcf876f39250dd1474e4fdb7 +size 5363335832 diff --git a/model-00060-of-00079.safetensors b/model-00060-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2133f01051e3fc3818991e53d0c5f5f9e6ea3c14 --- /dev/null +++ b/model-00060-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e27cc957e045383aaa42703f5f6e6c685ad5cc0605a1845fc5e03a831c822cd +size 5363335832 diff --git a/model-00061-of-00079.safetensors b/model-00061-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b34b4430967a1456de894c99415329f832fda057 --- /dev/null +++ b/model-00061-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927909605b11fa69318ff979a82ce817d0bce0106dea75ae82c91b2b1bfdf765 +size 5363335832 diff --git a/model-00062-of-00079.safetensors b/model-00062-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30aa379a8d1c98f1531e2c16a60e7c0db90207a4 --- /dev/null +++ b/model-00062-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1b281017bc5213421372d856cee459707877f2894f94ba701b7b4a393f757f +size 5363335832 diff --git a/model-00063-of-00079.safetensors b/model-00063-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52b7104f3751d6ac5d48d9fa251210717db69540 --- /dev/null +++ b/model-00063-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c64eb0932f8a201093cd68db632a4fe5265ecdefecb1b7a56d4a1535f9f7334 +size 5363335832 diff --git a/model-00064-of-00079.safetensors b/model-00064-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2689784e4486b776b17074f8613d27e7029aa856 --- /dev/null +++ b/model-00064-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08ae0a0eccf47cc4bebf3b491c6686c631fc14bbb9829cd24e780aeff1827707 +size 5363335832 diff --git a/model-00065-of-00079.safetensors b/model-00065-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a0b7c423c1a6e9f471d862df18463f462124fd9 --- /dev/null +++ b/model-00065-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19a5d72aa96b0e803f338235533c954b37d44505507610e6e07b0305c3da0422 +size 5363335832 diff --git a/model-00066-of-00079.safetensors b/model-00066-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa785b173ec02facbfc5fdbc6b86f8744a878a71 --- /dev/null +++ b/model-00066-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcc89b6dd1260fdb04c2c66a0127f9c7b26af70c5f554c96def4a222579881cf +size 5363335832 diff --git a/model-00067-of-00079.safetensors b/model-00067-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05d881bae7ce3e591ed2d4969b5a7e9fced14912 --- /dev/null +++ b/model-00067-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311cf6074c9405f27830f30cd570b0f6ad4763573c0a85f96d1a494733e17b02 +size 5363335832 diff --git a/model-00068-of-00079.safetensors b/model-00068-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cab057e4c49a1ac9ec18cadb7e1562454b30cf2c --- /dev/null +++ b/model-00068-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4614af398ee7b271edd7555b6b6822307723009a2a3b0f401fac21383b528aab +size 5363335832 diff --git a/model-00069-of-00079.safetensors b/model-00069-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..998fe1f4b06cb9ee3e960d5032750622416b7cd8 --- /dev/null +++ b/model-00069-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:716cfd5143e99d6be27d9d83e63d2c2ae77d1d2ab55e4b929867b0799fabb3c9 +size 5363335832 diff --git a/model-00070-of-00079.safetensors b/model-00070-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e45258459e04b92a79d9dd0b05016e182c68e40 --- /dev/null +++ b/model-00070-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c1d966a60cc163cf16b9e424d3c23de6202fff135307470e98faa7ba0c47ba9 +size 5363335832 diff --git a/model-00071-of-00079.safetensors b/model-00071-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..658f3450d32ff8d5f0901af7bfb8552647b17578 --- /dev/null +++ b/model-00071-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf03d5355bb3fee4519f54c0c8b48cf0fad7fb0de0d4ef04af359751fe7879c6 +size 5363335864 diff --git a/model-00072-of-00079.safetensors b/model-00072-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f9674041ab95c204e0f8263a419b0e53c40fbb6 --- /dev/null +++ b/model-00072-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e5100d349acb5d2c663091453234ce9225ec85940bbf9aac54c272775c30da7 +size 5363335872 diff --git a/model-00073-of-00079.safetensors b/model-00073-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..527b2cc4db9c882159dc6c8f544df7be5f919be5 --- /dev/null +++ b/model-00073-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9945e30fb16c17fa3c36f0d6a9e2cadda9b543238a6b18c90f08b37a456748a +size 5363335872 diff --git a/model-00074-of-00079.safetensors b/model-00074-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1a7c4884f7ab48668c6824a3c126c62317d4ba5 --- /dev/null +++ b/model-00074-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5d6be0063c9323868381ea12e1d912b1deb98d1a1ccee44f7626177e5e997f +size 5363335872 diff --git a/model-00075-of-00079.safetensors b/model-00075-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c2f211e9101124dafb70c92c369f60792b0f8d6 --- /dev/null +++ b/model-00075-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3a9edd525d58a700380409a705469d646bacff44ab87f1b040581d97b2a872 +size 5363335872 diff --git a/model-00076-of-00079.safetensors b/model-00076-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ab7f5babe0f3db35d83487919dd0fbee23d09b3 --- /dev/null +++ b/model-00076-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f538c0a8f6bca85cc68b5cbbd1879b1d0fe6f78056f0c927e0021edf498dd6 +size 5363335872 diff --git a/model-00077-of-00079.safetensors b/model-00077-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0abbf1caf61ccb7e226a545651ddb054c57fd56 --- /dev/null +++ b/model-00077-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1acd7d38524f44e1848e472f2c83933a6948a96c29f69ab23db2dc5015fdbad +size 3641666008 diff --git a/model-00078-of-00079.safetensors b/model-00078-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1360985dea77c90ddb6edd35324c0f4dca1ff6fc --- /dev/null +++ b/model-00078-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a11fcb0f6a3d7e6f4f325e3b4748014f3426002cfb1fc20137654e2714dd68e +size 3806331088 diff --git a/model-00079-of-00079.safetensors b/model-00079-of-00079.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94726926917079c71b92e21b21b3141d84676383 --- /dev/null +++ b/model-00079-of-00079.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cceb4574f0a0fea6f1cf84cbf6e993d84f363fe03c0fdbee6ff82ff11c36385f +size 1903165536 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..a76a6954e77f2ad3ad8c1154fcfd47d62e8088d7 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfbe5dba22a639e0c7706f7e076a206ae3c1121cc184a9e3fcf8dfa148476f74 +size 15881055 diff --git a/quantization_config.json b/quantization_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5d7fc0103e5c2184564d0201f532d10d763fbba0 --- /dev/null +++ b/quantization_config.json @@ -0,0 +1,3444 @@ +{ + "bits": 4, + "data_type": "int", + "group_size": 128, + "sym": false, + "iters": 0, + "autoround_version": "0.12.0", + "quant_method": "auto-round", + "packing_format": "auto_round:auto_awq", + "extra_config": { + "model.layers.0.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.mlp.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.mlp.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.0.mlp.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.mlp.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.mlp.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.1.mlp.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.mlp.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.mlp.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.2.mlp.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.q_a_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.q_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.kv_a_proj_with_mqa": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.kv_b_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.o_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.indexer.wq_b": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.indexer.wk": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.self_attn.indexer.weights_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.3.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.4.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.5.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.6.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.7.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.8.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.9.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.10.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.11.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.12.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.13.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.14.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.15.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.16.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.17.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.18.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.19.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.20.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.21.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.22.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.23.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.24.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.25.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.26.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.27.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.28.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.29.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.30.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.31.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.32.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.33.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.34.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.35.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.36.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.37.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.38.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.39.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.40.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.41.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.42.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.43.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.44.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.45.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.46.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.47.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.48.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.49.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.50.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.51.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.52.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.53.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.54.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.55.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.56.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.57.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.58.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.59.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.60.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.61.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.62.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.63.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.64.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.65.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.66.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.67.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.68.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.69.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.70.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.71.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.72.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.73.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.74.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.75.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.76.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.mlp.shared_experts.gate_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.mlp.shared_experts.up_proj": { + "bits": 16, + "data_type": "float" + }, + "model.layers.77.mlp.shared_experts.down_proj": { + "bits": 16, + "data_type": "float" + } + } +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..aba40197a4cdb5607f4ab7a05fb0a4ee8054fd6d --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e773648cb4e65de8660ea6365e10acca112d42a854923df93db4a6f333a82d +size 20217442 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1723f7d90e3fb497303ec7b18f88cf5d05928f37 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,33 @@ +{ + "backend": "tokenizers", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|endoftext|>", + "extra_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>" + ], + "is_local": true, + "model_max_length": 202752, + "model_specific_special_tokens": {}, + "pad_token": "<|endoftext|>", + "padding_side": "left", + "remove_space": false, + "tokenizer_class": "TokenizersBackend" +}