diff --git "a/tensor-cache.json" "b/tensor-cache.json" new file mode 100644--- /dev/null +++ "b/tensor-cache.json" @@ -0,0 +1,3529 @@ +{ + "metadata": { + "ParamSize": 327, + "ParamBytes": 150885632.0, + "BitsPerParam": 4.502399359852415 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "language_model.model.embed_tokens.q_weight", + "shape": [ + 262144, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "759c294ed3a6f81c12617131781b40e7" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 33201408, + "records": [ + { + "name": "language_model.model.embed_tokens.q_scale", + "shape": [ + 262144, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 10485760 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 10487040 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 11142400 + }, + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 11224320 + }, + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 12535040 + }, + { + "name": "language_model.model.layers.0.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 12698880 + }, + { + "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 12700160 + }, + { + "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 12701440 + }, + { + "name": "language_model.model.layers.0.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 12702720 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 12703232 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 12785152 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 12795392 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 13123072 + }, + { + "name": "language_model.model.layers.0.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 13164032 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 13164544 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 13492224 + }, + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 13533184 + }, + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13615104 + }, + { + "name": "language_model.model.layers.1.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 13625344 + }, + { + "name": "language_model.model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 13626624 + }, + { + "name": "language_model.model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 14281984 + }, + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 14363904 + }, + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 15674624 + }, + { + "name": "language_model.model.layers.1.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 15838464 + }, + { + "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 15839744 + }, + { + "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 15841024 + }, + { + "name": "language_model.model.layers.1.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 15842304 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 15842816 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15924736 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 15934976 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 16262656 + }, + { + "name": "language_model.model.layers.1.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 16303616 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 16304128 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 16631808 + }, + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 16672768 + }, + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16754688 + }, + { + "name": "language_model.model.layers.10.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 16764928 + }, + { + "name": "language_model.model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 16766208 + }, + { + "name": "language_model.model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 17421568 + }, + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 17503488 + }, + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 18814208 + }, + { + "name": "language_model.model.layers.10.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 18978048 + }, + { + "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 18979328 + }, + { + "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 18980608 + }, + { + "name": "language_model.model.layers.10.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18981888 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 18982400 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19064320 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 19074560 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 19402240 + }, + { + "name": "language_model.model.layers.10.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 19443200 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 19443712 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 19771392 + }, + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 19812352 + }, + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19894272 + }, + { + "name": "language_model.model.layers.11.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 19904512 + }, + { + "name": "language_model.model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 19905792 + }, + { + "name": "language_model.model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 20561152 + }, + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 20643072 + }, + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 21953792 + }, + { + "name": "language_model.model.layers.11.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 22117632 + }, + { + "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 22120192 + }, + { + "name": "language_model.model.layers.11.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22121472 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 22121984 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22203904 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22214144 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 22541824 + }, + { + "name": "language_model.model.layers.11.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22582784 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22583296 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 22910976 + }, + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 22951936 + }, + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23033856 + }, + { + "name": "language_model.model.layers.12.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 23044096 + }, + { + "name": "language_model.model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 23045376 + }, + { + "name": "language_model.model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 23700736 + }, + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 23782656 + }, + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 25093376 + }, + { + "name": "language_model.model.layers.12.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 25257216 + }, + { + "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 25258496 + }, + { + "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 25259776 + }, + { + "name": "language_model.model.layers.12.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 25261056 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 25261568 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25343488 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25353728 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 25681408 + }, + { + "name": "language_model.model.layers.12.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 25722368 + }, + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25722880 + }, + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 26050560 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 26091520 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26173440 + }, + { + "name": "language_model.model.layers.13.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 26183680 + }, + { + "name": "language_model.model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 26184960 + }, + { + "name": "language_model.model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 26840320 + }, + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 26922240 + }, + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 28232960 + }, + { + "name": "language_model.model.layers.13.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 28396800 + }, + { + "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 28398080 + }, + { + "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 28399360 + }, + { + "name": "language_model.model.layers.13.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28400640 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 28401152 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 28483072 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 28493312 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 28820992 + }, + { + "name": "language_model.model.layers.13.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 28861952 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 28862464 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 29190144 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 29231104 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 29313024 + }, + { + "name": "language_model.model.layers.14.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 29323264 + }, + { + "name": "language_model.model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 29324544 + }, + { + "name": "language_model.model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 29979904 + }, + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 30061824 + }, + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 31372544 + }, + { + "name": "language_model.model.layers.14.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 31536384 + }, + { + "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 31537664 + }, + { + "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 31538944 + }, + { + "name": "language_model.model.layers.14.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 31540224 + }, + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 31540736 + }, + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 31622656 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 31632896 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 31960576 + }, + { + "name": "language_model.model.layers.14.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 32001536 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 32002048 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 32329728 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 32370688 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32452608 + }, + { + "name": "language_model.model.layers.15.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 32462848 + }, + { + "name": "language_model.model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 32464128 + }, + { + "name": "language_model.model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 33119488 + } + ], + "md5sum": "632cf778d8386261684a978964e0297b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33336064, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 1310720 + }, + { + "name": "language_model.model.layers.15.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 1474560 + }, + { + "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 1475840 + }, + { + "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 1477120 + }, + { + "name": "language_model.model.layers.15.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 1478400 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 1478912 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1560832 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 1571072 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 1898752 + }, + { + "name": "language_model.model.layers.15.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 1939712 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 1940224 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 2267904 + }, + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 2308864 + }, + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2390784 + }, + { + "name": "language_model.model.layers.16.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 2401024 + }, + { + "name": "language_model.model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 2402304 + }, + { + "name": "language_model.model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 3057664 + }, + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 3139584 + }, + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 4450304 + }, + { + "name": "language_model.model.layers.16.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 4614144 + }, + { + "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 4615424 + }, + { + "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 4616704 + }, + { + "name": "language_model.model.layers.16.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 4617984 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 4618496 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 4700416 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 4710656 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 5038336 + }, + { + "name": "language_model.model.layers.16.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 5079296 + }, + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5079808 + }, + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 5407488 + }, + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 5448448 + }, + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 5530368 + }, + { + "name": "language_model.model.layers.17.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 5540608 + }, + { + "name": "language_model.model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 5541888 + }, + { + "name": "language_model.model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 6197248 + }, + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 6279168 + }, + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 7589888 + }, + { + "name": "language_model.model.layers.17.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 7753728 + }, + { + "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 7755008 + }, + { + "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 7756288 + }, + { + "name": "language_model.model.layers.17.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 7757568 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 7758080 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 7840000 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 7850240 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 8177920 + }, + { + "name": "language_model.model.layers.17.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 8218880 + }, + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 8219392 + }, + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 8547072 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 8588032 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8669952 + }, + { + "name": "language_model.model.layers.2.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 8680192 + }, + { + "name": "language_model.model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 8681472 + }, + { + "name": "language_model.model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 9336832 + }, + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 9418752 + }, + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 10729472 + }, + { + "name": "language_model.model.layers.2.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 10893312 + }, + { + "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 10894592 + }, + { + "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 10895872 + }, + { + "name": "language_model.model.layers.2.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 10897152 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 10897664 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 10979584 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 10989824 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 11317504 + }, + { + "name": "language_model.model.layers.2.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 11358464 + }, + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 11358976 + }, + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 11686656 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 11727616 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 11809536 + }, + { + "name": "language_model.model.layers.3.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 11819776 + }, + { + "name": "language_model.model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 11821056 + }, + { + "name": "language_model.model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 12476416 + }, + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 12558336 + }, + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 13869056 + }, + { + "name": "language_model.model.layers.3.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 14032896 + }, + { + "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 14034176 + }, + { + "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 14035456 + }, + { + "name": "language_model.model.layers.3.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 14036736 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 14037248 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14119168 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 14129408 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 14457088 + }, + { + "name": "language_model.model.layers.3.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 14498048 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 14498560 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 14826240 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 14867200 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14949120 + }, + { + "name": "language_model.model.layers.4.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 14959360 + }, + { + "name": "language_model.model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 14960640 + }, + { + "name": "language_model.model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 15616000 + }, + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 15697920 + }, + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 17008640 + }, + { + "name": "language_model.model.layers.4.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 17172480 + }, + { + "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 17173760 + }, + { + "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 17175040 + }, + { + "name": "language_model.model.layers.4.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 17176320 + }, + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 17176832 + }, + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17258752 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 17268992 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 17596672 + }, + { + "name": "language_model.model.layers.4.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 17637632 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 17638144 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 17965824 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 18006784 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18088704 + }, + { + "name": "language_model.model.layers.5.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 18098944 + }, + { + "name": "language_model.model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 18100224 + }, + { + "name": "language_model.model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 18755584 + }, + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18837504 + }, + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 20148224 + }, + { + "name": "language_model.model.layers.5.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 20312064 + }, + { + "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 20313344 + }, + { + "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 20314624 + }, + { + "name": "language_model.model.layers.5.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20315904 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 20316416 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20398336 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 20408576 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 20736256 + }, + { + "name": "language_model.model.layers.5.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 20777216 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 20777728 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 21105408 + }, + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 21146368 + }, + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 21228288 + }, + { + "name": "language_model.model.layers.6.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 21238528 + }, + { + "name": "language_model.model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 21239808 + }, + { + "name": "language_model.model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 21895168 + }, + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 21977088 + }, + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 23287808 + }, + { + "name": "language_model.model.layers.6.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 23451648 + }, + { + "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 23452928 + }, + { + "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 23454208 + }, + { + "name": "language_model.model.layers.6.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 23455488 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 23456000 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23537920 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 23548160 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 23875840 + }, + { + "name": "language_model.model.layers.6.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 23916800 + }, + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 23917312 + }, + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 24244992 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 24285952 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 24367872 + }, + { + "name": "language_model.model.layers.7.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 24378112 + }, + { + "name": "language_model.model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 24379392 + }, + { + "name": "language_model.model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 25034752 + }, + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25116672 + }, + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26427392 + }, + { + "name": "language_model.model.layers.7.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 26591232 + }, + { + "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 26592512 + }, + { + "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 26593792 + }, + { + "name": "language_model.model.layers.7.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 26595072 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 26595584 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26677504 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26687744 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 27015424 + }, + { + "name": "language_model.model.layers.7.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 27056384 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 27056896 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 27384576 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 27425536 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 27507456 + }, + { + "name": "language_model.model.layers.8.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 27517696 + }, + { + "name": "language_model.model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 27518976 + }, + { + "name": "language_model.model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 28174336 + }, + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 28256256 + }, + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 29566976 + }, + { + "name": "language_model.model.layers.8.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 29730816 + }, + { + "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 29732096 + }, + { + "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 29733376 + }, + { + "name": "language_model.model.layers.8.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 29734656 + }, + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 29735168 + }, + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 29817088 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 29827328 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 30155008 + }, + { + "name": "language_model.model.layers.8.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 30195968 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 30196480 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 30524160 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 30565120 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30647040 + }, + { + "name": "language_model.model.layers.9.input_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 30657280 + }, + { + "name": "language_model.model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 640, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 655360, + "byteOffset": 30658560 + }, + { + "name": "language_model.model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 640, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 31313920 + }, + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 4096, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 31395840 + }, + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 4096, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 32706560 + }, + { + "name": "language_model.model.layers.9.post_attention_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 32870400 + }, + { + "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 32871680 + }, + { + "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 32872960 + }, + { + "name": "language_model.model.layers.9.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 32874240 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 32874752 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32956672 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 640, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 32966912 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 640, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 33294592 + }, + { + "name": "language_model.model.layers.9.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 33335552 + } + ], + "md5sum": "771db05b4d379d652f02a88b4fbb428c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 462080, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 40960, + "byteOffset": 327680 + }, + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", + "shape": [ + 256, + 80 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 81920, + "byteOffset": 368640 + }, + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", + "shape": [ + 256, + 20 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 450560 + }, + { + "name": "language_model.model.norm.weight", + "shape": [ + 640 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1280, + "byteOffset": 460800 + } + ], + "md5sum": "f8a02319ec8a543fee87e9f79ca6aeee" + } + ] +} \ No newline at end of file