{ "metadata": { "ParamSize": 327, "ParamBytes": 150885632.0, "BitsPerParam": 4.502399359852415 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "language_model.model.embed_tokens.q_weight", "shape": [ 262144, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "759c294ed3a6f81c12617131781b40e7" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33201408, "records": [ { "name": "language_model.model.embed_tokens.q_scale", "shape": [ 262144, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "language_model.model.layers.0.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 10485760 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 10487040 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 11142400 }, { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 11224320 }, { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 12535040 }, { "name": "language_model.model.layers.0.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 12698880 }, { "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 12700160 }, { "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 12701440 }, { "name": "language_model.model.layers.0.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 12702720 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 12703232 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 12785152 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 12795392 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 13123072 }, { "name": "language_model.model.layers.0.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 13164032 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 13164544 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 13492224 }, { "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 13533184 }, { "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13615104 }, { "name": "language_model.model.layers.1.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 13625344 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 13626624 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 14281984 }, { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 14363904 }, { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 15674624 }, { "name": "language_model.model.layers.1.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 15838464 }, { "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 15839744 }, { "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 15841024 }, { "name": "language_model.model.layers.1.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 15842304 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 15842816 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15924736 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 15934976 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 16262656 }, { "name": "language_model.model.layers.1.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 16303616 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 16304128 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 16631808 }, { "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 16672768 }, { "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 16754688 }, { "name": "language_model.model.layers.10.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 16764928 }, { "name": "language_model.model.layers.10.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 16766208 }, { "name": "language_model.model.layers.10.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 17421568 }, { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17503488 }, { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 18814208 }, { "name": "language_model.model.layers.10.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 18978048 }, { "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 18979328 }, { "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 18980608 }, { "name": "language_model.model.layers.10.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 18981888 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 18982400 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19064320 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 19074560 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 19402240 }, { "name": "language_model.model.layers.10.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 19443200 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 19443712 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 19771392 }, { "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 19812352 }, { "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19894272 }, { "name": "language_model.model.layers.11.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 19904512 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 19905792 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 20561152 }, { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20643072 }, { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 21953792 }, { "name": "language_model.model.layers.11.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 22117632 }, { "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 22118912 }, { "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 22120192 }, { "name": "language_model.model.layers.11.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 22121472 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 22121984 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22203904 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 22214144 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 22541824 }, { "name": "language_model.model.layers.11.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 22582784 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 22583296 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 22910976 }, { "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 22951936 }, { "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23033856 }, { "name": "language_model.model.layers.12.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 23044096 }, { "name": "language_model.model.layers.12.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 23045376 }, { "name": "language_model.model.layers.12.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 23700736 }, { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23782656 }, { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 25093376 }, { "name": "language_model.model.layers.12.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 25257216 }, { "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 25258496 }, { "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 25259776 }, { "name": "language_model.model.layers.12.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25261056 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 25261568 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25343488 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 25353728 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 25681408 }, { "name": "language_model.model.layers.12.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25722368 }, { "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 25722880 }, { "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 26050560 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 26091520 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26173440 }, { "name": "language_model.model.layers.13.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 26183680 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 26184960 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 26840320 }, { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26922240 }, { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 28232960 }, { "name": "language_model.model.layers.13.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 28396800 }, { "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 28398080 }, { "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 28399360 }, { "name": "language_model.model.layers.13.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28400640 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 28401152 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 28483072 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 28493312 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 28820992 }, { "name": "language_model.model.layers.13.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 28861952 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 28862464 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 29190144 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 29231104 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29313024 }, { "name": "language_model.model.layers.14.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 29323264 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 29324544 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 29979904 }, { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 30061824 }, { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 31372544 }, { "name": "language_model.model.layers.14.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 31536384 }, { "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 31537664 }, { "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 31538944 }, { "name": "language_model.model.layers.14.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 31540224 }, { "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 31540736 }, { "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 31622656 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 31632896 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 31960576 }, { "name": "language_model.model.layers.14.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 32001536 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 32002048 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 32329728 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 32370688 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32452608 }, { "name": "language_model.model.layers.15.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 32462848 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 32464128 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 33119488 } ], "md5sum": "632cf778d8386261684a978964e0297b" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33336064, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 1310720 }, { "name": "language_model.model.layers.15.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 1474560 }, { "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 1475840 }, { "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 1477120 }, { "name": "language_model.model.layers.15.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 1478400 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 1478912 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1560832 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 1571072 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 1898752 }, { "name": "language_model.model.layers.15.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 1939712 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 1940224 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 2267904 }, { "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 2308864 }, { "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2390784 }, { "name": "language_model.model.layers.16.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 2401024 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 2402304 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 3057664 }, { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 3139584 }, { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 4450304 }, { "name": "language_model.model.layers.16.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 4614144 }, { "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 4615424 }, { "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 4616704 }, { "name": "language_model.model.layers.16.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 4617984 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 4618496 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4700416 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 4710656 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 5038336 }, { "name": "language_model.model.layers.16.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 5079296 }, { "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 5079808 }, { "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 5407488 }, { "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 5448448 }, { "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 5530368 }, { "name": "language_model.model.layers.17.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 5540608 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 5541888 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 6197248 }, { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 6279168 }, { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 7589888 }, { "name": "language_model.model.layers.17.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 7753728 }, { "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 7755008 }, { "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 7756288 }, { "name": "language_model.model.layers.17.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 7757568 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 7758080 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 7840000 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 7850240 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 8177920 }, { "name": "language_model.model.layers.17.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 8218880 }, { "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 8219392 }, { "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 8547072 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 8588032 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8669952 }, { "name": "language_model.model.layers.2.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 8680192 }, { "name": "language_model.model.layers.2.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 8681472 }, { "name": "language_model.model.layers.2.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 9336832 }, { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9418752 }, { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 10729472 }, { "name": "language_model.model.layers.2.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 10893312 }, { "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 10894592 }, { "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 10895872 }, { "name": "language_model.model.layers.2.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 10897152 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 10897664 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 10979584 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 10989824 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 11317504 }, { "name": "language_model.model.layers.2.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 11358464 }, { "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 11358976 }, { "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 11686656 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 11727616 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11809536 }, { "name": "language_model.model.layers.3.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 11819776 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 11821056 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 12476416 }, { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 12558336 }, { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 13869056 }, { "name": "language_model.model.layers.3.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 14032896 }, { "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 14034176 }, { "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 14035456 }, { "name": "language_model.model.layers.3.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 14036736 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 14037248 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14119168 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 14129408 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 14457088 }, { "name": "language_model.model.layers.3.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 14498048 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 14498560 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 14826240 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 14867200 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14949120 }, { "name": "language_model.model.layers.4.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 14959360 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 14960640 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 15616000 }, { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 15697920 }, { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 17008640 }, { "name": "language_model.model.layers.4.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 17172480 }, { "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 17173760 }, { "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 17175040 }, { "name": "language_model.model.layers.4.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 17176320 }, { "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 17176832 }, { "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17258752 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 17268992 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 17596672 }, { "name": "language_model.model.layers.4.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 17637632 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 17638144 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 17965824 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 18006784 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18088704 }, { "name": "language_model.model.layers.5.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 18098944 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 18100224 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 18755584 }, { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18837504 }, { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 20148224 }, { "name": "language_model.model.layers.5.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 20312064 }, { "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 20313344 }, { "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 20314624 }, { "name": "language_model.model.layers.5.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20315904 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 20316416 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20398336 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 20408576 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 20736256 }, { "name": "language_model.model.layers.5.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 20777216 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 20777728 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 21105408 }, { "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 21146368 }, { "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 21228288 }, { "name": "language_model.model.layers.6.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 21238528 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 21239808 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 21895168 }, { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 21977088 }, { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 23287808 }, { "name": "language_model.model.layers.6.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 23451648 }, { "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 23452928 }, { "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 23454208 }, { "name": "language_model.model.layers.6.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 23455488 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 23456000 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23537920 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 23548160 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 23875840 }, { "name": "language_model.model.layers.6.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 23916800 }, { "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 23917312 }, { "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 24244992 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 24285952 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 24367872 }, { "name": "language_model.model.layers.7.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 24378112 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 24379392 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 25034752 }, { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25116672 }, { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 26427392 }, { "name": "language_model.model.layers.7.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 26591232 }, { "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 26592512 }, { "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 26593792 }, { "name": "language_model.model.layers.7.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 26595072 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 26595584 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26677504 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 26687744 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 27015424 }, { "name": "language_model.model.layers.7.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 27056384 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 27056896 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 27384576 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 27425536 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27507456 }, { "name": "language_model.model.layers.8.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 27517696 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 27518976 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 28174336 }, { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28256256 }, { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 29566976 }, { "name": "language_model.model.layers.8.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 29730816 }, { "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 29732096 }, { "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 29733376 }, { "name": "language_model.model.layers.8.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 29734656 }, { "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 29735168 }, { "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29817088 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 29827328 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 30155008 }, { "name": "language_model.model.layers.8.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 30195968 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 30196480 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 30524160 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 30565120 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30647040 }, { "name": "language_model.model.layers.9.input_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 30657280 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_weight", "shape": [ 640, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 30658560 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_scale", "shape": [ 640, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 31313920 }, { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 4096, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31395840 }, { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 32706560 }, { "name": "language_model.model.layers.9.post_attention_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 32870400 }, { "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 32871680 }, { "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 32872960 }, { "name": "language_model.model.layers.9.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 32874240 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 32874752 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32956672 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", "shape": [ 640, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 32966912 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", "shape": [ 640, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 33294592 }, { "name": "language_model.model.layers.9.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 33335552 } ], "md5sum": "771db05b4d379d652f02a88b4fbb428c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 462080, "records": [ { "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", "shape": [ 1024, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 0 }, { "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 327680 }, { "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", "shape": [ 256, 80 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920, "byteOffset": 368640 }, { "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", "shape": [ 256, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 450560 }, { "name": "language_model.model.norm.weight", "shape": [ 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1280, "byteOffset": 460800 } ], "md5sum": "f8a02319ec8a543fee87e9f79ca6aeee" } ] }