{ "metadata": { "ParamSize": 798, "ParamBytes": 10525479936.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 652216320, "records": [ { "name": "lm_head.weight", "shape": [ 90990, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 652216320, "byteOffset": 0 } ], "md5sum": "45dace62e034cc8473b2f8e75339225e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4c35db9498023391336510e6c4acf76a" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 652216320, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 90990, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 652216320, "byteOffset": 0 } ], "md5sum": "7a06137040c1298651249b279be4dd36" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4aec5036d1adc693f2900ba7e0553ba6" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 41108480, "records": [ { "name": "model.layers.0.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1085 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41108480, "byteOffset": 0 } ], "md5sum": "dcf9f5a93b5072ab7c4004a280fb8dfd" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25688064, "records": [ { "name": "model.layers.0.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 678 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25688064, "byteOffset": 0 } ], "md5sum": "e649460cc0dce06626bb40c6b18c7b9e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29437952, "records": [ { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 7168 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14336 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 21504 }, { "name": "model.layers.0.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4546560, "byteOffset": 28672 }, { "name": "model.layers.0.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1085, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7777280, "byteOffset": 4575232 }, { "name": "model.layers.0.mlp.gate_proj.BLinear_train.weight", "shape": [ 120, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 860160, "byteOffset": 12352512 }, { "name": "model.layers.0.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 75 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2841600, "byteOffset": 13212672 }, { "name": "model.layers.0.mlp.up_proj.BLinear_no_train.weight", "shape": [ 678, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4859904, "byteOffset": 16054272 }, { "name": "model.layers.0.mlp.up_proj.BLinear_train.weight", "shape": [ 75, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 537600, "byteOffset": 20914176 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 21451776 }, { "name": "model.layers.0.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 21458944 }, { "name": "model.layers.0.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 21562368 }, { "name": "model.layers.0.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 21563392 }, { "name": "model.layers.0.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 21574656 }, { "name": "model.layers.0.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 22298624 }, { "name": "model.layers.0.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 887 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 22377472 }, { "name": "model.layers.0.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 98 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 28735488 } ], "md5sum": "b30e9ef2b57d60dc4a7efe6b74db611e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 87331840, "records": [ { "name": "model.layers.1.mlp.down_proj.BLinear_no_train.weight", "shape": [ 2305, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87331840, "byteOffset": 0 } ], "md5sum": "f3459f3e20735666c36adae907916efa" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 31112192, "records": [ { "name": "model.layers.0.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 887, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.BLinear_train.weight", "shape": [ 98, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 6358016 }, { "name": "model.layers.0.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 7060480 }, { "name": "model.layers.0.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 8221696 }, { "name": "model.layers.0.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 8228864 }, { "name": "model.layers.0.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 8350720 }, { "name": "model.layers.0.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 9511936 }, { "name": "model.layers.0.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 342 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 350208, "byteOffset": 9633792 }, { "name": "model.layers.0.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 9984000 }, { "name": "model.layers.0.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 38 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38912, "byteOffset": 9985024 }, { "name": "model.layers.0.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 342, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2451456, "byteOffset": 10023936 }, { "name": "model.layers.0.self_attn.v_proj.BLinear_train.weight", "shape": [ 38, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 272384, "byteOffset": 12475392 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 12747776 }, { "name": "model.layers.1.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 2305 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16522240, "byteOffset": 12754944 }, { "name": "model.layers.1.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 29277184 } ], "md5sum": "8e12a44abb1b09e903bbc5e4109a51f7" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.1.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "3cc7818ff9c296b7c8357a93e68d0468" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25688064, "records": [ { "name": "model.layers.1.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 678 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25688064, "byteOffset": 0 } ], "md5sum": "a0c54a37f6ccbe89546b72cd44685752" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 30669824, "records": [ { "name": "model.layers.1.mlp.down_proj.BLinear_train.weight", "shape": [ 256, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9699328, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 9699328 }, { "name": "model.layers.1.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 15950848 }, { "name": "model.layers.1.mlp.gate_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 26645504 }, { "name": "model.layers.1.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 75 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2841600, "byteOffset": 27828224 } ], "md5sum": "308688ab2696237bfe3b1242a2f01d67" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "3edc4bbe39446bc14cb978905446605c" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.10.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "c27e50b3a1622c214fc9a5ff97e72d4e" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 32448512, "records": [ { "name": "model.layers.1.mlp.up_proj.BLinear_no_train.weight", "shape": [ 678, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4859904, "byteOffset": 0 }, { "name": "model.layers.1.mlp.up_proj.BLinear_train.weight", "shape": [ 75, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 537600, "byteOffset": 4859904 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 5397504 }, { "name": "model.layers.1.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 41 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41984, "byteOffset": 5404672 }, { "name": "model.layers.1.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 5446656 }, { "name": "model.layers.1.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 5447680 }, { "name": "model.layers.1.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 41, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 293888, "byteOffset": 5451776 }, { "name": "model.layers.1.self_attn.k_proj.BLinear_train.weight", "shape": [ 4, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 28672, "byteOffset": 5745664 }, { "name": "model.layers.1.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 887 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 5774336 }, { "name": "model.layers.1.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 98 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 12132352 }, { "name": "model.layers.1.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 887, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 12834816 }, { "name": "model.layers.1.self_attn.o_proj.BLinear_train.weight", "shape": [ 98, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 19192832 }, { "name": "model.layers.1.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 19895296 }, { "name": "model.layers.1.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 21056512 }, { "name": "model.layers.1.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 21063680 }, { "name": "model.layers.1.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 21185536 }, { "name": "model.layers.1.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 22346752 }, { "name": "model.layers.1.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 227328, "byteOffset": 22468608 }, { "name": "model.layers.1.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 22695936 }, { "name": "model.layers.1.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 22696960 }, { "name": "model.layers.1.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 222, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1591296, "byteOffset": 22721536 }, { "name": "model.layers.1.self_attn.v_proj.BLinear_train.weight", "shape": [ 24, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 172032, "byteOffset": 24312832 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 24484864 }, { "name": "model.layers.10.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 24492032 } ], "md5sum": "3b5fc2133afa41a794da3fb346fc906d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.10.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "e25df63909ccf70cf2f4c2bed727bfec" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33369088, "records": [ { "name": "model.layers.10.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.10.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 15117312 }, { "name": "model.layers.10.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 21368832 }, { "name": "model.layers.10.mlp.up_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 32063488 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33246208 }, { "name": "model.layers.10.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 33253376 }, { "name": "model.layers.10.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 33356800 }, { "name": "model.layers.10.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 33357824 } ], "md5sum": "4ad5b9006d3dff5b264c33c4e951d02a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.11.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1492, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "d67ba13d3f8e497df4cf839d28df17b8" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.10.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 723968 }, { "name": "model.layers.10.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 802816 }, { "name": "model.layers.10.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 5426176 }, { "name": "model.layers.10.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 5935104 }, { "name": "model.layers.10.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 10558464 }, { "name": "model.layers.10.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 11067392 }, { "name": "model.layers.10.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 12228608 }, { "name": "model.layers.10.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 12235776 }, { "name": "model.layers.10.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 12357632 }, { "name": "model.layers.10.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 13518848 }, { "name": "model.layers.10.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 13640704 }, { "name": "model.layers.10.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13641728 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 17311744 }, { "name": "model.layers.11.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 17318912 }, { "name": "model.layers.11.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 28013568 } ], "md5sum": "04c01cf0e5b4ef0e15e380376764b09d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.11.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "a2227318053e8740dbb7505b8018f73d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.11.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "338e2a7a6229f75979b863057c9c9705" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30251008, "records": [ { "name": "model.layers.11.mlp.down_proj.BLinear_train.weight", "shape": [ 165, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 6251520 }, { "name": "model.layers.11.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 14208000 }, { "name": "model.layers.11.mlp.gate_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 27820032 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29325312 }, { "name": "model.layers.11.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 29332480 }, { "name": "model.layers.11.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29435904 }, { "name": "model.layers.11.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 29436928 }, { "name": "model.layers.11.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 29448192 }, { "name": "model.layers.11.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 30172160 } ], "md5sum": "f3cd757b3203bba6a2b517a1e355d7d6" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.12.mlp.gate_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "1a46d0badd266a89fcba6190b132f163" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8a56bfee6f0066a05d99965f24eb9dc2" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.2.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "76e76b88a1dc9f7fc1017387141aa875" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31717376, "records": [ { "name": "model.layers.11.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 4623360 }, { "name": "model.layers.11.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 5132288 }, { "name": "model.layers.11.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 9755648 }, { "name": "model.layers.11.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 10264576 }, { "name": "model.layers.11.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 13153280 }, { "name": "model.layers.11.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 13160448 }, { "name": "model.layers.11.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 13475840 }, { "name": "model.layers.11.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 16364544 }, { "name": "model.layers.11.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 227328, "byteOffset": 16679936 }, { "name": "model.layers.11.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 16907264 }, { "name": "model.layers.11.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 16908288 }, { "name": "model.layers.11.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 222, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1591296, "byteOffset": 16932864 }, { "name": "model.layers.11.self_attn.v_proj.BLinear_train.weight", "shape": [ 24, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 172032, "byteOffset": 18524160 }, { "name": "model.layers.12.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 18696192 }, { "name": "model.layers.12.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 18799616 }, { "name": "model.layers.12.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 18800640 }, { "name": "model.layers.12.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 18811904 }, { "name": "model.layers.12.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 19535872 }, { "name": "model.layers.12.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 19614720 }, { "name": "model.layers.12.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 22503424 }, { "name": "model.layers.12.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 22818816 }, { "name": "model.layers.12.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 25707520 }, { "name": "model.layers.12.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 26022912 }, { "name": "model.layers.12.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 27184128 }, { "name": "model.layers.12.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 27191296 }, { "name": "model.layers.12.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 27313152 }, { "name": "model.layers.12.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 28474368 }, { "name": "model.layers.12.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 342 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 350208, "byteOffset": 28596224 }, { "name": "model.layers.12.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 28946432 }, { "name": "model.layers.12.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 38 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38912, "byteOffset": 28947456 }, { "name": "model.layers.12.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 342, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2451456, "byteOffset": 28986368 }, { "name": "model.layers.12.self_attn.v_proj.BLinear_train.weight", "shape": [ 38, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 272384, "byteOffset": 31437824 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31710208 } ], "md5sum": "e6cfa439c6833a45173cf314b9eb6097" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 41108480, "records": [ { "name": "model.layers.2.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1085 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41108480, "byteOffset": 0 } ], "md5sum": "768dfde3ccf5be32cdc1c45d6b75a724" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 27620352, "records": [ { "name": "model.layers.2.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 7956480 }, { "name": "model.layers.2.mlp.gate_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 21568512 }, { "name": "model.layers.2.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4546560, "byteOffset": 23073792 } ], "md5sum": "637daa71809af4487de6a4a988c91900" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ffb2d665cdb80794d2142deb409f1c28" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.3.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "5a25345b782908f270b7a2b57b26ec56" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32380928, "records": [ { "name": "model.layers.2.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1085, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7777280, "byteOffset": 0 }, { "name": "model.layers.2.mlp.up_proj.BLinear_train.weight", "shape": [ 120, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 860160, "byteOffset": 7777280 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 8637440 }, { "name": "model.layers.2.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 8644608 }, { "name": "model.layers.2.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 8748032 }, { "name": "model.layers.2.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 8749056 }, { "name": "model.layers.2.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 8760320 }, { "name": "model.layers.2.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 9484288 }, { "name": "model.layers.2.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 9563136 }, { "name": "model.layers.2.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 14186496 }, { "name": "model.layers.2.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 14695424 }, { "name": "model.layers.2.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 19318784 }, { "name": "model.layers.2.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 19827712 }, { "name": "model.layers.2.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 20988928 }, { "name": "model.layers.2.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 20996096 }, { "name": "model.layers.2.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 21117952 }, { "name": "model.layers.2.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 22279168 }, { "name": "model.layers.2.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 227328, "byteOffset": 22401024 }, { "name": "model.layers.2.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 22628352 }, { "name": "model.layers.2.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 22629376 }, { "name": "model.layers.2.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 222, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1591296, "byteOffset": 22653952 }, { "name": "model.layers.2.self_attn.v_proj.BLinear_train.weight", "shape": [ 24, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 172032, "byteOffset": 24245248 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 24417280 }, { "name": "model.layers.3.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 24424448 } ], "md5sum": "614a523c6ee26747f6dc0bbf3b51dda9" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.3.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "6193680d6140be647832920914fac54f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33369088, "records": [ { "name": "model.layers.3.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.3.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 15117312 }, { "name": "model.layers.3.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 21368832 }, { "name": "model.layers.3.mlp.up_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 32063488 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33246208 }, { "name": "model.layers.3.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 33253376 }, { "name": "model.layers.3.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 33356800 }, { "name": "model.layers.3.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 33357824 } ], "md5sum": "2631ea33daf64f8c167fdf702c7aa9fd" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "aadaf79ab81b3108281892a06cc323d7" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.4.mlp.gate_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "a8a1449a3c199611a43cd11a3a2730a1" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.4.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "87419050989c04a282946d9f0556f93e" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 28025856, "records": [ { "name": "model.layers.3.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 723968 }, { "name": "model.layers.3.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 887 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 802816 }, { "name": "model.layers.3.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 98 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 7160832 }, { "name": "model.layers.3.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 887, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 7863296 }, { "name": "model.layers.3.self_attn.o_proj.BLinear_train.weight", "shape": [ 98, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 14221312 }, { "name": "model.layers.3.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 14923776 }, { "name": "model.layers.3.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 16084992 }, { "name": "model.layers.3.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 16092160 }, { "name": "model.layers.3.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 16214016 }, { "name": "model.layers.3.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 17375232 }, { "name": "model.layers.3.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 282 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 288768, "byteOffset": 17497088 }, { "name": "model.layers.3.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 17785856 }, { "name": "model.layers.3.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 31 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31744, "byteOffset": 17786880 }, { "name": "model.layers.3.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 282, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2021376, "byteOffset": 17818624 }, { "name": "model.layers.3.self_attn.v_proj.BLinear_train.weight", "shape": [ 31, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 222208, "byteOffset": 19840000 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 20062208 }, { "name": "model.layers.4.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 20069376 } ], "md5sum": "18e94eb60de3cd146de1bf515cca11ba" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26499072, "records": [ { "name": "model.layers.4.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.4.mlp.up_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 15117312 }, { "name": "model.layers.4.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 15124480 }, { "name": "model.layers.4.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 15227904 }, { "name": "model.layers.4.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 15228928 }, { "name": "model.layers.4.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 15240192 }, { "name": "model.layers.4.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 15964160 }, { "name": "model.layers.4.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 16043008 }, { "name": "model.layers.4.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 18931712 }, { "name": "model.layers.4.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 19247104 }, { "name": "model.layers.4.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 22135808 }, { "name": "model.layers.4.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 22451200 }, { "name": "model.layers.4.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 23612416 }, { "name": "model.layers.4.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 23619584 }, { "name": "model.layers.4.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 23741440 }, { "name": "model.layers.4.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 24902656 }, { "name": "model.layers.4.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 25024512 }, { "name": "model.layers.4.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 25190400 }, { "name": "model.layers.4.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 25191424 }, { "name": "model.layers.4.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 25208832 }, { "name": "model.layers.4.self_attn.v_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 26370048 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26491904 } ], "md5sum": "d521e2c820a5f442713b62674be2e2bf" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.5.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1899, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "72152a3f4a19acac3b54bf85b9178977" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.5.mlp.gate_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f47078662ecc66acf548873bb0e5419e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.5.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "d2cf67981cd004c96469befaa5ca5a29" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "6ee0c32507b1be735fcf30a0455737ee" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.6.mlp.gate_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "d393b872d6552c73dccaaff317655fea" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.6.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ac3beb1713e763621c825dbdb73d954d" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 30990336, "records": [ { "name": "model.layers.5.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.5.mlp.down_proj.BLinear_train.weight", "shape": [ 210, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 15117312 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 23073792 }, { "name": "model.layers.5.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 23080960 }, { "name": "model.layers.5.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 23246848 }, { "name": "model.layers.5.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 23247872 }, { "name": "model.layers.5.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 23265280 }, { "name": "model.layers.5.self_attn.k_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 24426496 }, { "name": "model.layers.5.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 24548352 }, { "name": "model.layers.5.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 25709568 }, { "name": "model.layers.5.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 25831424 }, { "name": "model.layers.5.self_attn.o_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 26992640 }, { "name": "model.layers.5.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 27114496 }, { "name": "model.layers.5.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 28275712 }, { "name": "model.layers.5.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 28282880 }, { "name": "model.layers.5.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 28404736 }, { "name": "model.layers.5.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 29565952 }, { "name": "model.layers.5.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 29687808 }, { "name": "model.layers.5.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29791232 }, { "name": "model.layers.5.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 29792256 }, { "name": "model.layers.5.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 29803520 }, { "name": "model.layers.5.self_attn.v_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 30527488 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30606336 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30613504 }, { "name": "model.layers.6.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 41 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41984, "byteOffset": 30620672 }, { "name": "model.layers.6.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 30662656 }, { "name": "model.layers.6.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30663680 }, { "name": "model.layers.6.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 41, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 293888, "byteOffset": 30667776 }, { "name": "model.layers.6.self_attn.k_proj.BLinear_train.weight", "shape": [ 4, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 28672, "byteOffset": 30961664 } ], "md5sum": "0fd4af04ca531bf98c2612bbe3100e0b" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ec3541234cddf21f7649aeb9bcd82a1e" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 87331840, "records": [ { "name": "model.layers.7.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 2305 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87331840, "byteOffset": 0 } ], "md5sum": "3cee90c1b383d776506867f22fd38872" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 26215424, "records": [ { "name": "model.layers.6.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 4623360 }, { "name": "model.layers.6.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 5132288 }, { "name": "model.layers.6.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 9755648 }, { "name": "model.layers.6.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 10264576 }, { "name": "model.layers.6.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 11425792 }, { "name": "model.layers.6.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 11432960 }, { "name": "model.layers.6.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 11554816 }, { "name": "model.layers.6.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 12716032 }, { "name": "model.layers.6.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 12837888 }, { "name": "model.layers.6.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12838912 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 16508928 }, { "name": "model.layers.7.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9699328, "byteOffset": 16516096 } ], "md5sum": "f748098f35ab51b14c45ff06733486a5" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.7.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4cbf7193bdffdad34fa96b532eeff7fa" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32855040, "records": [ { "name": "model.layers.7.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2305, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16522240, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_proj.BLinear_train.weight", "shape": [ 256, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 16522240 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 18357248 }, { "name": "model.layers.7.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 41 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41984, "byteOffset": 18364416 }, { "name": "model.layers.7.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 18406400 }, { "name": "model.layers.7.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18407424 }, { "name": "model.layers.7.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 41, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 293888, "byteOffset": 18411520 }, { "name": "model.layers.7.self_attn.k_proj.BLinear_train.weight", "shape": [ 4, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 28672, "byteOffset": 18705408 }, { "name": "model.layers.7.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 887 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 18734080 }, { "name": "model.layers.7.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 98 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 25092096 }, { "name": "model.layers.7.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 887, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 25794560 }, { "name": "model.layers.7.self_attn.o_proj.BLinear_train.weight", "shape": [ 98, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 32152576 } ], "md5sum": "2a8a710d52c72ebd49d456663e12beb5" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "5f3712a24a3eba2561429c478bbeb97e" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.8.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "a3411f9bb137189d2a323b56f26357a6" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 87331840, "records": [ { "name": "model.layers.8.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 2305 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87331840, "byteOffset": 0 } ], "md5sum": "b8fad39a5d8db324c8607555e227991d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29325312, "records": [ { "name": "model.layers.7.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 1161216 }, { "name": "model.layers.7.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 1168384 }, { "name": "model.layers.7.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 1290240 }, { "name": "model.layers.7.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 2451456 }, { "name": "model.layers.7.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 2573312 }, { "name": "model.layers.7.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2574336 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6244352 }, { "name": "model.layers.8.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 6251520 }, { "name": "model.layers.8.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 14208000 }, { "name": "model.layers.8.mlp.gate_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 27820032 } ], "md5sum": "2ddcee75ad83dc3cab615ec8b4212a54" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 29531136, "records": [ { "name": "model.layers.8.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9699328, "byteOffset": 0 }, { "name": "model.layers.8.mlp.up_proj.BLinear_no_train.weight", "shape": [ 2305, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16522240, "byteOffset": 9699328 }, { "name": "model.layers.8.mlp.up_proj.BLinear_train.weight", "shape": [ 256, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 26221568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 28056576 }, { "name": "model.layers.8.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 28063744 }, { "name": "model.layers.8.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 28229632 }, { "name": "model.layers.8.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 28230656 }, { "name": "model.layers.8.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 28248064 }, { "name": "model.layers.8.self_attn.k_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 29409280 } ], "md5sum": "e9dbc556195ad85a51417b72a7b27cc0" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32864256, "records": [ { "name": "model.layers.8.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 4623360 }, { "name": "model.layers.8.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 5132288 }, { "name": "model.layers.8.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 9755648 }, { "name": "model.layers.8.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 10264576 }, { "name": "model.layers.8.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 13153280 }, { "name": "model.layers.8.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 13160448 }, { "name": "model.layers.8.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 13475840 }, { "name": "model.layers.8.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 16364544 }, { "name": "model.layers.8.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 282 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 288768, "byteOffset": 16679936 }, { "name": "model.layers.8.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 16968704 }, { "name": "model.layers.8.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 31 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31744, "byteOffset": 16969728 }, { "name": "model.layers.8.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 282, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2021376, "byteOffset": 17001472 }, { "name": "model.layers.8.self_attn.v_proj.BLinear_train.weight", "shape": [ 31, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 222208, "byteOffset": 19022848 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 19245056 }, { "name": "model.layers.9.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 19252224 } ], "md5sum": "cba28beaea39559e2fadcb0513d21d3c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.9.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1899, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "e058c578448276420182cf92539ab96a" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 87331840, "records": [ { "name": "model.layers.9.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 2305 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87331840, "byteOffset": 0 } ], "md5sum": "6d84449ee3b12643109888fea7e05d5d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 19161088, "records": [ { "name": "model.layers.9.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.BLinear_train.weight", "shape": [ 210, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 1505280 }, { "name": "model.layers.9.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9699328, "byteOffset": 9461760 } ], "md5sum": "5e98fa380076bf5f44d0140fee4c414b" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.9.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "953017ec8541fd2a6cfbabcf44ea5665" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "c22f2da2b93e7be10eeb8b87011e8d0e" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.12.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ad58cb7c41bb1dd841a285fc7dbdce10" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30851072, "records": [ { "name": "model.layers.9.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2305, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16522240, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_proj.BLinear_train.weight", "shape": [ 256, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 16522240 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 18357248 }, { "name": "model.layers.9.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 41 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41984, "byteOffset": 18364416 }, { "name": "model.layers.9.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 18406400 }, { "name": "model.layers.9.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18407424 }, { "name": "model.layers.9.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 41, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 293888, "byteOffset": 18411520 }, { "name": "model.layers.9.self_attn.k_proj.BLinear_train.weight", "shape": [ 4, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 28672, "byteOffset": 18705408 }, { "name": "model.layers.9.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 18734080 }, { "name": "model.layers.9.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 21622784 }, { "name": "model.layers.9.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 21938176 }, { "name": "model.layers.9.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 24826880 }, { "name": "model.layers.9.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 25142272 }, { "name": "model.layers.9.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26303488 }, { "name": "model.layers.9.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 26310656 }, { "name": "model.layers.9.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 26432512 }, { "name": "model.layers.9.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 27593728 }, { "name": "model.layers.9.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 342 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 350208, "byteOffset": 27715584 }, { "name": "model.layers.9.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 28065792 }, { "name": "model.layers.9.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 38 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38912, "byteOffset": 28066816 }, { "name": "model.layers.9.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 342, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2451456, "byteOffset": 28105728 }, { "name": "model.layers.9.self_attn.v_proj.BLinear_train.weight", "shape": [ 38, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 272384, "byteOffset": 30557184 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30829568 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30836736 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30843904 } ], "md5sum": "5db1af048e24ea836a125b2e22e83d0d" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 41108480, "records": [ { "name": "model.layers.13.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1085, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41108480, "byteOffset": 0 } ], "md5sum": "a484141cdd8214fc33951f06c1c48958" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.13.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "ec5053c303d0ea3ecac0ac613aec6a92" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.13.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "679bf657b3b4b02aac2e5949cd5e76b3" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 31312896, "records": [ { "name": "model.layers.13.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1085 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7777280, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 860160, "byteOffset": 7777280 }, { "name": "model.layers.13.mlp.down_proj.BLinear_train.weight", "shape": [ 120, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4546560, "byteOffset": 8637440 }, { "name": "model.layers.13.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 13184000 }, { "name": "model.layers.13.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 19435520 }, { "name": "model.layers.13.mlp.gate_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 30130176 } ], "md5sum": "21d448d43d51620352b30ecfad4c8fd7" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 32530432, "records": [ { "name": "model.layers.13.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 0 }, { "name": "model.layers.13.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 6251520 }, { "name": "model.layers.13.mlp.up_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 16946176 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 18128896 }, { "name": "model.layers.13.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 18136064 }, { "name": "model.layers.13.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 18239488 }, { "name": "model.layers.13.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 18240512 }, { "name": "model.layers.13.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 18251776 }, { "name": "model.layers.13.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 18975744 }, { "name": "model.layers.13.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 19054592 }, { "name": "model.layers.13.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 23677952 }, { "name": "model.layers.13.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 24186880 }, { "name": "model.layers.13.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 28810240 }, { "name": "model.layers.13.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 29319168 }, { "name": "model.layers.13.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 32207872 }, { "name": "model.layers.13.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 32215040 } ], "md5sum": "91db706eabc3090e9282e0ee9541fea2" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 41108480, "records": [ { "name": "model.layers.14.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1085, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41108480, "byteOffset": 0 } ], "md5sum": "cdd4b0f6a54306a2eed1247717d820a4" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.14.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "f1b1bbafafce0bb2f9f566c151857cca" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 28022784, "records": [ { "name": "model.layers.13.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 2888704 }, { "name": "model.layers.13.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 3204096 }, { "name": "model.layers.13.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 3205120 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6875136 }, { "name": "model.layers.14.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1085 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7777280, "byteOffset": 6882304 }, { "name": "model.layers.14.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 860160, "byteOffset": 14659584 }, { "name": "model.layers.14.mlp.down_proj.BLinear_train.weight", "shape": [ 120, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4546560, "byteOffset": 15519744 }, { "name": "model.layers.14.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 20066304 } ], "md5sum": "928b5fc00346807a16f999045de8e0c4" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.14.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "01b2670394fb1dafdc3854f74b134b30" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33351680, "records": [ { "name": "model.layers.14.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 15117312 }, { "name": "model.layers.14.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 15124480 }, { "name": "model.layers.14.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 15227904 }, { "name": "model.layers.14.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 15228928 }, { "name": "model.layers.14.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 15240192 }, { "name": "model.layers.14.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 15964160 }, { "name": "model.layers.14.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 16043008 }, { "name": "model.layers.14.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 17204224 }, { "name": "model.layers.14.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 17326080 }, { "name": "model.layers.14.self_attn.o_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 18487296 }, { "name": "model.layers.14.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 18609152 }, { "name": "model.layers.14.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 19770368 }, { "name": "model.layers.14.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 19777536 }, { "name": "model.layers.14.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 19899392 }, { "name": "model.layers.14.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 21060608 }, { "name": "model.layers.14.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 21182464 }, { "name": "model.layers.14.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 21348352 }, { "name": "model.layers.14.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 21349376 }, { "name": "model.layers.14.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 21366784 }, { "name": "model.layers.14.self_attn.v_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 22528000 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 22649856 }, { "name": "model.layers.15.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 22657024 } ], "md5sum": "2bcb020480572c56769b8401f46560bc" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.15.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1492, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "d5dacf4b594c3cba69402db44d1174b8" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.15.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "1c6acb662f35d344a810be235a641275" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.15.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "b3be031688739813ada6407ef2cace2d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33519616, "records": [ { "name": "model.layers.15.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.BLinear_train.weight", "shape": [ 165, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 1182720 }, { "name": "model.layers.15.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 7434240 }, { "name": "model.layers.15.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 13685760 }, { "name": "model.layers.15.mlp.gate_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 24380416 }, { "name": "model.layers.15.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 25563136 } ], "md5sum": "cae5855f25c0b007d6b0e1faf2372cab" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 33271808, "records": [ { "name": "model.layers.15.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.15.mlp.up_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 15117312 }, { "name": "model.layers.15.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 15124480 }, { "name": "model.layers.15.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 15290368 }, { "name": "model.layers.15.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 15291392 }, { "name": "model.layers.15.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 15308800 }, { "name": "model.layers.15.self_attn.k_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 16470016 }, { "name": "model.layers.15.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 16591872 }, { "name": "model.layers.15.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 21215232 }, { "name": "model.layers.15.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 21724160 }, { "name": "model.layers.15.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 26347520 }, { "name": "model.layers.15.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 26856448 }, { "name": "model.layers.15.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29745152 }, { "name": "model.layers.15.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 29752320 }, { "name": "model.layers.15.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 30067712 }, { "name": "model.layers.15.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 32956416 } ], "md5sum": "0d6b74868bd1705ed90f9b05bce130a4" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.16.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1492, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "c3602dc0300ae815be7c18dc3b28384d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.16.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "32303d6664a5b48940b194bb4a6792ea" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 27501568, "records": [ { "name": "model.layers.15.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 342 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 350208, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 350208 }, { "name": "model.layers.15.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 38 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38912, "byteOffset": 351232 }, { "name": "model.layers.15.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 342, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2451456, "byteOffset": 390144 }, { "name": "model.layers.15.self_attn.v_proj.BLinear_train.weight", "shape": [ 38, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 272384, "byteOffset": 2841600 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3113984 }, { "name": "model.layers.16.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 3121152 }, { "name": "model.layers.16.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 13815808 }, { "name": "model.layers.16.mlp.down_proj.BLinear_train.weight", "shape": [ 165, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 14998528 }, { "name": "model.layers.16.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 21250048 } ], "md5sum": "2a081786d3e3b86d821387788c71325b" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.16.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "3cd7137d6510c961fa251d3ed67319d9" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 33161216, "records": [ { "name": "model.layers.16.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 10694656 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 11877376 }, { "name": "model.layers.16.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 11884544 }, { "name": "model.layers.16.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 11987968 }, { "name": "model.layers.16.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 11988992 }, { "name": "model.layers.16.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 12000256 }, { "name": "model.layers.16.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 12724224 }, { "name": "model.layers.16.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 12803072 }, { "name": "model.layers.16.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 17426432 }, { "name": "model.layers.16.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 17935360 }, { "name": "model.layers.16.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 22558720 }, { "name": "model.layers.16.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 23067648 }, { "name": "model.layers.16.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25956352 }, { "name": "model.layers.16.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 25963520 }, { "name": "model.layers.16.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 26278912 }, { "name": "model.layers.16.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 29167616 }, { "name": "model.layers.16.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29483008 }, { "name": "model.layers.16.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 29484032 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33154048 } ], "md5sum": "62268e01f7288961e7249e28835c20f1" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 41108480, "records": [ { "name": "model.layers.17.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1085, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41108480, "byteOffset": 0 } ], "md5sum": "6c793fed698835060cfbe3768fe6b0c4" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25688064, "records": [ { "name": "model.layers.17.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 678 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25688064, "byteOffset": 0 } ], "md5sum": "2beab6391cf6b97ba84a48ab5a6a9cb4" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.17.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "0276cdd1a818146314f194d22a58bfd0" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29379584, "records": [ { "name": "model.layers.17.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1085 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7777280, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 860160, "byteOffset": 7777280 }, { "name": "model.layers.17.mlp.down_proj.BLinear_train.weight", "shape": [ 120, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4546560, "byteOffset": 8637440 }, { "name": "model.layers.17.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 75 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2841600, "byteOffset": 13184000 }, { "name": "model.layers.17.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 678, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4859904, "byteOffset": 16025600 }, { "name": "model.layers.17.mlp.gate_proj.BLinear_train.weight", "shape": [ 75, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 537600, "byteOffset": 20885504 }, { "name": "model.layers.17.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 21423104 } ], "md5sum": "362ef858dbea63154dd849f1629ae50a" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 30889984, "records": [ { "name": "model.layers.17.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.17.mlp.up_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 15117312 }, { "name": "model.layers.17.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 15124480 }, { "name": "model.layers.17.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 15290368 }, { "name": "model.layers.17.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 15291392 }, { "name": "model.layers.17.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 15308800 }, { "name": "model.layers.17.self_attn.k_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 16470016 }, { "name": "model.layers.17.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 16591872 }, { "name": "model.layers.17.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 19480576 }, { "name": "model.layers.17.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 19795968 }, { "name": "model.layers.17.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 22684672 }, { "name": "model.layers.17.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 23000064 }, { "name": "model.layers.17.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25888768 }, { "name": "model.layers.17.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 25895936 }, { "name": "model.layers.17.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 26211328 }, { "name": "model.layers.17.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 29100032 }, { "name": "model.layers.17.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 29415424 }, { "name": "model.layers.17.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29581312 }, { "name": "model.layers.17.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 29582336 }, { "name": "model.layers.17.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 29599744 }, { "name": "model.layers.17.self_attn.v_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 30760960 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30882816 } ], "md5sum": "d2840f0b7a3424ad7ff61c5cdbbab9cf" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.18.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1492, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "0b0c90869d86746835eddbfe38c3db7b" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.18.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "d48cbc4c6ef101da4ad0e0a21f1d4830" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 24380416, "records": [ { "name": "model.layers.18.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 10694656 }, { "name": "model.layers.18.mlp.down_proj.BLinear_train.weight", "shape": [ 165, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 11877376 }, { "name": "model.layers.18.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 18128896 } ], "md5sum": "3798fb043a42189a7f7cbaedf92db329" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.18.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "6d01e9e7186696b48424b4b57fe25bdb" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 31480832, "records": [ { "name": "model.layers.18.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 10694656 }, { "name": "model.layers.18.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 11877376 }, { "name": "model.layers.18.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 18128896 }, { "name": "model.layers.18.mlp.up_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 28823552 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30006272 }, { "name": "model.layers.18.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 30013440 }, { "name": "model.layers.18.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 30179328 }, { "name": "model.layers.18.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 30180352 }, { "name": "model.layers.18.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 30197760 }, { "name": "model.layers.18.self_attn.k_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 31358976 } ], "md5sum": "93b5ab6297efc2c53e3913e62b61b5a9" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.19.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1899, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "d5f86bc73967b88ec46fc67e02ab11d8" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 27219968, "records": [ { "name": "model.layers.18.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 2888704 }, { "name": "model.layers.18.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 3204096 }, { "name": "model.layers.18.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 6092800 }, { "name": "model.layers.18.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 6408192 }, { "name": "model.layers.18.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 7569408 }, { "name": "model.layers.18.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 7576576 }, { "name": "model.layers.18.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 7698432 }, { "name": "model.layers.18.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 8859648 }, { "name": "model.layers.18.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 342 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 350208, "byteOffset": 8981504 }, { "name": "model.layers.18.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 9331712 }, { "name": "model.layers.18.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 38 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38912, "byteOffset": 9332736 }, { "name": "model.layers.18.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 342, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2451456, "byteOffset": 9371648 }, { "name": "model.layers.18.self_attn.v_proj.BLinear_train.weight", "shape": [ 38, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 272384, "byteOffset": 11823104 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 12095488 }, { "name": "model.layers.19.mlp.down_proj.ALinear_no_train.weight", "shape": [ 3584, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 12102656 }, { "name": "model.layers.19.mlp.down_proj.ALinear_train.weight", "shape": [ 3584, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 25714688 } ], "md5sum": "88b067c103ec68380950b6eae6775707" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.19.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "0b1588c5ae175e78c198f85aff60774a" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 41108480, "records": [ { "name": "model.layers.19.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1085 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41108480, "byteOffset": 0 } ], "md5sum": "d5705723aa7db4a0d465a13b94febd80" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 30631936, "records": [ { "name": "model.layers.19.mlp.down_proj.BLinear_train.weight", "shape": [ 210, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 7956480 }, { "name": "model.layers.19.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 14208000 }, { "name": "model.layers.19.mlp.gate_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 24902656 }, { "name": "model.layers.19.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4546560, "byteOffset": 26085376 } ], "md5sum": "f512977c604bc6adbb65e57f99827e77" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 30100480, "records": [ { "name": "model.layers.19.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1085, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7777280, "byteOffset": 0 }, { "name": "model.layers.19.mlp.up_proj.BLinear_train.weight", "shape": [ 120, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 860160, "byteOffset": 7777280 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 8637440 }, { "name": "model.layers.19.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 8644608 }, { "name": "model.layers.19.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 8748032 }, { "name": "model.layers.19.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 8749056 }, { "name": "model.layers.19.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 8760320 }, { "name": "model.layers.19.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 9484288 }, { "name": "model.layers.19.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 887 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 9563136 }, { "name": "model.layers.19.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 98 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 15921152 }, { "name": "model.layers.19.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 887, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6358016, "byteOffset": 16623616 }, { "name": "model.layers.19.self_attn.o_proj.BLinear_train.weight", "shape": [ 98, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 702464, "byteOffset": 22981632 }, { "name": "model.layers.19.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 23684096 }, { "name": "model.layers.19.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26572800 }, { "name": "model.layers.19.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 26579968 }, { "name": "model.layers.19.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 26895360 }, { "name": "model.layers.19.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 29784064 }, { "name": "model.layers.19.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 30099456 } ], "md5sum": "7af957bf0de2a2f3ff59eb778abb6b38" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8cce187f2415cad1f90d3339cb55be83" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.20.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "3364aaa737c7b57f8846fd841f2a493b" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.20.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "81b333859806abeed41640fca05099bf" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29762560, "records": [ { "name": "model.layers.19.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3670016 }, { "name": "model.layers.20.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 3677184 }, { "name": "model.layers.20.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 9928704 }, { "name": "model.layers.20.mlp.gate_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 20623360 }, { "name": "model.layers.20.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 21806080 } ], "md5sum": "af13419c879625d5ed8c3860a0ac9a34" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8764d57ad12540ff025292e37d3f0277" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.21.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "bfd8fb2345929bf442f152928b3d13e0" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 30341120, "records": [ { "name": "model.layers.20.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.20.mlp.up_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 15117312 }, { "name": "model.layers.20.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 15124480 }, { "name": "model.layers.20.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 15227904 }, { "name": "model.layers.20.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 15228928 }, { "name": "model.layers.20.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 15240192 }, { "name": "model.layers.20.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 15964160 }, { "name": "model.layers.20.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 16043008 }, { "name": "model.layers.20.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 18931712 }, { "name": "model.layers.20.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 19247104 }, { "name": "model.layers.20.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 22135808 }, { "name": "model.layers.20.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 22451200 }, { "name": "model.layers.20.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25339904 }, { "name": "model.layers.20.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 25347072 }, { "name": "model.layers.20.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 25662464 }, { "name": "model.layers.20.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 28551168 }, { "name": "model.layers.20.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 28866560 }, { "name": "model.layers.20.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 29032448 }, { "name": "model.layers.20.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 29033472 }, { "name": "model.layers.20.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 29050880 }, { "name": "model.layers.20.self_attn.v_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 30212096 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30333952 } ], "md5sum": "597278329e4b9ba6127705414f2909f6" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.21.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "4875fd08fbf6e4343e4ed6a04b4b5eaf" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 31030272, "records": [ { "name": "model.layers.21.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 7956480 }, { "name": "model.layers.21.mlp.gate_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 21568512 }, { "name": "model.layers.21.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 23073792 } ], "md5sum": "d49e511d79d3ee463c4cd9eed9aadfb3" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 32723968, "records": [ { "name": "model.layers.21.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 0 }, { "name": "model.layers.21.mlp.up_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 13612032 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 15117312 }, { "name": "model.layers.21.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 15124480 }, { "name": "model.layers.21.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 15227904 }, { "name": "model.layers.21.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 15228928 }, { "name": "model.layers.21.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 15240192 }, { "name": "model.layers.21.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 15964160 }, { "name": "model.layers.21.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 16043008 }, { "name": "model.layers.21.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 20666368 }, { "name": "model.layers.21.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 21175296 }, { "name": "model.layers.21.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 25798656 }, { "name": "model.layers.21.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 26307584 }, { "name": "model.layers.21.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29196288 }, { "name": "model.layers.21.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 29203456 }, { "name": "model.layers.21.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 29518848 }, { "name": "model.layers.21.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 32407552 }, { "name": "model.layers.21.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 32722944 } ], "md5sum": "fec0d84332d29da198acba6e0b309d6b" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "14a28fa07ad0abd2ae75667e049f68ee" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 87331840, "records": [ { "name": "model.layers.22.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 2305 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87331840, "byteOffset": 0 } ], "md5sum": "82691ec5931c606c336d9d0390d8bb06" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.22.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "34ec9ed42b18e8fcda79b2a350a7fcc2" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 31733760, "records": [ { "name": "model.layers.21.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3670016 }, { "name": "model.layers.22.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9699328, "byteOffset": 3677184 }, { "name": "model.layers.22.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2305, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16522240, "byteOffset": 13376512 }, { "name": "model.layers.22.mlp.gate_proj.BLinear_train.weight", "shape": [ 256, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 29898752 } ], "md5sum": "cb1f6eb870642506587aacd6172fd073" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "18b118dbe970adf599a874dec5ae3d69" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 87331840, "records": [ { "name": "model.layers.23.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 2305 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87331840, "byteOffset": 0 } ], "md5sum": "a001841848d5d717bf6c2c227c0936f7" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 30608384, "records": [ { "name": "model.layers.22.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 0 }, { "name": "model.layers.22.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 6251520 }, { "name": "model.layers.22.mlp.up_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 16946176 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 18128896 }, { "name": "model.layers.22.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 18136064 }, { "name": "model.layers.22.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 18239488 }, { "name": "model.layers.22.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 18240512 }, { "name": "model.layers.22.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 18251776 }, { "name": "model.layers.22.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 18975744 }, { "name": "model.layers.22.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 19054592 }, { "name": "model.layers.22.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 21943296 }, { "name": "model.layers.22.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 22258688 }, { "name": "model.layers.22.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 25147392 }, { "name": "model.layers.22.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 25462784 }, { "name": "model.layers.22.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26624000 }, { "name": "model.layers.22.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 26631168 }, { "name": "model.layers.22.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 26753024 }, { "name": "model.layers.22.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 27914240 }, { "name": "model.layers.22.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 282 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 288768, "byteOffset": 28036096 }, { "name": "model.layers.22.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 28324864 }, { "name": "model.layers.22.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 31 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31744, "byteOffset": 28325888 }, { "name": "model.layers.22.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 282, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2021376, "byteOffset": 28357632 }, { "name": "model.layers.22.self_attn.v_proj.BLinear_train.weight", "shape": [ 31, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 222208, "byteOffset": 30379008 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 30601216 } ], "md5sum": "e1ba00afe8fdb029993c1000c1599f73" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 71949312, "records": [ { "name": "model.layers.23.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1899 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 71949312, "byteOffset": 0 } ], "md5sum": "3cda6f8b1a96a490bbadd134bb35cde0" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 28056576, "records": [ { "name": "model.layers.23.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9699328, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2305, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16522240, "byteOffset": 9699328 }, { "name": "model.layers.23.mlp.gate_proj.BLinear_train.weight", "shape": [ 256, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 26221568 } ], "md5sum": "0495c16369fba4f346848e712321920a" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 29131776, "records": [ { "name": "model.layers.23.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 210 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7956480, "byteOffset": 0 }, { "name": "model.layers.23.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1899, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13612032, "byteOffset": 7956480 }, { "name": "model.layers.23.mlp.up_proj.BLinear_train.weight", "shape": [ 210, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1505280, "byteOffset": 21568512 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 23073792 }, { "name": "model.layers.23.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 23080960 }, { "name": "model.layers.23.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 23184384 }, { "name": "model.layers.23.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 23185408 }, { "name": "model.layers.23.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 23196672 }, { "name": "model.layers.23.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 23920640 }, { "name": "model.layers.23.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 23999488 }, { "name": "model.layers.23.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 28622848 } ], "md5sum": "d26271c639d197195cbb4471dc9bc849" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "38832fa2c4a89160d544f6eed6be27cb" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 87331840, "records": [ { "name": "model.layers.24.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 18944, 2305 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87331840, "byteOffset": 0 } ], "md5sum": "6c86abd9251a7aa7cbfe2e345607e40b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 24925184, "records": [ { "name": "model.layers.23.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 4623360 }, { "name": "model.layers.23.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 5132288 }, { "name": "model.layers.23.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 8020992 }, { "name": "model.layers.23.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 8028160 }, { "name": "model.layers.23.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 8343552 }, { "name": "model.layers.23.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 11232256 }, { "name": "model.layers.23.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 11547648 }, { "name": "model.layers.23.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11548672 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 15218688 }, { "name": "model.layers.24.mlp.gate_proj.ALinear_train.weight", "shape": [ 18944, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9699328, "byteOffset": 15225856 } ], "md5sum": "8e86666fb6349173fb2f22e925ecdc0b" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 56528896, "records": [ { "name": "model.layers.24.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 1492 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56528896, "byteOffset": 0 } ], "md5sum": "cf2bcf1cbfdcad8095b358095563b9bb" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 24608768, "records": [ { "name": "model.layers.24.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2305, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16522240, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_proj.BLinear_train.weight", "shape": [ 256, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 16522240 }, { "name": "model.layers.24.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 165 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6251520, "byteOffset": 18357248 } ], "md5sum": "a41e6e79c14ac0250cdae4bbb4dbd5c1" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "c5de0fcf9ab062acb7d0e4e0409ac600" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.25.mlp.gate_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ff14131399b00cca8a189478b36b0e95" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 87331840, "records": [ { "name": "model.layers.25.mlp.up_proj.ALinear_no_train.weight", "shape": [ 18944, 2305 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 87331840, "byteOffset": 0 } ], "md5sum": "d6245fbdbc40fe4c94b2f6319ce6eb42" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 29304832, "records": [ { "name": "model.layers.24.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1492, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10694656, "byteOffset": 0 }, { "name": "model.layers.24.mlp.up_proj.BLinear_train.weight", "shape": [ 165, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1182720, "byteOffset": 10694656 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 11877376 }, { "name": "model.layers.24.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 11884544 }, { "name": "model.layers.24.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 11987968 }, { "name": "model.layers.24.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 11988992 }, { "name": "model.layers.24.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 12000256 }, { "name": "model.layers.24.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 12724224 }, { "name": "model.layers.24.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 12803072 }, { "name": "model.layers.24.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 15691776 }, { "name": "model.layers.24.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 16007168 }, { "name": "model.layers.24.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 18895872 }, { "name": "model.layers.24.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 19211264 }, { "name": "model.layers.24.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 22099968 }, { "name": "model.layers.24.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 22107136 }, { "name": "model.layers.24.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 22422528 }, { "name": "model.layers.24.self_attn.q_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 25311232 }, { "name": "model.layers.24.self_attn.v_proj.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 25626624 }, { "name": "model.layers.24.self_attn.v_proj.weight", "shape": [ 512, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25627648 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29297664 } ], "md5sum": "b87b740cf5e261c789b251c27aa44cf7" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 32838656, "records": [ { "name": "model.layers.25.mlp.up_proj.ALinear_train.weight", "shape": [ 18944, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9699328, "byteOffset": 0 }, { "name": "model.layers.25.mlp.up_proj.BLinear_no_train.weight", "shape": [ 2305, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16522240, "byteOffset": 9699328 }, { "name": "model.layers.25.mlp.up_proj.BLinear_train.weight", "shape": [ 256, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 26221568 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 28056576 }, { "name": "model.layers.25.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 28063744 }, { "name": "model.layers.25.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 28167168 }, { "name": "model.layers.25.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 28168192 }, { "name": "model.layers.25.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 28179456 }, { "name": "model.layers.25.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 28903424 }, { "name": "model.layers.25.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 28982272 }, { "name": "model.layers.25.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 30143488 }, { "name": "model.layers.25.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 30265344 }, { "name": "model.layers.25.self_attn.o_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 31426560 }, { "name": "model.layers.25.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 31548416 }, { "name": "model.layers.25.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 32709632 }, { "name": "model.layers.25.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 32716800 } ], "md5sum": "b9bb7ca35d48065e8512bab160e729d2" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "bdf5575ded388b5fbb187a4e0d922db7" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.26.mlp.gate_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "6e6955f79aebe267969d44acba1a1bce" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.26.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "80aedd1cc53d86fa1c340658a475f94d" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.27.mlp.gate_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "7a25415d10f0b95143c7fe2cca7027f3" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.27.mlp.up_proj.weight", "shape": [ 18944, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "82243e8b3bc9efe4f20ff0dc8527abe7" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 29904896, "records": [ { "name": "model.layers.25.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 1161216 }, { "name": "model.layers.25.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 227328, "byteOffset": 1283072 }, { "name": "model.layers.25.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 1510400 }, { "name": "model.layers.25.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 1511424 }, { "name": "model.layers.25.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 222, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1591296, "byteOffset": 1536000 }, { "name": "model.layers.25.self_attn.v_proj.BLinear_train.weight", "shape": [ 24, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 172032, "byteOffset": 3127296 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3299328 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3306496 }, { "name": "model.layers.26.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 3313664 }, { "name": "model.layers.26.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 3417088 }, { "name": "model.layers.26.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 3418112 }, { "name": "model.layers.26.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 3429376 }, { "name": "model.layers.26.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 4153344 }, { "name": "model.layers.26.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 403 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 4232192 }, { "name": "model.layers.26.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 44 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 7120896 }, { "name": "model.layers.26.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 403, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2888704, "byteOffset": 7436288 }, { "name": "model.layers.26.self_attn.o_proj.BLinear_train.weight", "shape": [ 44, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 315392, "byteOffset": 10324992 }, { "name": "model.layers.26.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 10640384 }, { "name": "model.layers.26.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 11801600 }, { "name": "model.layers.26.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 11808768 }, { "name": "model.layers.26.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 11930624 }, { "name": "model.layers.26.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 13091840 }, { "name": "model.layers.26.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 13213696 }, { "name": "model.layers.26.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 13379584 }, { "name": "model.layers.26.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 13380608 }, { "name": "model.layers.26.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 13398016 }, { "name": "model.layers.26.self_attn.v_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 14559232 }, { "name": "model.layers.27.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 512, 101 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 103424, "byteOffset": 14681088 }, { "name": "model.layers.27.self_attn.k_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 14784512 }, { "name": "model.layers.27.self_attn.k_proj.ALinear_train.weight", "shape": [ 512, 11 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11264, "byteOffset": 14785536 }, { "name": "model.layers.27.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 101, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 723968, "byteOffset": 14796800 }, { "name": "model.layers.27.self_attn.k_proj.BLinear_train.weight", "shape": [ 11, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 78848, "byteOffset": 15520768 }, { "name": "model.layers.27.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 3584, 645 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 15599616 }, { "name": "model.layers.27.self_attn.o_proj.ALinear_train.weight", "shape": [ 3584, 71 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 20222976 }, { "name": "model.layers.27.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 645, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4623360, "byteOffset": 20731904 }, { "name": "model.layers.27.self_attn.o_proj.BLinear_train.weight", "shape": [ 71, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 508928, "byteOffset": 25355264 }, { "name": "model.layers.27.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 3584, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 25864192 }, { "name": "model.layers.27.self_attn.q_proj.ALinear_train.bias", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 27025408 }, { "name": "model.layers.27.self_attn.q_proj.ALinear_train.weight", "shape": [ 3584, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 27032576 }, { "name": "model.layers.27.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 27154432 }, { "name": "model.layers.27.self_attn.q_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 28315648 }, { "name": "model.layers.27.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 512, 162 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 28437504 }, { "name": "model.layers.27.self_attn.v_proj.ALinear_train.bias", "shape": [ 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1024, "byteOffset": 28603392 }, { "name": "model.layers.27.self_attn.v_proj.ALinear_train.weight", "shape": [ 512, 17 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17408, "byteOffset": 28604416 }, { "name": "model.layers.27.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 162, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1161216, "byteOffset": 28621824 }, { "name": "model.layers.27.self_attn.v_proj.BLinear_train.weight", "shape": [ 17, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 121856, "byteOffset": 29783040 } ], "md5sum": "81c25cc95c4ba80222503054ce72c5aa" } ] }