diff --git "a/Cauchy-3B-preview-MLC/ndarray-cache.json" "b/Cauchy-3B-preview-MLC/ndarray-cache.json" --- "a/Cauchy-3B-preview-MLC/ndarray-cache.json" +++ "b/Cauchy-3B-preview-MLC/ndarray-cache.json" @@ -13,8 +13,8 @@ { "name": "model.embed_tokens.weight", "shape": [ - 151646, - 2304 + 2304, + 151646 ], "dtype": "float16", "format": "f32-to-bf16", @@ -22,7 +22,7 @@ "byteOffset": 0 } ], - "md5sum": "8a00c0a708e920e3852c8b9851629960" + "md5sum": "c0c4bfb94521a8c77a60e9e08388b491" }, { "dataPath": "params_shard_1.bin", @@ -32,8 +32,9 @@ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -41,7 +42,7 @@ "byteOffset": 0 } ], - "md5sum": "0b51f0e75d7f873e2295779f4d58e014" + "md5sum": "b2b7798ad551b16180cd977d54178226" }, { "dataPath": "params_shard_2.bin", @@ -51,8 +52,9 @@ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -60,7 +62,7 @@ "byteOffset": 0 } ], - "md5sum": "020dfd02adb78334e916925506f6201e" + "md5sum": "8fd72a671e76869aed05c859b6ba14e2" }, { "dataPath": "params_shard_3.bin", @@ -70,8 +72,9 @@ { "name": "model.layers.0.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -91,8 +94,9 @@ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -140,7 +144,7 @@ "byteOffset": 26553856 } ], - "md5sum": "17321cf9d4e04c8efb5866b7b5ae1846" + "md5sum": "a34af8d73ea66b4677e72514ee65730d" }, { "dataPath": "params_shard_4.bin", @@ -150,8 +154,9 @@ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -159,7 +164,7 @@ "byteOffset": 0 } ], - "md5sum": "1f3679252a76366e068ef3fe7e97ce4b" + "md5sum": "8352e04537ba8ea59506c6048b556d03" }, { "dataPath": "params_shard_5.bin", @@ -169,8 +174,9 @@ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -178,7 +184,7 @@ "byteOffset": 0 } ], - "md5sum": "8cc32b075b6a78ca36a08c8466d96351" + "md5sum": "ad8b98ad91495b58324ebe83e4f47b4a" }, { "dataPath": "params_shard_6.bin", @@ -188,8 +194,9 @@ { "name": "model.layers.1.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -209,8 +216,9 @@ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -258,7 +266,7 @@ "byteOffset": 26553856 } ], - "md5sum": "4e57f85cc1219a8a99b67b8897eff232" + "md5sum": "6a3e7f3d50b2c151ce66814b88adbebb" }, { "dataPath": "params_shard_7.bin", @@ -268,8 +276,9 @@ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -277,7 +286,7 @@ "byteOffset": 0 } ], - "md5sum": "20a651819b193bd4c24d12e4682aec1d" + "md5sum": "b88ab1961d2917ffa7a776ec4857151d" }, { "dataPath": "params_shard_8.bin", @@ -287,8 +296,9 @@ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -296,7 +306,7 @@ "byteOffset": 0 } ], - "md5sum": "10d6166e08f8b83f65f5245a9f77df22" + "md5sum": "d1ab5189e8ea43826a367086964539a1" }, { "dataPath": "params_shard_9.bin", @@ -306,8 +316,9 @@ { "name": "model.layers.2.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -327,8 +338,9 @@ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -376,7 +388,7 @@ "byteOffset": 26553856 } ], - "md5sum": "a7e7fac7bf234b6a18384bd509b2ab3f" + "md5sum": "31a5e376dc6355d50405bef1b18154a0" }, { "dataPath": "params_shard_10.bin", @@ -386,8 +398,9 @@ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -395,7 +408,7 @@ "byteOffset": 0 } ], - "md5sum": "e462cc7eb312ac1874346d30055ccab4" + "md5sum": "6efc62345c7f09b0ddf12e55344df2a7" }, { "dataPath": "params_shard_11.bin", @@ -405,8 +418,9 @@ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -414,7 +428,7 @@ "byteOffset": 0 } ], - "md5sum": "ddf04a452eb6aa2644667c5c117f1337" + "md5sum": "77220b226468146055a1fa7550e645e0" }, { "dataPath": "params_shard_12.bin", @@ -424,8 +438,9 @@ { "name": "model.layers.3.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -445,8 +460,9 @@ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -494,7 +510,7 @@ "byteOffset": 26553856 } ], - "md5sum": "5b1a6fa5162053d7d221daca711a2584" + "md5sum": "39b310dba007f2d19c56922fae7ed5b0" }, { "dataPath": "params_shard_13.bin", @@ -504,8 +520,9 @@ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -513,7 +530,7 @@ "byteOffset": 0 } ], - "md5sum": "aabc87c3644d59ae90947efb9357b795" + "md5sum": "264b3f70f73122faed705c91f51388db" }, { "dataPath": "params_shard_14.bin", @@ -523,8 +540,9 @@ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -532,7 +550,7 @@ "byteOffset": 0 } ], - "md5sum": "6be0c1c3cbbc35cc1e802bf5355ca4bb" + "md5sum": "50056449a9dcc72b4f608128d30aaab4" }, { "dataPath": "params_shard_15.bin", @@ -542,8 +560,9 @@ { "name": "model.layers.4.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -563,8 +582,9 @@ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -612,7 +632,7 @@ "byteOffset": 26553856 } ], - "md5sum": "15f0c36294902032a085e0af4bca98eb" + "md5sum": "0d1ab9913e897a8c9072f198d654be9e" }, { "dataPath": "params_shard_16.bin", @@ -622,8 +642,9 @@ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -631,7 +652,7 @@ "byteOffset": 0 } ], - "md5sum": "828fd40268398cd42119527af7242710" + "md5sum": "e29e1799190e7807cce339cf223dc911" }, { "dataPath": "params_shard_17.bin", @@ -641,8 +662,9 @@ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -650,7 +672,7 @@ "byteOffset": 0 } ], - "md5sum": "49ad8f8873cb997078f0c3d485219a21" + "md5sum": "6a0dd24637ced53c0582303feb667feb" }, { "dataPath": "params_shard_18.bin", @@ -660,8 +682,9 @@ { "name": "model.layers.5.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -681,8 +704,9 @@ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -730,7 +754,7 @@ "byteOffset": 26553856 } ], - "md5sum": "bfaf010d4b2cd525e3ffee5d7b6df5fb" + "md5sum": "3b1b041395faecb9b9d7282a161bec49" }, { "dataPath": "params_shard_19.bin", @@ -740,8 +764,9 @@ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -749,7 +774,7 @@ "byteOffset": 0 } ], - "md5sum": "b680a60da1be3c5441d275ae2b1fde80" + "md5sum": "7c2f25dab15ef369aaf4d67aa6823b23" }, { "dataPath": "params_shard_20.bin", @@ -759,8 +784,9 @@ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -768,7 +794,7 @@ "byteOffset": 0 } ], - "md5sum": "337749bdbea714fa8f7680640f68116b" + "md5sum": "26de2a704f0e21b76218375f64bfe951" }, { "dataPath": "params_shard_21.bin", @@ -778,8 +804,9 @@ { "name": "model.layers.6.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -799,8 +826,9 @@ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -848,7 +876,7 @@ "byteOffset": 26553856 } ], - "md5sum": "cfb4c81eb9e2d498451f30a6b98e50a6" + "md5sum": "dc6a0c7554328066638b1767a1cd1541" }, { "dataPath": "params_shard_22.bin", @@ -858,8 +886,9 @@ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -867,7 +896,7 @@ "byteOffset": 0 } ], - "md5sum": "aec539f45ed879270f1df323595c3a2f" + "md5sum": "11ee5041a35a58aeecadff5d8ad504e1" }, { "dataPath": "params_shard_23.bin", @@ -877,8 +906,9 @@ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -886,7 +916,7 @@ "byteOffset": 0 } ], - "md5sum": "f5bfd11d2a12dbb8fb7392d308c03a3e" + "md5sum": "069f2396584c4de563c885e5e0358911" }, { "dataPath": "params_shard_24.bin", @@ -896,8 +926,9 @@ { "name": "model.layers.7.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -917,8 +948,9 @@ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -966,7 +998,7 @@ "byteOffset": 26553856 } ], - "md5sum": "9f206c117b340dc480dc75677347a4af" + "md5sum": "0005fbb383ccf496db3337836a8d9453" }, { "dataPath": "params_shard_25.bin", @@ -976,8 +1008,9 @@ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -985,7 +1018,7 @@ "byteOffset": 0 } ], - "md5sum": "c57dd4891c15b9a88665790360aff452" + "md5sum": "aea7507daea0446116f6f6d685d6f913" }, { "dataPath": "params_shard_26.bin", @@ -995,8 +1028,9 @@ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1004,7 +1038,7 @@ "byteOffset": 0 } ], - "md5sum": "f2520e169951edcffcd3d325c5ded79a" + "md5sum": "727eeb18ede4b771493a9a6eca99ac44" }, { "dataPath": "params_shard_27.bin", @@ -1014,8 +1048,9 @@ { "name": "model.layers.8.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1035,8 +1070,9 @@ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1084,7 +1120,7 @@ "byteOffset": 26553856 } ], - "md5sum": "34467250cec5c5de0fcf2d0c82e61445" + "md5sum": "383a440217093395a0a28492e64a8263" }, { "dataPath": "params_shard_28.bin", @@ -1094,8 +1130,9 @@ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1103,7 +1140,7 @@ "byteOffset": 0 } ], - "md5sum": "8c4ffcdba588b5c3ac74e04acbbbf823" + "md5sum": "c2374550c2d0ca9f18da6585e66c2250" }, { "dataPath": "params_shard_29.bin", @@ -1113,8 +1150,9 @@ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1122,7 +1160,7 @@ "byteOffset": 0 } ], - "md5sum": "9c203cf666a2021712570028c57599a4" + "md5sum": "24ef70e6697339816ceb060c9252e0fa" }, { "dataPath": "params_shard_30.bin", @@ -1132,8 +1170,9 @@ { "name": "model.layers.9.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1153,8 +1192,9 @@ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1202,7 +1242,7 @@ "byteOffset": 26553856 } ], - "md5sum": "f5d677fac9f4bcd26718bbe02b1411b5" + "md5sum": "8d2e4ecc75448ab17437330cfb9a5c4a" }, { "dataPath": "params_shard_31.bin", @@ -1212,8 +1252,9 @@ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1221,7 +1262,7 @@ "byteOffset": 0 } ], - "md5sum": "69f48c376df36168ef7bff954dc0c29f" + "md5sum": "6ce1e6f0f4db822b8de4f8f6284e5d9a" }, { "dataPath": "params_shard_32.bin", @@ -1231,8 +1272,9 @@ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1240,7 +1282,7 @@ "byteOffset": 0 } ], - "md5sum": "df5a8548bc0b94a1345afab5df3c9059" + "md5sum": "3e0b7259552f04dd5ad9973a735529e7" }, { "dataPath": "params_shard_33.bin", @@ -1250,8 +1292,9 @@ { "name": "model.layers.10.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1271,8 +1314,9 @@ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1320,7 +1364,7 @@ "byteOffset": 26553856 } ], - "md5sum": "7ff75d3c9e3d7d1d194f2c9da821b1be" + "md5sum": "00724cf0c5d8f7be78b37e7b19297a2f" }, { "dataPath": "params_shard_34.bin", @@ -1330,8 +1374,9 @@ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1339,7 +1384,7 @@ "byteOffset": 0 } ], - "md5sum": "77a4a3dab1300bbdaefc07f2ed772fa9" + "md5sum": "869fb52b59a403dc24e7560170bc585b" }, { "dataPath": "params_shard_35.bin", @@ -1349,8 +1394,9 @@ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1358,7 +1404,7 @@ "byteOffset": 0 } ], - "md5sum": "c6d4656264295f832ae27b1bb3e160fb" + "md5sum": "61c95b781ad048c7278d3e2dd3f66c2f" }, { "dataPath": "params_shard_36.bin", @@ -1368,8 +1414,9 @@ { "name": "model.layers.11.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1389,8 +1436,9 @@ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1438,7 +1486,7 @@ "byteOffset": 26553856 } ], - "md5sum": "aca813c77019c7d15e61f857bdc92bf2" + "md5sum": "cdf5269d41ce98fa0f378a660635c480" }, { "dataPath": "params_shard_37.bin", @@ -1448,8 +1496,9 @@ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1457,7 +1506,7 @@ "byteOffset": 0 } ], - "md5sum": "01a13d3483ee2c0083294195de3cf4b1" + "md5sum": "b809342b4f6da334c4c2d6f95e48bcac" }, { "dataPath": "params_shard_38.bin", @@ -1467,8 +1516,9 @@ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1476,7 +1526,7 @@ "byteOffset": 0 } ], - "md5sum": "ab3a640f607c6231dad6d3876fc9c3c6" + "md5sum": "92b1b101abdb8659462200361f87a217" }, { "dataPath": "params_shard_39.bin", @@ -1486,8 +1536,9 @@ { "name": "model.layers.12.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1507,8 +1558,9 @@ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1556,7 +1608,7 @@ "byteOffset": 26553856 } ], - "md5sum": "1b257a510c658329cc44db1a3c53f44e" + "md5sum": "1f26220eb2e5218e6eddaea808f48906" }, { "dataPath": "params_shard_40.bin", @@ -1566,8 +1618,9 @@ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1575,7 +1628,7 @@ "byteOffset": 0 } ], - "md5sum": "febc040a077d61705b0a3667c3fb9853" + "md5sum": "8f7d579dbd3896b0c9d8494e2c09eebf" }, { "dataPath": "params_shard_41.bin", @@ -1585,8 +1638,9 @@ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1594,7 +1648,7 @@ "byteOffset": 0 } ], - "md5sum": "853ae9f4dbbcac2c368d8a2f9321b28a" + "md5sum": "2a0b1ad1f526b5f9470c817375c5076d" }, { "dataPath": "params_shard_42.bin", @@ -1604,8 +1658,9 @@ { "name": "model.layers.13.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1625,8 +1680,9 @@ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1674,7 +1730,7 @@ "byteOffset": 26553856 } ], - "md5sum": "0dbe8eb371670e37b638a449bea10bb0" + "md5sum": "d522f399e9ccfd0a2f58a7a6a0c6e462" }, { "dataPath": "params_shard_43.bin", @@ -1684,8 +1740,9 @@ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1693,7 +1750,7 @@ "byteOffset": 0 } ], - "md5sum": "7dd010aba35552f2e2ab504ee18845cf" + "md5sum": "c6676fadd0632de78bfe771174beab8f" }, { "dataPath": "params_shard_44.bin", @@ -1703,8 +1760,9 @@ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1712,7 +1770,7 @@ "byteOffset": 0 } ], - "md5sum": "c8d9a8daa95f822367906f785c7578f6" + "md5sum": "3661b070dd1ce346158fe37712343dc9" }, { "dataPath": "params_shard_45.bin", @@ -1722,8 +1780,9 @@ { "name": "model.layers.14.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1743,8 +1802,9 @@ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1792,7 +1852,7 @@ "byteOffset": 26553856 } ], - "md5sum": "70cf14853c1343f91f0d537ad179d9a2" + "md5sum": "246547e55187bb51c535ebe86e3bb372" }, { "dataPath": "params_shard_46.bin", @@ -1802,8 +1862,9 @@ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1811,7 +1872,7 @@ "byteOffset": 0 } ], - "md5sum": "bc98551dae52e3f7fe2bd62fb99c104b" + "md5sum": "9ce899f8c2d657574735a59d716e8c1c" }, { "dataPath": "params_shard_47.bin", @@ -1821,8 +1882,9 @@ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1830,7 +1892,7 @@ "byteOffset": 0 } ], - "md5sum": "ff94bc212abe9781a21199d18a9b5e05" + "md5sum": "ba08ef438f054198bdbc7585c4ce41d4" }, { "dataPath": "params_shard_48.bin", @@ -1840,8 +1902,9 @@ { "name": "model.layers.15.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1861,8 +1924,9 @@ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1910,7 +1974,7 @@ "byteOffset": 26553856 } ], - "md5sum": "6fedd089bca8a3eeeb9c42764d2302ea" + "md5sum": "6dfac609e502c0c105885c2fd5e8c2d8" }, { "dataPath": "params_shard_49.bin", @@ -1920,8 +1984,9 @@ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1929,7 +1994,7 @@ "byteOffset": 0 } ], - "md5sum": "81a55a18aaa3470867fd62a62507101b" + "md5sum": "c275f2697847e268b9247bade0c9f0f7" }, { "dataPath": "params_shard_50.bin", @@ -1939,8 +2004,9 @@ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1948,7 +2014,7 @@ "byteOffset": 0 } ], - "md5sum": "43a71c09897814f3c6ccae465cb6c8da" + "md5sum": "72a5cb8a3c3f6d29092373adb2986cbd" }, { "dataPath": "params_shard_51.bin", @@ -1958,8 +2024,9 @@ { "name": "model.layers.16.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -1979,8 +2046,9 @@ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2028,7 +2096,7 @@ "byteOffset": 26553856 } ], - "md5sum": "c1ec00b97492f1c186a1c13e39976925" + "md5sum": "cc7a26b6e087aee4454259a8886779eb" }, { "dataPath": "params_shard_52.bin", @@ -2038,8 +2106,9 @@ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2047,7 +2116,7 @@ "byteOffset": 0 } ], - "md5sum": "e91dd21103cd927ccb949c54148b8fcd" + "md5sum": "4c9b5d50dbb7f79af445566b8704f67a" }, { "dataPath": "params_shard_53.bin", @@ -2057,8 +2126,9 @@ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2066,7 +2136,7 @@ "byteOffset": 0 } ], - "md5sum": "7efcd7e548234a864cc2fc57871fbb6c" + "md5sum": "d7eb3e656935e1f03d3900f4c43712d6" }, { "dataPath": "params_shard_54.bin", @@ -2076,8 +2146,9 @@ { "name": "model.layers.17.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2097,8 +2168,9 @@ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2146,7 +2218,7 @@ "byteOffset": 26553856 } ], - "md5sum": "a838a67a1a4ea4cf245d41af0758b2f3" + "md5sum": "e38b6776283187bccba898f2e14f9d07" }, { "dataPath": "params_shard_55.bin", @@ -2156,8 +2228,9 @@ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2165,7 +2238,7 @@ "byteOffset": 0 } ], - "md5sum": "b89849c76c686b16ec268c3a7e359848" + "md5sum": "79459b9878ebf4a7319db8e48e363085" }, { "dataPath": "params_shard_56.bin", @@ -2175,8 +2248,9 @@ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2184,7 +2258,7 @@ "byteOffset": 0 } ], - "md5sum": "8fa78290cc7d704f55e18e1db08e1fd2" + "md5sum": "4ccad088e94f33dab7984c1f11d93b35" }, { "dataPath": "params_shard_57.bin", @@ -2194,8 +2268,9 @@ { "name": "model.layers.18.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2215,8 +2290,9 @@ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2264,7 +2340,7 @@ "byteOffset": 26553856 } ], - "md5sum": "a4c6a7f40b2465c0ac1fded661827b11" + "md5sum": "9e0b3d2b3859dc4c75fcb23538d6a896" }, { "dataPath": "params_shard_58.bin", @@ -2274,8 +2350,9 @@ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2283,7 +2360,7 @@ "byteOffset": 0 } ], - "md5sum": "74ca6dc81d52666d32e331cf993c9e96" + "md5sum": "bd7d9ef55bc311d9e9c5b276cb534e38" }, { "dataPath": "params_shard_59.bin", @@ -2293,8 +2370,9 @@ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2302,7 +2380,7 @@ "byteOffset": 0 } ], - "md5sum": "8ebfc95d51d7773c1e9a03d3bac333e7" + "md5sum": "db4a7469a94496bee8ee68dd7aa9c148" }, { "dataPath": "params_shard_60.bin", @@ -2312,8 +2390,9 @@ { "name": "model.layers.19.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2333,8 +2412,9 @@ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2382,7 +2462,7 @@ "byteOffset": 26553856 } ], - "md5sum": "bf4d611409e241b06b15a753b895aedd" + "md5sum": "6d0fc88a3784ec33544e68fbf7f9b4f8" }, { "dataPath": "params_shard_61.bin", @@ -2392,8 +2472,9 @@ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2401,7 +2482,7 @@ "byteOffset": 0 } ], - "md5sum": "f7dbd70d69a5e3bdbc56092e31287a8b" + "md5sum": "ba927b0c6d6760dc005757dac781d7d5" }, { "dataPath": "params_shard_62.bin", @@ -2411,8 +2492,9 @@ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2420,7 +2502,7 @@ "byteOffset": 0 } ], - "md5sum": "d2f7569f781a764837fb7d3fd78f961a" + "md5sum": "dff9c6708c3ba41992e06e6174793a3d" }, { "dataPath": "params_shard_63.bin", @@ -2430,8 +2512,9 @@ { "name": "model.layers.20.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2451,8 +2534,9 @@ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2500,7 +2584,7 @@ "byteOffset": 26553856 } ], - "md5sum": "6c81716520bd261b464893bf5adef2a7" + "md5sum": "b3ab38d6e058abb322b3287a242ae42a" }, { "dataPath": "params_shard_64.bin", @@ -2510,8 +2594,9 @@ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2519,7 +2604,7 @@ "byteOffset": 0 } ], - "md5sum": "a894c267f0e4b1b87f222a6d007ec8c8" + "md5sum": "9e0d0d730dba5d0e0c90d1fd6bde988f" }, { "dataPath": "params_shard_65.bin", @@ -2529,8 +2614,9 @@ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2538,7 +2624,7 @@ "byteOffset": 0 } ], - "md5sum": "75e5b87c1fe31d8691bff47b00c6765a" + "md5sum": "629e70cd777288ab4b97d4fb91e915bb" }, { "dataPath": "params_shard_66.bin", @@ -2548,8 +2634,9 @@ { "name": "model.layers.21.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2569,8 +2656,9 @@ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2618,7 +2706,7 @@ "byteOffset": 26553856 } ], - "md5sum": "89bd9812065fd711121756e3718ac20d" + "md5sum": "0d77eb414fb2b96cbd308e8688fb1ab2" }, { "dataPath": "params_shard_67.bin", @@ -2628,8 +2716,9 @@ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2637,7 +2726,7 @@ "byteOffset": 0 } ], - "md5sum": "8071940b11f71673dcd303f815ab1488" + "md5sum": "caa575faa36289686d51457ac83fc935" }, { "dataPath": "params_shard_68.bin", @@ -2647,8 +2736,9 @@ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2656,7 +2746,7 @@ "byteOffset": 0 } ], - "md5sum": "db5ba8be7b0b6fb79bde3c1694245e94" + "md5sum": "8e102a68ecfad554323dc75678b948e5" }, { "dataPath": "params_shard_69.bin", @@ -2666,8 +2756,9 @@ { "name": "model.layers.22.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2687,8 +2778,9 @@ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2736,7 +2828,7 @@ "byteOffset": 26553856 } ], - "md5sum": "1596639d5aed9dd04969ef3f6f514c3e" + "md5sum": "483363a6a64146f8ccc79e6889f220e1" }, { "dataPath": "params_shard_70.bin", @@ -2746,8 +2838,9 @@ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2755,7 +2848,7 @@ "byteOffset": 0 } ], - "md5sum": "898ac371c479aa647b2bce35b1621f5c" + "md5sum": "e361049c58cea5250b73b95fbf57fec1" }, { "dataPath": "params_shard_71.bin", @@ -2765,8 +2858,9 @@ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2774,7 +2868,7 @@ "byteOffset": 0 } ], - "md5sum": "f0c27de13c71f7d3f3b544c3e3cd530a" + "md5sum": "2a37b42f271d984c02f33986cdc546ec" }, { "dataPath": "params_shard_72.bin", @@ -2784,8 +2878,9 @@ { "name": "model.layers.23.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2805,8 +2900,9 @@ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2854,7 +2950,7 @@ "byteOffset": 26553856 } ], - "md5sum": "35f196679e29df198abcb91097247e19" + "md5sum": "ae4b220a01e6371dfd63c7bf32a6d8fc" }, { "dataPath": "params_shard_73.bin", @@ -2864,8 +2960,9 @@ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2873,7 +2970,7 @@ "byteOffset": 0 } ], - "md5sum": "18fe487f453290c4dc0c8a9da16bb5b2" + "md5sum": "9f9be8674496ec3058a070e08850acc2" }, { "dataPath": "params_shard_74.bin", @@ -2883,8 +2980,9 @@ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2892,7 +2990,7 @@ "byteOffset": 0 } ], - "md5sum": "a5a224772cd6e804fdc02ca644d87ecc" + "md5sum": "d956b4b0518d7760c83f3687646180bf" }, { "dataPath": "params_shard_75.bin", @@ -2902,8 +3000,9 @@ { "name": "model.layers.24.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2923,8 +3022,9 @@ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2972,7 +3072,7 @@ "byteOffset": 26553856 } ], - "md5sum": "acde40c7dca1b3b83c25596be496f2df" + "md5sum": "76382836c253a1a62436c762d2fcf268" }, { "dataPath": "params_shard_76.bin", @@ -2982,8 +3082,9 @@ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -2991,7 +3092,7 @@ "byteOffset": 0 } ], - "md5sum": "881c17f3a6a55c566680694cc7b16e76" + "md5sum": "d11abf7988d3cc8dd4c4ec43cb70b5fb" }, { "dataPath": "params_shard_77.bin", @@ -3001,8 +3102,9 @@ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3010,7 +3112,7 @@ "byteOffset": 0 } ], - "md5sum": "ab9dd15bd3dcb15f8bf2f40b8abebec6" + "md5sum": "eb8375d3e7a5f91881f0935703a6ac70" }, { "dataPath": "params_shard_78.bin", @@ -3020,8 +3122,9 @@ { "name": "model.layers.25.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3041,8 +3144,9 @@ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3090,7 +3194,7 @@ "byteOffset": 26553856 } ], - "md5sum": "adec2a133d14574602301911112081aa" + "md5sum": "c48ed2d90fb4a7533c8d5a43009c567a" }, { "dataPath": "params_shard_79.bin", @@ -3100,8 +3204,9 @@ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3109,7 +3214,7 @@ "byteOffset": 0 } ], - "md5sum": "109af8773bb273d0774768f572d3dce8" + "md5sum": "7c77d9d534ff83575dec661b2ddacdd7" }, { "dataPath": "params_shard_80.bin", @@ -3119,8 +3224,9 @@ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3128,7 +3234,7 @@ "byteOffset": 0 } ], - "md5sum": "de0ab6c160e53c4005a30b41c4bc7fb1" + "md5sum": "6205311e54c1795e2fdea2e35b2e147d" }, { "dataPath": "params_shard_81.bin", @@ -3138,8 +3244,9 @@ { "name": "model.layers.26.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3159,8 +3266,9 @@ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3208,7 +3316,7 @@ "byteOffset": 26553856 } ], - "md5sum": "a2b79e938188d75e3a4664b4ca55afef" + "md5sum": "1072cf06f6fdb2fe4605eb6c689910bf" }, { "dataPath": "params_shard_82.bin", @@ -3218,8 +3326,9 @@ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3227,7 +3336,7 @@ "byteOffset": 0 } ], - "md5sum": "a445ac1deff5f613259243e041241016" + "md5sum": "9e4e8b79f9daae76b5ffd834d039780e" }, { "dataPath": "params_shard_83.bin", @@ -3237,8 +3346,9 @@ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3246,7 +3356,7 @@ "byteOffset": 0 } ], - "md5sum": "6ef27f5f50bd9c9fdc06fe726ada58ab" + "md5sum": "85909e335659878f76729604d3582607" }, { "dataPath": "params_shard_84.bin", @@ -3256,8 +3366,9 @@ { "name": "model.layers.27.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3277,8 +3388,9 @@ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3326,7 +3438,7 @@ "byteOffset": 26553856 } ], - "md5sum": "fb3e1e74c6adf5546f93f1e352de03a4" + "md5sum": "25dcc058ac7c3a40e442530d259d36c8" }, { "dataPath": "params_shard_85.bin", @@ -3336,8 +3448,9 @@ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3345,7 +3458,7 @@ "byteOffset": 0 } ], - "md5sum": "1b8581cc5827a861b015ca2e42c21005" + "md5sum": "48f80950335a0b951dee3276ec443bf4" }, { "dataPath": "params_shard_86.bin", @@ -3355,8 +3468,9 @@ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3364,7 +3478,7 @@ "byteOffset": 0 } ], - "md5sum": "0a33be54252c65195188c950f264ce0e" + "md5sum": "98dd8e97ff619a80580894f4214b5c6a" }, { "dataPath": "params_shard_87.bin", @@ -3374,8 +3488,9 @@ { "name": "model.layers.28.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3395,8 +3510,9 @@ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3444,7 +3560,7 @@ "byteOffset": 26553856 } ], - "md5sum": "efe50e78b2671f15192eaf83007d436f" + "md5sum": "a22ec67acfe6cbcb2497dffad6a9cf4d" }, { "dataPath": "params_shard_88.bin", @@ -3454,8 +3570,9 @@ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3463,7 +3580,7 @@ "byteOffset": 0 } ], - "md5sum": "775f5326f0a92a7bf095e9a20b217c4a" + "md5sum": "b16bce7494665760f4ac18f14375fa4d" }, { "dataPath": "params_shard_89.bin", @@ -3473,8 +3590,9 @@ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3482,7 +3600,7 @@ "byteOffset": 0 } ], - "md5sum": "193e733526e9cc9bdaf36b4f0069cd39" + "md5sum": "0e8fe2e5322812016be52da8f3993fe0" }, { "dataPath": "params_shard_90.bin", @@ -3492,8 +3610,9 @@ { "name": "model.layers.29.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3513,8 +3632,9 @@ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3562,7 +3682,7 @@ "byteOffset": 26553856 } ], - "md5sum": "b4c616377f183886cbbb7ae44e2dd478" + "md5sum": "5775462c018641678b39fa117f709b35" }, { "dataPath": "params_shard_91.bin", @@ -3572,8 +3692,9 @@ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3581,7 +3702,7 @@ "byteOffset": 0 } ], - "md5sum": "c986e29c1e904e7ba89ffc8355dba865" + "md5sum": "e5733d57f58480120e28c6224ed8b937" }, { "dataPath": "params_shard_92.bin", @@ -3591,8 +3712,9 @@ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3600,7 +3722,7 @@ "byteOffset": 0 } ], - "md5sum": "634fde97a6ff4d5fe88adb63bc28bd49" + "md5sum": "67c2dc4f7671056881f425348c63ae6b" }, { "dataPath": "params_shard_93.bin", @@ -3610,8 +3732,9 @@ { "name": "model.layers.30.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3631,8 +3754,9 @@ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3680,7 +3804,7 @@ "byteOffset": 26553856 } ], - "md5sum": "b8ac916b4d72a94ccd575dd4c4b59a34" + "md5sum": "57a6dc175184fda2aa35e7f0c6fd2c9f" }, { "dataPath": "params_shard_94.bin", @@ -3690,8 +3814,9 @@ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3699,7 +3824,7 @@ "byteOffset": 0 } ], - "md5sum": "4a026e9468954e66f8e198f1fbe6b0ee" + "md5sum": "f77dd52fc50f15add1744fc3711fdae2" }, { "dataPath": "params_shard_95.bin", @@ -3709,8 +3834,9 @@ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3718,7 +3844,7 @@ "byteOffset": 0 } ], - "md5sum": "f0146fd21f831ca3b6a0bc019db184ee" + "md5sum": "2956253c32bb4a22b0b9b9ad7a5c54b1" }, { "dataPath": "params_shard_96.bin", @@ -3728,8 +3854,9 @@ { "name": "model.layers.31.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3749,8 +3876,9 @@ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3798,7 +3926,7 @@ "byteOffset": 26553856 } ], - "md5sum": "e91bb4ddee6b2faf6b29f1bb17ce3a33" + "md5sum": "93781e6b51b095b4587863900a1bf550" }, { "dataPath": "params_shard_97.bin", @@ -3808,8 +3936,9 @@ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3817,7 +3946,7 @@ "byteOffset": 0 } ], - "md5sum": "4d4b626afb01e4f7c744a64b58ee65da" + "md5sum": "ecf71575931fe643d5e9ae291d51ef5b" }, { "dataPath": "params_shard_98.bin", @@ -3827,8 +3956,9 @@ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3836,7 +3966,7 @@ "byteOffset": 0 } ], - "md5sum": "bf6c528ba2026ae46a36d58ac0c15154" + "md5sum": "49e4ff78febf30823b75d444def9b802" }, { "dataPath": "params_shard_99.bin", @@ -3846,8 +3976,9 @@ { "name": "model.layers.32.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3867,8 +3998,9 @@ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3916,7 +4048,7 @@ "byteOffset": 26553856 } ], - "md5sum": "a44252c6a7d5f7313fa7ce980946ecb0" + "md5sum": "ec8290191b8b377aa7ef5dff6a9190c2" }, { "dataPath": "params_shard_100.bin", @@ -3926,8 +4058,9 @@ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3935,7 +4068,7 @@ "byteOffset": 0 } ], - "md5sum": "75556cc8aaf00f34b82da8fb3e12a9db" + "md5sum": "662098a8ed0c194ee4dac0d3d3563eb4" }, { "dataPath": "params_shard_101.bin", @@ -3945,8 +4078,9 @@ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3954,7 +4088,7 @@ "byteOffset": 0 } ], - "md5sum": "8b1302cd2a9c514a1221e8d6c7f28911" + "md5sum": "f4d8921af5ba8c72eda4517a4775f1fd" }, { "dataPath": "params_shard_102.bin", @@ -3964,8 +4098,9 @@ { "name": "model.layers.33.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -3985,8 +4120,9 @@ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4034,7 +4170,7 @@ "byteOffset": 26553856 } ], - "md5sum": "93e8d0ab9193f4e8856d80423938f699" + "md5sum": "0fc91bfb07024cb7e52038ed0eb405e3" }, { "dataPath": "params_shard_103.bin", @@ -4044,8 +4180,9 @@ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4053,7 +4190,7 @@ "byteOffset": 0 } ], - "md5sum": "e985f2da0ea10f0c2a917e9c9bbc97f5" + "md5sum": "ee42f7e06c5e030af2e9060bec00b07e" }, { "dataPath": "params_shard_104.bin", @@ -4063,8 +4200,9 @@ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4072,7 +4210,7 @@ "byteOffset": 0 } ], - "md5sum": "3569d81bd0f475446a7a12841db57284" + "md5sum": "37851961d593aec1293af31e3863bb09" }, { "dataPath": "params_shard_105.bin", @@ -4082,8 +4220,9 @@ { "name": "model.layers.34.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4103,8 +4242,9 @@ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4152,7 +4292,7 @@ "byteOffset": 26553856 } ], - "md5sum": "0bc77b2fdf34fdb9a9d8a833b3ddadac" + "md5sum": "fa179e7167e9987fdc8fd87a00e5b157" }, { "dataPath": "params_shard_106.bin", @@ -4162,8 +4302,9 @@ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4171,7 +4312,7 @@ "byteOffset": 0 } ], - "md5sum": "3ac59afff684fa729d6ab82f46ab262d" + "md5sum": "8110c074d89ab2450d6bdf7cbd934d5a" }, { "dataPath": "params_shard_107.bin", @@ -4181,8 +4322,9 @@ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4190,7 +4332,7 @@ "byteOffset": 0 } ], - "md5sum": "4d6a110621762882f75714e4b81d4bbc" + "md5sum": "a6431de1ef25b2e2002170c672027233" }, { "dataPath": "params_shard_108.bin", @@ -4200,8 +4342,9 @@ { "name": "model.layers.35.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4221,8 +4364,9 @@ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4270,7 +4414,7 @@ "byteOffset": 26553856 } ], - "md5sum": "0c499ffabe5602f7a15c86357b760797" + "md5sum": "3ace05f99fe6ee154d1f8d6932fc1e91" }, { "dataPath": "params_shard_109.bin", @@ -4280,8 +4424,9 @@ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4289,7 +4434,7 @@ "byteOffset": 0 } ], - "md5sum": "43ce8b0bf2e5829b11f34cc18e1ca8cd" + "md5sum": "3427cf89f591ea7feade8973f84c88cf" }, { "dataPath": "params_shard_110.bin", @@ -4299,8 +4444,9 @@ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4308,7 +4454,7 @@ "byteOffset": 0 } ], - "md5sum": "9b1ec4a3f992125c3b0e917e07bbb162" + "md5sum": "13b4c4175befdec1edcd21fc143d1144" }, { "dataPath": "params_shard_111.bin", @@ -4318,8 +4464,9 @@ { "name": "model.layers.36.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4339,8 +4486,9 @@ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4388,7 +4536,7 @@ "byteOffset": 26553856 } ], - "md5sum": "d6fa19d981be4c6267c02311f23e7f61" + "md5sum": "5466aa1db4c7be054f8b3621f1c4aee0" }, { "dataPath": "params_shard_112.bin", @@ -4398,8 +4546,9 @@ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4407,7 +4556,7 @@ "byteOffset": 0 } ], - "md5sum": "0af122ac697c8ed599e91552782e47fa" + "md5sum": "cbb4d16f02b88014d43a407746ef86a8" }, { "dataPath": "params_shard_113.bin", @@ -4417,8 +4566,9 @@ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4426,7 +4576,7 @@ "byteOffset": 0 } ], - "md5sum": "2dc4f3661fe397133bcbe20517d43ee2" + "md5sum": "620e82519973787c6337e7ce53bf67f3" }, { "dataPath": "params_shard_114.bin", @@ -4436,8 +4586,9 @@ { "name": "model.layers.37.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4457,8 +4608,9 @@ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4506,7 +4658,7 @@ "byteOffset": 26553856 } ], - "md5sum": "6f652f01885aec0ec1d43438a3cba8cd" + "md5sum": "a6f8775657d221cd38422e5e1cac3c65" }, { "dataPath": "params_shard_115.bin", @@ -4516,8 +4668,9 @@ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4525,7 +4678,7 @@ "byteOffset": 0 } ], - "md5sum": "76cbc4536d88af65693b896e403af9b5" + "md5sum": "a51e4cf5ee7a46418ba61b758114a32b" }, { "dataPath": "params_shard_116.bin", @@ -4535,8 +4688,9 @@ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4544,7 +4698,7 @@ "byteOffset": 0 } ], - "md5sum": "f1e895b5330d603f13e6333a9d82d660" + "md5sum": "8f522f0dfb10a3b2e5aa79f71d8aa484" }, { "dataPath": "params_shard_117.bin", @@ -4554,8 +4708,9 @@ { "name": "model.layers.38.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4575,8 +4730,9 @@ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4624,7 +4780,7 @@ "byteOffset": 26553856 } ], - "md5sum": "b8dc6bf1d85ce53cedb6673cea076299" + "md5sum": "41a67979340df891bf9d7654eca90dd2" }, { "dataPath": "params_shard_118.bin", @@ -4634,8 +4790,9 @@ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4643,7 +4800,7 @@ "byteOffset": 0 } ], - "md5sum": "9e2bc5756551044e635b41f52f9b4177" + "md5sum": "98de39d715ce5fe46b8262fc8737672f" }, { "dataPath": "params_shard_119.bin", @@ -4653,8 +4810,9 @@ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4662,7 +4820,7 @@ "byteOffset": 0 } ], - "md5sum": "72d705f7b4e7211b26378200ca803f8a" + "md5sum": "72f407b7352d86cd3c8b8bb6ca55366a" }, { "dataPath": "params_shard_120.bin", @@ -4672,8 +4830,9 @@ { "name": "model.layers.39.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4693,8 +4852,9 @@ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4742,7 +4902,7 @@ "byteOffset": 26553856 } ], - "md5sum": "ad568aa6c82002b73142573d03a220b2" + "md5sum": "b741247f0f11e3e0da10d831ab3a7487" }, { "dataPath": "params_shard_121.bin", @@ -4752,8 +4912,9 @@ { "name": "model.layers.40.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4761,7 +4922,7 @@ "byteOffset": 0 } ], - "md5sum": "2c158df95aba5b611fe7c7dd4431b1d6" + "md5sum": "ee47ffabcc4a80eb7fc825d510220e9b" }, { "dataPath": "params_shard_122.bin", @@ -4771,8 +4932,9 @@ { "name": "model.layers.40.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4780,7 +4942,7 @@ "byteOffset": 0 } ], - "md5sum": "e2a18ae6943d9d98a1d872702af1c910" + "md5sum": "03d7934168188df230bd36ec838b97c2" }, { "dataPath": "params_shard_123.bin", @@ -4790,8 +4952,9 @@ { "name": "model.layers.40.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4811,8 +4974,9 @@ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4860,7 +5024,7 @@ "byteOffset": 26553856 } ], - "md5sum": "42a404aa420bc00b1f5354c7ac663255" + "md5sum": "93e73f3e143079a89115d91c26a7dc63" }, { "dataPath": "params_shard_124.bin", @@ -4870,8 +5034,9 @@ { "name": "model.layers.41.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4879,7 +5044,7 @@ "byteOffset": 0 } ], - "md5sum": "4253a7adf9ba97cfc775ee6f161fbf60" + "md5sum": "296836b587a32c2582502a2b37c75ac6" }, { "dataPath": "params_shard_125.bin", @@ -4889,8 +5054,9 @@ { "name": "model.layers.41.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4898,7 +5064,7 @@ "byteOffset": 0 } ], - "md5sum": "ea771bac8638e0468c9744a35036f25e" + "md5sum": "721d7b768fa1a9bf27810fb8b5052f63" }, { "dataPath": "params_shard_126.bin", @@ -4908,8 +5074,9 @@ { "name": "model.layers.41.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4929,8 +5096,9 @@ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4978,7 +5146,7 @@ "byteOffset": 26553856 } ], - "md5sum": "38af81cc881eef6f178c598b04b41e07" + "md5sum": "86885fe73453642a44f1d631736dd1a8" }, { "dataPath": "params_shard_127.bin", @@ -4988,8 +5156,9 @@ { "name": "model.layers.42.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -4997,7 +5166,7 @@ "byteOffset": 0 } ], - "md5sum": "462684e5544819be14051c776db27094" + "md5sum": "da0257f208c1ada5e05a1cf5f2f6c5d6" }, { "dataPath": "params_shard_128.bin", @@ -5007,8 +5176,9 @@ { "name": "model.layers.42.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5016,7 +5186,7 @@ "byteOffset": 0 } ], - "md5sum": "fda84c5878d14c51281dfcf5c6ad27dd" + "md5sum": "baf61197f1557e2e1ff37bcf380b6610" }, { "dataPath": "params_shard_129.bin", @@ -5026,8 +5196,9 @@ { "name": "model.layers.42.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5047,8 +5218,9 @@ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5096,7 +5268,7 @@ "byteOffset": 26553856 } ], - "md5sum": "818f3b20433ae46b61557906b7a2ffa1" + "md5sum": "d007a7f9f4fad954b41f286436cb1a97" }, { "dataPath": "params_shard_130.bin", @@ -5106,8 +5278,9 @@ { "name": "model.layers.43.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5115,7 +5288,7 @@ "byteOffset": 0 } ], - "md5sum": "45a38fcf43b6e08112813786b040b360" + "md5sum": "9a524cfec5855788648c9244f4da2b9f" }, { "dataPath": "params_shard_131.bin", @@ -5125,8 +5298,9 @@ { "name": "model.layers.43.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5134,7 +5308,7 @@ "byteOffset": 0 } ], - "md5sum": "755d64055a40d57b909274ad6b3828a7" + "md5sum": "260b68dfa238b509251c22d8d08192c6" }, { "dataPath": "params_shard_132.bin", @@ -5144,8 +5318,9 @@ { "name": "model.layers.43.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5165,8 +5340,9 @@ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5214,7 +5390,7 @@ "byteOffset": 26553856 } ], - "md5sum": "843ea2667755f0798b16291527bd98da" + "md5sum": "50c9fd65189fb634ebaa2e4a5d2ef0e2" }, { "dataPath": "params_shard_133.bin", @@ -5224,8 +5400,9 @@ { "name": "model.layers.44.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5233,7 +5410,7 @@ "byteOffset": 0 } ], - "md5sum": "792ba4115be13409b85f960084232872" + "md5sum": "124e4ce41ca518c5bb5dbeed5f075e51" }, { "dataPath": "params_shard_134.bin", @@ -5243,8 +5420,9 @@ { "name": "model.layers.44.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5252,7 +5430,7 @@ "byteOffset": 0 } ], - "md5sum": "3a606eb28ad3f6b6ca0d2fe11c49b926" + "md5sum": "115fb369207d68ab23324e4192dc8896" }, { "dataPath": "params_shard_135.bin", @@ -5262,8 +5440,9 @@ { "name": "model.layers.44.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5283,8 +5462,9 @@ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5332,7 +5512,7 @@ "byteOffset": 26553856 } ], - "md5sum": "c0bb745ec8c6101dcf80009693e7bcaf" + "md5sum": "291e158a175f78ec536af06e9f53580f" }, { "dataPath": "params_shard_136.bin", @@ -5342,8 +5522,9 @@ { "name": "model.layers.45.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5351,7 +5532,7 @@ "byteOffset": 0 } ], - "md5sum": "e4bd7de020af7c1a36f9088a940d9e02" + "md5sum": "47b5ebed20a02f2b00fe7d8a38bb2c32" }, { "dataPath": "params_shard_137.bin", @@ -5361,8 +5542,9 @@ { "name": "model.layers.45.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5370,7 +5552,7 @@ "byteOffset": 0 } ], - "md5sum": "43d665200643dfafb8b443459691b986" + "md5sum": "5c9d8122da6125a203abe3af80aeaaa3" }, { "dataPath": "params_shard_138.bin", @@ -5380,8 +5562,9 @@ { "name": "model.layers.45.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5401,8 +5584,9 @@ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5450,7 +5634,7 @@ "byteOffset": 26553856 } ], - "md5sum": "fd951780b75f7e2afddd557a9335db86" + "md5sum": "53a2e8c6d0112f6418300e5a01d9c1b5" }, { "dataPath": "params_shard_139.bin", @@ -5460,8 +5644,9 @@ { "name": "model.layers.46.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5469,7 +5654,7 @@ "byteOffset": 0 } ], - "md5sum": "33b5bf363aa61a2c037b05a89cdd6af1" + "md5sum": "b6ad6ec0b6eddc529e1bfffd87de49d4" }, { "dataPath": "params_shard_140.bin", @@ -5479,8 +5664,9 @@ { "name": "model.layers.46.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5488,7 +5674,7 @@ "byteOffset": 0 } ], - "md5sum": "f41344cbc8adf4a564499a079d7ef626" + "md5sum": "f1044e16390c33bf62e7344080c223ef" }, { "dataPath": "params_shard_141.bin", @@ -5498,8 +5684,9 @@ { "name": "model.layers.46.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5519,8 +5706,9 @@ { "name": "model.layers.46.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5568,7 +5756,7 @@ "byteOffset": 26553856 } ], - "md5sum": "f5c8ebc8ccacec9fb86c747506b7cbfe" + "md5sum": "9b498f275ebaa2c5805e54afc9c1448f" }, { "dataPath": "params_shard_142.bin", @@ -5578,8 +5766,9 @@ { "name": "model.layers.47.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5587,7 +5776,7 @@ "byteOffset": 0 } ], - "md5sum": "93a18592738a7db4051d264fee4a8642" + "md5sum": "b170ef009f14a8f1b766c1f1a468e6be" }, { "dataPath": "params_shard_143.bin", @@ -5597,8 +5786,9 @@ { "name": "model.layers.47.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5606,7 +5796,7 @@ "byteOffset": 0 } ], - "md5sum": "99a1c02aa6fbbbcefc207693ac99856b" + "md5sum": "8252517b14a26d59c30b4b3ba349c96b" }, { "dataPath": "params_shard_144.bin", @@ -5616,8 +5806,9 @@ { "name": "model.layers.47.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5637,8 +5828,9 @@ { "name": "model.layers.47.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5686,7 +5878,7 @@ "byteOffset": 26553856 } ], - "md5sum": "c4747b30cfb030300bb0e5317960d522" + "md5sum": "4559d757f6b5276049b05089d86ae3c4" }, { "dataPath": "params_shard_145.bin", @@ -5696,8 +5888,9 @@ { "name": "model.layers.48.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5705,7 +5898,7 @@ "byteOffset": 0 } ], - "md5sum": "b8efef507310596128918513c12072cb" + "md5sum": "3935be25181b4b90108ab92fe7f3c78e" }, { "dataPath": "params_shard_146.bin", @@ -5715,8 +5908,9 @@ { "name": "model.layers.48.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5724,7 +5918,7 @@ "byteOffset": 0 } ], - "md5sum": "e923b49d570044c970aba72de5a2178e" + "md5sum": "fbec991e910c60b08321a536c87e6fe5" }, { "dataPath": "params_shard_147.bin", @@ -5734,8 +5928,9 @@ { "name": "model.layers.48.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5755,8 +5950,9 @@ { "name": "model.layers.48.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5804,7 +6000,7 @@ "byteOffset": 26553856 } ], - "md5sum": "2b943dcb6478134e3a3e05900de24d88" + "md5sum": "fc3fbbce684689189c9250c49c52a85b" }, { "dataPath": "params_shard_148.bin", @@ -5814,8 +6010,9 @@ { "name": "model.layers.49.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5823,7 +6020,7 @@ "byteOffset": 0 } ], - "md5sum": "b186b56d5017f6cb142d1d0166c0279d" + "md5sum": "2fd359033db9141df631b383579ecbb2" }, { "dataPath": "params_shard_149.bin", @@ -5833,8 +6030,9 @@ { "name": "model.layers.49.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5842,7 +6040,7 @@ "byteOffset": 0 } ], - "md5sum": "5f6e22a53fbef6fac2b2ca7eb32c7356" + "md5sum": "c100ce1397c81c53ad414bbd58f24021" }, { "dataPath": "params_shard_150.bin", @@ -5852,8 +6050,9 @@ { "name": "model.layers.49.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5873,8 +6072,9 @@ { "name": "model.layers.49.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5922,7 +6122,7 @@ "byteOffset": 26553856 } ], - "md5sum": "855b7d0bc49f99b6db73d9990e4f195b" + "md5sum": "32ca1d09ab0ea105c0a5158a4e8a6779" }, { "dataPath": "params_shard_151.bin", @@ -5932,8 +6132,9 @@ { "name": "model.layers.50.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5941,7 +6142,7 @@ "byteOffset": 0 } ], - "md5sum": "4fb58d160c44f6ad94019d83b6e08868" + "md5sum": "f8cfb4a137ce8f6f9d5bb9079407cbf9" }, { "dataPath": "params_shard_152.bin", @@ -5951,8 +6152,9 @@ { "name": "model.layers.50.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5960,7 +6162,7 @@ "byteOffset": 0 } ], - "md5sum": "e154014218fff8db84216e7463234e2a" + "md5sum": "697e789cdd6f7bdabed3dace30605517" }, { "dataPath": "params_shard_153.bin", @@ -5970,8 +6172,9 @@ { "name": "model.layers.50.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -5991,8 +6194,9 @@ { "name": "model.layers.50.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -6040,7 +6244,7 @@ "byteOffset": 26553856 } ], - "md5sum": "a7c086dab81fca7367ba286c329819b4" + "md5sum": "10d9295ec0d7bc093668831769b084a3" }, { "dataPath": "params_shard_154.bin", @@ -6050,8 +6254,9 @@ { "name": "model.layers.51.mlp.gate_up_proj.weight", "shape": [ - 11520, - 2304 + 90, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -6059,7 +6264,7 @@ "byteOffset": 0 } ], - "md5sum": "d262a020e9610130a7c63e840f188b30" + "md5sum": "81d3fe0c19ffabbac405c1d5f4a2463f" }, { "dataPath": "params_shard_155.bin", @@ -6069,8 +6274,9 @@ { "name": "model.layers.51.mlp.down_proj.weight", "shape": [ - 2304, - 5760 + 18, + 5760, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -6078,7 +6284,7 @@ "byteOffset": 0 } ], - "md5sum": "222a4577f662260d72145a585874ff71" + "md5sum": "283392b9a27feb8c4b902ad7debd6d97" }, { "dataPath": "params_shard_156.bin", @@ -6088,8 +6294,9 @@ { "name": "model.layers.51.self_attn.wqkv_pack.weight", "shape": [ - 3456, - 2304 + 27, + 2304, + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -6109,8 +6316,9 @@ { "name": "model.layers.51.self_attn.o_proj.weight", "shape": [ + 18, 2304, - 2304 + 128 ], "dtype": "float16", "format": "f32-to-bf16", @@ -6168,7 +6376,7 @@ "byteOffset": 26558464 } ], - "md5sum": "1ee3b7c35df372997f67e3224dc89ba3" + "md5sum": "194e5d4075ab2782d9479bc1df591928" } ] } \ No newline at end of file