{ "metadata": { "ParamSize": 825, "ParamBytes": 9893851136.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 632496128, "records": [ { "name": "lm_head.weight", "shape": [ 77209, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 632496128, "byteOffset": 0 } ], "md5sum": "1c387f720c6b73a4b5e9a8122967904c" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e86a61dee6e802e29642a210c03cced1" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9f579b888cff8c2fdb1603beaa465f2d" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20557824, "byteOffset": 8192 }, { "name": "model.layers.14.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 79 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2265088, "byteOffset": 20566016 }, { "name": "model.layers.14.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 717, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5873664, "byteOffset": 22831104 }, { "name": "model.layers.14.mlp.gate_proj.BLinear_train.weight", "shape": [ 79, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 647168, "byteOffset": 28704768 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29351936 } ], "md5sum": "86951865b76e5898a0c1c8bdde048897" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7790d76f3b990945ae9ef7b92e419cc8" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.15.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "4e87c6eb93a237d76aa31eebeb93b6f0" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.15.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "807d2abbf99bb336902760117fa59a23" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 24428544, "records": [ { "name": "model.layers.14.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 738 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6045696, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 81 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 6045696 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6709248 }, { "name": "model.layers.15.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 6717440 }, { "name": "model.layers.15.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 10358784 }, { "name": "model.layers.15.mlp.gate_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 19746816 }, { "name": "model.layers.15.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 20787200 } ], "md5sum": "094dbb49b8d3f3f9286c0c6e5ec58925" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 28049408, "records": [ { "name": "model.layers.15.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 0 }, { "name": "model.layers.15.mlp.up_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 9388032 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10428416 }, { "name": "model.layers.15.self_attn.k_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10436608 }, { "name": "model.layers.15.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 1014 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 18825216 }, { "name": "model.layers.15.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 27131904 } ], "md5sum": "b2d0805ac74280efa3b634153b3d7f8e" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fa85ea7670c4ad852459893f65b8afa8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.16.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "150ff306ded0eb755534c9672fb1f2f7" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31027200, "records": [ { "name": "model.layers.15.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 1014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.BLinear_train.weight", "shape": [ 112, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 8306688 }, { "name": "model.layers.15.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 9224192 }, { "name": "model.layers.15.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 13000704 }, { "name": "model.layers.15.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 13418496 }, { "name": "model.layers.15.self_attn.q_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 17195008 }, { "name": "model.layers.15.self_attn.v_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17612800 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26001408 }, { "name": "model.layers.16.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 26009600 } ], "md5sum": "51be3b797432ffae45068fda7b8b91f9" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.16.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "0655758892d2dd3ec11315de6c700a77" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32272384, "records": [ { "name": "model.layers.16.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.16.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 14344192 }, { "name": "model.layers.16.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 19361792 } ], "md5sum": "410382caf8bea3d363aa5006fa231197" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25337856, "records": [ { "name": "model.layers.16.mlp.up_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1433600 }, { "name": "model.layers.16.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 1441792 }, { "name": "model.layers.16.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 1818624 }, { "name": "model.layers.16.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 1859584 }, { "name": "model.layers.16.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 3366912 }, { "name": "model.layers.16.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 1014 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 3530752 }, { "name": "model.layers.16.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 11837440 }, { "name": "model.layers.16.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 1014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 12754944 }, { "name": "model.layers.16.self_attn.o_proj.BLinear_train.weight", "shape": [ 112, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21061632 }, { "name": "model.layers.16.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 21979136 }, { "name": "model.layers.16.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 23494656 }, { "name": "model.layers.16.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 23658496 }, { "name": "model.layers.16.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 25174016 } ], "md5sum": "731afd57487038e0879b8e1ef38ff78a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8a96ac78adea8067573653ec998d7271" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.17.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "73e83dea3f5809da8fc92fab59bb812c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.17.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "46935aadfc6bc4c70cdd671056716d32" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32776192, "records": [ { "name": "model.layers.16.self_attn.v_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.17.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 8396800 }, { "name": "model.layers.17.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 13414400 }, { "name": "model.layers.17.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 26324992 }, { "name": "model.layers.17.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 27758592 } ], "md5sum": "749d3e30f0f02e8caf897baad2d8ea7b" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 31969280, "records": [ { "name": "model.layers.17.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.17.mlp.up_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14344192 }, { "name": "model.layers.17.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 14352384 }, { "name": "model.layers.17.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14503936 }, { "name": "model.layers.17.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 14520320 }, { "name": "model.layers.17.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 15126528 }, { "name": "model.layers.17.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 738 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6045696, "byteOffset": 15192064 }, { "name": "model.layers.17.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 81 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 21237760 }, { "name": "model.layers.17.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 738, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6045696, "byteOffset": 21901312 }, { "name": "model.layers.17.self_attn.o_proj.BLinear_train.weight", "shape": [ 81, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27947008 }, { "name": "model.layers.17.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 28610560 }, { "name": "model.layers.17.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 30126080 }, { "name": "model.layers.17.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 30289920 }, { "name": "model.layers.17.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 31805440 } ], "md5sum": "6dbf30f4d394be7728b9ae79ffcb1f04" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f8a0e4b39d361e79ad696a7acf0f6883" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.18.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "f5e9909f886d0ce13aaf11602741a66a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.18.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "8e42dbf860b1758fc1c99da8ea515264" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 32776192, "records": [ { "name": "model.layers.17.self_attn.v_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.18.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 8396800 }, { "name": "model.layers.18.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 13414400 }, { "name": "model.layers.18.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 26324992 }, { "name": "model.layers.18.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 27758592 } ], "md5sum": "d53a8c4e31c52b83d7cfed8eeb8ff3d8" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "65cefe11d4f83347591329bfb5e0015e" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.19.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "d9e71d36413d8fdd494ec4fae1c6bb66" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 31545344, "records": [ { "name": "model.layers.18.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.18.mlp.up_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14344192 }, { "name": "model.layers.18.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 14352384 }, { "name": "model.layers.18.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 14729216 }, { "name": "model.layers.18.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 14770176 }, { "name": "model.layers.18.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 16277504 }, { "name": "model.layers.18.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 16441344 }, { "name": "model.layers.18.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 20217856 }, { "name": "model.layers.18.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 20635648 }, { "name": "model.layers.18.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 24412160 }, { "name": "model.layers.18.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 24829952 }, { "name": "model.layers.18.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 26345472 }, { "name": "model.layers.18.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 26509312 }, { "name": "model.layers.18.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 28024832 }, { "name": "model.layers.18.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 28188672 }, { "name": "model.layers.18.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 28792832 }, { "name": "model.layers.18.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 28858368 }, { "name": "model.layers.18.self_attn.v_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 31275008 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31537152 } ], "md5sum": "9465eda6e97ed041864680f2de2d8d97" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.19.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "71b6856e7e136861f21a55e09f897cd4" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29642752, "records": [ { "name": "model.layers.19.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 6365184 }, { "name": "model.layers.19.mlp.gate_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 22806528 }, { "name": "model.layers.19.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 24625152 } ], "md5sum": "31b8c2e7ced536adb304dc2cdf155cc9" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "87aebce6a8e31dc87311ff062879a275" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.20.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "a00be81517d741ec6fbdb6ec935f41cd" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29122560, "records": [ { "name": "model.layers.19.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.19.mlp.up_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14344192 }, { "name": "model.layers.19.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 14352384 }, { "name": "model.layers.19.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14503936 }, { "name": "model.layers.19.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 14520320 }, { "name": "model.layers.19.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 15126528 }, { "name": "model.layers.19.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 15192064 }, { "name": "model.layers.19.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 16707584 }, { "name": "model.layers.19.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 16871424 }, { "name": "model.layers.19.self_attn.o_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 18386944 }, { "name": "model.layers.19.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 18550784 }, { "name": "model.layers.19.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 20066304 }, { "name": "model.layers.19.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 20230144 }, { "name": "model.layers.19.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 21745664 }, { "name": "model.layers.19.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 21909504 }, { "name": "model.layers.19.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22061056 }, { "name": "model.layers.19.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 22077440 }, { "name": "model.layers.19.self_attn.v_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 22683648 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22749184 }, { "name": "model.layers.20.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 22757376 } ], "md5sum": "746c7541b4defecd49924eedb676e1f9" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.20.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "a26477cd51d8a23b27335cc019c00d7a" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 24625152, "records": [ { "name": "model.layers.20.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 16441344 }, { "name": "model.layers.20.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 18259968 } ], "md5sum": "5f03f27726b2d57a479139bcbbde70c9" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e377cd35e5fa59a0a207486cb4331bfc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.21.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "c39a7bb66c99dbea59d781b8a16ecc5c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33038336, "records": [ { "name": "model.layers.20.mlp.up_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.20.mlp.up_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 16441344 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18259968 }, { "name": "model.layers.20.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 18268160 }, { "name": "model.layers.20.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18419712 }, { "name": "model.layers.20.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 18436096 }, { "name": "model.layers.20.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 19042304 }, { "name": "model.layers.20.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 19107840 }, { "name": "model.layers.20.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 20623360 }, { "name": "model.layers.20.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 20787200 }, { "name": "model.layers.20.self_attn.o_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 22302720 }, { "name": "model.layers.20.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 22466560 }, { "name": "model.layers.20.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 23982080 }, { "name": "model.layers.20.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 24145920 }, { "name": "model.layers.20.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 25661440 }, { "name": "model.layers.20.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 25825280 }, { "name": "model.layers.20.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25976832 }, { "name": "model.layers.20.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 25993216 }, { "name": "model.layers.20.self_attn.v_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 26599424 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26664960 }, { "name": "model.layers.21.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 26673152 } ], "md5sum": "0589a97639a45eada8b93d8d04532eb1" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.21.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "4aba171654af27877edbf5ce610c83a4" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 23277568, "records": [ { "name": "model.layers.21.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 16441344 }, { "name": "model.layers.21.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 18259968 } ], "md5sum": "432e1daf7f983bd8cbb880d680e25a27" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "da6185961ede7e8a16ba4772a47d1c06" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.22.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "ea1a3392ad661b402bf9d1037523b91f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 31545344, "records": [ { "name": "model.layers.21.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.21.mlp.up_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14344192 }, { "name": "model.layers.21.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 14352384 }, { "name": "model.layers.21.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 14729216 }, { "name": "model.layers.21.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 14770176 }, { "name": "model.layers.21.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 16277504 }, { "name": "model.layers.21.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 16441344 }, { "name": "model.layers.21.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 20217856 }, { "name": "model.layers.21.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 20635648 }, { "name": "model.layers.21.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 24412160 }, { "name": "model.layers.21.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 24829952 }, { "name": "model.layers.21.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 26345472 }, { "name": "model.layers.21.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 26509312 }, { "name": "model.layers.21.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 28024832 }, { "name": "model.layers.21.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 28188672 }, { "name": "model.layers.21.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 28792832 }, { "name": "model.layers.21.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 28858368 }, { "name": "model.layers.21.self_attn.v_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 31275008 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31537152 } ], "md5sum": "680eed8a93fd5665638ab5b5258b46d2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.22.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "094a4e340a5ad87262d3438971f98b90" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 24379392, "records": [ { "name": "model.layers.22.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 5017600 }, { "name": "model.layers.22.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 17928192 }, { "name": "model.layers.22.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 19361792 } ], "md5sum": "0581bde71ddf4946883c92d302fe2372" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 19963904, "records": [ { "name": "model.layers.23.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 2437 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19963904, "byteOffset": 0 } ], "md5sum": "31e7b5cb36e28a973d7c297c1deea756" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 69873664, "records": [ { "name": "model.layers.23.mlp.down_proj.BLinear_no_train.weight", "shape": [ 2437, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 69873664, "byteOffset": 0 } ], "md5sum": "b54aa3026a116f81ca33a647d6b08c1c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 32507904, "records": [ { "name": "model.layers.22.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.22.mlp.up_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14344192 }, { "name": "model.layers.22.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 14352384 }, { "name": "model.layers.22.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14503936 }, { "name": "model.layers.22.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 14520320 }, { "name": "model.layers.22.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 15126528 }, { "name": "model.layers.22.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 15192064 }, { "name": "model.layers.22.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 18968576 }, { "name": "model.layers.22.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 19386368 }, { "name": "model.layers.22.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 23162880 }, { "name": "model.layers.22.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 23580672 }, { "name": "model.layers.22.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 25096192 }, { "name": "model.layers.22.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 25260032 }, { "name": "model.layers.22.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 26775552 }, { "name": "model.layers.22.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 26939392 }, { "name": "model.layers.22.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 27543552 }, { "name": "model.layers.22.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 27609088 }, { "name": "model.layers.22.self_attn.v_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 30025728 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30287872 }, { "name": "model.layers.23.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 270 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 30296064 } ], "md5sum": "a6b79df9060be3c43bee37a58e2fe75e" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.23.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "bc8555e1354f9b8e45422f92bf0de816" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.23.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "859c9e47c5fe41b0b9565df94d901ba6" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 30744576, "records": [ { "name": "model.layers.23.mlp.down_proj.BLinear_train.weight", "shape": [ 270, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7741440, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 7741440 }, { "name": "model.layers.23.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 12759040 }, { "name": "model.layers.23.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 25669632 }, { "name": "model.layers.23.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 27103232 } ], "md5sum": "338ba456ee2655bb81489053cd2c6dc6" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33001472, "records": [ { "name": "model.layers.23.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 0 }, { "name": "model.layers.23.mlp.up_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 9388032 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10428416 }, { "name": "model.layers.23.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 10436608 }, { "name": "model.layers.23.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 10588160 }, { "name": "model.layers.23.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 10604544 }, { "name": "model.layers.23.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 11210752 }, { "name": "model.layers.23.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 11276288 }, { "name": "model.layers.23.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 12791808 }, { "name": "model.layers.23.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 12955648 }, { "name": "model.layers.23.self_attn.o_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 14471168 }, { "name": "model.layers.23.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 14635008 }, { "name": "model.layers.23.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 16150528 }, { "name": "model.layers.23.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 16314368 }, { "name": "model.layers.23.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 17829888 }, { "name": "model.layers.23.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 17993728 }, { "name": "model.layers.23.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 18370560 }, { "name": "model.layers.23.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 18411520 }, { "name": "model.layers.23.self_attn.v_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 19918848 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20082688 }, { "name": "model.layers.24.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 20090880 } ], "md5sum": "1bff5e6c364e691f6c120e7aa42a8be4" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.24.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1576, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "9be8c53c12ae49f263adc9bbea5e3991" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.24.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "82aaaa1ac366054043da418bea3ce79e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.24.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "6db7c7c88bcb2fc7dab5d483c0b8250c" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 30830592, "records": [ { "name": "model.layers.24.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.BLinear_train.weight", "shape": [ 175, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 1433600 }, { "name": "model.layers.24.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 6451200 }, { "name": "model.layers.24.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 11468800 }, { "name": "model.layers.24.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 24379392 }, { "name": "model.layers.24.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 25812992 } ], "md5sum": "e24147c04648967bd6fcbe8f9bc71a8c" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 19963904, "records": [ { "name": "model.layers.25.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 2437 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19963904, "byteOffset": 0 } ], "md5sum": "e354ebd7b0b3a111cf2cd50f6993f627" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 69873664, "records": [ { "name": "model.layers.25.mlp.down_proj.BLinear_no_train.weight", "shape": [ 2437, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 69873664, "byteOffset": 0 } ], "md5sum": "63d85a3367839222e8486985006ab481" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.25.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "854cd7a49d38279f09961f93df311a35" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32710656, "records": [ { "name": "model.layers.24.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.24.mlp.up_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14344192 }, { "name": "model.layers.24.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 14352384 }, { "name": "model.layers.24.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14503936 }, { "name": "model.layers.24.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 14520320 }, { "name": "model.layers.24.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 15126528 }, { "name": "model.layers.24.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 15192064 }, { "name": "model.layers.24.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 16707584 }, { "name": "model.layers.24.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 16871424 }, { "name": "model.layers.24.self_attn.o_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 18386944 }, { "name": "model.layers.24.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 18550784 }, { "name": "model.layers.24.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 20066304 }, { "name": "model.layers.24.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 20230144 }, { "name": "model.layers.24.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 21745664 }, { "name": "model.layers.24.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 21909504 }, { "name": "model.layers.24.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22061056 }, { "name": "model.layers.24.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 22077440 }, { "name": "model.layers.24.self_attn.v_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 22683648 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22749184 }, { "name": "model.layers.25.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 270 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 22757376 }, { "name": "model.layers.25.mlp.down_proj.BLinear_train.weight", "shape": [ 270, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7741440, "byteOffset": 24969216 } ], "md5sum": "ef5166fbd7f3cba59224944ee1832a14" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.25.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "306c69e42a15bdbb1d9932d078832658" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 24379392, "records": [ { "name": "model.layers.25.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 5017600 }, { "name": "model.layers.25.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 17928192 }, { "name": "model.layers.25.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 19361792 } ], "md5sum": "da871db74d43eb6fe42a83aefe1aeb91" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33394688, "records": [ { "name": "model.layers.25.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.25.mlp.up_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14344192 }, { "name": "model.layers.25.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 14352384 }, { "name": "model.layers.25.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14503936 }, { "name": "model.layers.25.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 14520320 }, { "name": "model.layers.25.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 15126528 }, { "name": "model.layers.25.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 15192064 }, { "name": "model.layers.25.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 16707584 }, { "name": "model.layers.25.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 16871424 }, { "name": "model.layers.25.self_attn.o_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 18386944 }, { "name": "model.layers.25.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 18550784 }, { "name": "model.layers.25.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 20066304 }, { "name": "model.layers.25.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 20230144 }, { "name": "model.layers.25.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 21745664 }, { "name": "model.layers.25.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 21909504 }, { "name": "model.layers.25.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 22286336 }, { "name": "model.layers.25.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 22327296 }, { "name": "model.layers.25.self_attn.v_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 23834624 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23998464 }, { "name": "model.layers.26.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 24006656 } ], "md5sum": "8112c9090388534adf7bb9908d881cb7" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.26.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1146, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "f810ed80fdbe13581b4733c74222ecb1" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.26.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "0e3b9b8068a7d8cb7983358424de8832" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.26.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "1d502a2a75341297bc005415220595d0" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32997376, "records": [ { "name": "model.layers.26.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_proj.BLinear_train.weight", "shape": [ 127, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 1040384 }, { "name": "model.layers.26.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 4681728 }, { "name": "model.layers.26.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 8323072 }, { "name": "model.layers.26.mlp.gate_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 17711104 }, { "name": "model.layers.26.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 18751488 }, { "name": "model.layers.26.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 22392832 }, { "name": "model.layers.26.mlp.up_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 31780864 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32821248 }, { "name": "model.layers.26.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 32829440 }, { "name": "model.layers.26.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32980992 } ], "md5sum": "a440d219197d9e88a21e9469d1abf75b" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.27.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1146, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "227a61f2d8aa77acc88f9ddaba0877a2" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.27.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "3093b8c2f13cba49bdc529b06a916028" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 25948160, "records": [ { "name": "model.layers.26.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 606208 }, { "name": "model.layers.26.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 671744 }, { "name": "model.layers.26.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 2187264 }, { "name": "model.layers.26.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 2351104 }, { "name": "model.layers.26.self_attn.o_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 3866624 }, { "name": "model.layers.26.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 4030464 }, { "name": "model.layers.26.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 5545984 }, { "name": "model.layers.26.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 5709824 }, { "name": "model.layers.26.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 7225344 }, { "name": "model.layers.26.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 7389184 }, { "name": "model.layers.26.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7540736 }, { "name": "model.layers.26.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 7557120 }, { "name": "model.layers.26.self_attn.v_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 8163328 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8228864 }, { "name": "model.layers.27.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 8237056 }, { "name": "model.layers.27.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 17625088 }, { "name": "model.layers.27.mlp.down_proj.BLinear_train.weight", "shape": [ 127, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 18665472 }, { "name": "model.layers.27.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 22306816 } ], "md5sum": "6eab6ba4ac35713bf8d719e2873431c3" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.27.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "5855c70ae0f8dc273b4d023313c2af4e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30789632, "records": [ { "name": "model.layers.27.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 9388032 }, { "name": "model.layers.27.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 10428416 }, { "name": "model.layers.27.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 14069760 }, { "name": "model.layers.27.mlp.up_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 23457792 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24498176 }, { "name": "model.layers.27.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 24506368 }, { "name": "model.layers.27.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 24883200 }, { "name": "model.layers.27.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 24924160 }, { "name": "model.layers.27.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 26431488 }, { "name": "model.layers.27.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 26595328 }, { "name": "model.layers.27.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 30371840 } ], "md5sum": "79e6897cc82b86e3dada4a039864a9a5" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.28.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1146, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "43482199b478adbd3b871e2b81b7c002" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.28.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "3a6c2221c34689bb41c016320a4b5445" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 27361280, "records": [ { "name": "model.layers.27.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 3776512 }, { "name": "model.layers.27.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 4194304 }, { "name": "model.layers.27.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 5709824 }, { "name": "model.layers.27.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 5873664 }, { "name": "model.layers.27.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 7389184 }, { "name": "model.layers.27.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 7553024 }, { "name": "model.layers.27.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 7929856 }, { "name": "model.layers.27.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 7970816 }, { "name": "model.layers.27.self_attn.v_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 9478144 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9641984 }, { "name": "model.layers.28.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 9650176 }, { "name": "model.layers.28.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 19038208 }, { "name": "model.layers.28.mlp.down_proj.BLinear_train.weight", "shape": [ 127, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 20078592 }, { "name": "model.layers.28.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 23719936 } ], "md5sum": "19cd7bc56caf1e3b8b6737f83f49cda9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.28.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "1b44d1310a9f789982a06888ce2704c5" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 30789632, "records": [ { "name": "model.layers.28.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 9388032 }, { "name": "model.layers.28.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 10428416 }, { "name": "model.layers.28.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 14069760 }, { "name": "model.layers.28.mlp.up_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 23457792 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24498176 }, { "name": "model.layers.28.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 24506368 }, { "name": "model.layers.28.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 24883200 }, { "name": "model.layers.28.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 24924160 }, { "name": "model.layers.28.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 26431488 }, { "name": "model.layers.28.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 26595328 }, { "name": "model.layers.28.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 30371840 } ], "md5sum": "fdf7a98db78efaf9d19fd6dbb8eb64ee" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.29.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1146, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "c63ab26149c8aa35188e9ce7769af810" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.29.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "9eb51831c2364d2c6af65e5bb930aad7" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 31256576, "records": [ { "name": "model.layers.28.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 3776512 }, { "name": "model.layers.28.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 4194304 }, { "name": "model.layers.28.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 5709824 }, { "name": "model.layers.28.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 5873664 }, { "name": "model.layers.28.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 7389184 }, { "name": "model.layers.28.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 405 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 7553024 }, { "name": "model.layers.28.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 45 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 92160, "byteOffset": 8382464 }, { "name": "model.layers.28.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 405, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3317760, "byteOffset": 8474624 }, { "name": "model.layers.28.self_attn.v_proj.BLinear_train.weight", "shape": [ 45, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 368640, "byteOffset": 11792384 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12161024 }, { "name": "model.layers.29.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 12169216 }, { "name": "model.layers.29.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 21557248 }, { "name": "model.layers.29.mlp.down_proj.BLinear_train.weight", "shape": [ 127, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 22597632 }, { "name": "model.layers.29.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 26238976 } ], "md5sum": "fc26526a9adc9a6d257dac4270624cce" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33200128, "records": [ { "name": "model.layers.29.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.29.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 287 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8228864, "byteOffset": 14344192 }, { "name": "model.layers.29.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 31 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 888832, "byteOffset": 22573056 }, { "name": "model.layers.29.mlp.up_proj.BLinear_no_train.weight", "shape": [ 287, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2351104, "byteOffset": 23461888 }, { "name": "model.layers.29.mlp.up_proj.BLinear_train.weight", "shape": [ 31, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 253952, "byteOffset": 25812992 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26066944 }, { "name": "model.layers.29.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 26075136 }, { "name": "model.layers.29.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 26679296 }, { "name": "model.layers.29.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 26744832 }, { "name": "model.layers.29.self_attn.k_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 29161472 }, { "name": "model.layers.29.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 29423616 } ], "md5sum": "441fe6bf6e367d7e59f7ffda291be284" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.30.mlp.down_proj.BLinear_no_train.weight", "shape": [ 2007, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "02c791bad68355e703037f67114a5e94" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 28327936, "records": [ { "name": "model.layers.29.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 417792 }, { "name": "model.layers.29.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 4194304 }, { "name": "model.layers.29.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 4612096 }, { "name": "model.layers.29.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 6127616 }, { "name": "model.layers.29.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 6291456 }, { "name": "model.layers.29.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 7806976 }, { "name": "model.layers.29.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 7970816 }, { "name": "model.layers.29.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 8347648 }, { "name": "model.layers.29.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 8388608 }, { "name": "model.layers.29.self_attn.v_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 9895936 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.30.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 10067968 }, { "name": "model.layers.30.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 26509312 } ], "md5sum": "34543c2c722fdf1c144e836ac8a28711" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.30.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "f23a4e5c8fad12d3225a5f0445a41d0b" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "91456ebd98372af841525e8dcf7e9c88" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 32018432, "records": [ { "name": "model.layers.30.mlp.down_proj.BLinear_train.weight", "shape": [ 222, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 6365184 }, { "name": "model.layers.30.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 11382784 }, { "name": "model.layers.30.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 24293376 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25726976 }, { "name": "model.layers.30.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 25735168 }, { "name": "model.layers.30.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 26112000 }, { "name": "model.layers.30.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 26152960 }, { "name": "model.layers.30.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 27660288 }, { "name": "model.layers.30.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 27824128 }, { "name": "model.layers.30.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 31600640 } ], "md5sum": "840913914627df0a3eea3daea2c4e694" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.31.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1576, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "fc808fbbd60698d71ac38adf40919633" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.31.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "33398d9e90007dfd68e36c01f0884226" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 31531008, "records": [ { "name": "model.layers.30.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 3776512 }, { "name": "model.layers.30.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 4194304 }, { "name": "model.layers.30.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 5709824 }, { "name": "model.layers.30.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 5873664 }, { "name": "model.layers.30.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 7389184 }, { "name": "model.layers.30.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 405 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 7553024 }, { "name": "model.layers.30.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 45 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 92160, "byteOffset": 8382464 }, { "name": "model.layers.30.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 405, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3317760, "byteOffset": 8474624 }, { "name": "model.layers.30.self_attn.v_proj.BLinear_train.weight", "shape": [ 45, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 368640, "byteOffset": 11792384 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12161024 }, { "name": "model.layers.31.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 12169216 }, { "name": "model.layers.31.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 25079808 }, { "name": "model.layers.31.mlp.down_proj.BLinear_train.weight", "shape": [ 175, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 26513408 } ], "md5sum": "a3be5fb14e38a69cebba94807e979339" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cadde26378dd00fc02f502f9855d0cd7" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 33206272, "records": [ { "name": "model.layers.31.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 5017600 }, { "name": "model.layers.31.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 17928192 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19361792 }, { "name": "model.layers.31.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 19369984 }, { "name": "model.layers.31.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 19746816 }, { "name": "model.layers.31.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 19787776 }, { "name": "model.layers.31.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 21295104 }, { "name": "model.layers.31.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 21458944 }, { "name": "model.layers.31.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 25235456 }, { "name": "model.layers.31.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 25653248 }, { "name": "model.layers.31.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 29429760 }, { "name": "model.layers.31.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 29847552 }, { "name": "model.layers.31.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 31363072 }, { "name": "model.layers.31.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 31526912 }, { "name": "model.layers.31.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 33042432 } ], "md5sum": "fe0bb663d991c1af99c024ce0cfc4b3c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 632496128, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 77209, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 632496128, "byteOffset": 0 } ], "md5sum": "db46d16f7ab1ddd145a21b1b044e715c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "97e538827be58d3a3f259b1c5ee9fade" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.0.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "c30f2177d2e490e4c6d96e569c42589a" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "66ea1708a0d893318fa38fa491598406" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 33206272, "records": [ { "name": "model.layers.31.self_attn.v_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8396800 }, { "name": "model.layers.0.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 8404992 }, { "name": "model.layers.0.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 14770176 }, { "name": "model.layers.0.mlp.gate_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 31211520 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33030144 }, { "name": "model.layers.0.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 33038336 }, { "name": "model.layers.0.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 33189888 } ], "md5sum": "b5d2e6b78a445becec6054098d2ed550" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "96e0e5dce75c68a524022c816070608b" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.1.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "eea208d470f4802fa7beed559b91534b" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 22052864, "records": [ { "name": "model.layers.0.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 606208 }, { "name": "model.layers.0.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 671744 }, { "name": "model.layers.0.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 4448256 }, { "name": "model.layers.0.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 4866048 }, { "name": "model.layers.0.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 8642560 }, { "name": "model.layers.0.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 9060352 }, { "name": "model.layers.0.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 10575872 }, { "name": "model.layers.0.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 10739712 }, { "name": "model.layers.0.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 12255232 }, { "name": "model.layers.0.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 405 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 12419072 }, { "name": "model.layers.0.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 45 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 92160, "byteOffset": 13248512 }, { "name": "model.layers.0.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 405, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3317760, "byteOffset": 13340672 }, { "name": "model.layers.0.self_attn.v_proj.BLinear_train.weight", "shape": [ 45, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 368640, "byteOffset": 16658432 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17027072 }, { "name": "model.layers.1.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 17035264 } ], "md5sum": "bf7730261a4dc881062d486fc783a5b6" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.1.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "358d3546c14103f94e6a35295d4b1f7e" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 20709376, "records": [ { "name": "model.layers.1.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.1.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 14344192 } ], "md5sum": "4cac20b698c8355d1b0a5e316417f064" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.1.mlp.up_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.1.mlp.up_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 16441344 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18259968 }, { "name": "model.layers.1.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 18268160 }, { "name": "model.layers.1.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 18644992 }, { "name": "model.layers.1.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 18685952 }, { "name": "model.layers.1.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 20193280 }, { "name": "model.layers.1.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 738 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6045696, "byteOffset": 20357120 }, { "name": "model.layers.1.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 81 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26402816 }, { "name": "model.layers.1.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 738, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6045696, "byteOffset": 27066368 } ], "md5sum": "0e095d8124ce5b232bcceddd2fa35241" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.10.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1146, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "5624c691c0b80b4b5a2936f5bad79012" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.10.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "fdea1f25f0e513ebc3a7354cb7419378" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 26466304, "records": [ { "name": "model.layers.1.self_attn.o_proj.BLinear_train.weight", "shape": [ 81, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 663552 }, { "name": "model.layers.1.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 2179072 }, { "name": "model.layers.1.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 2342912 }, { "name": "model.layers.1.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 3858432 }, { "name": "model.layers.1.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 4022272 }, { "name": "model.layers.1.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 4626432 }, { "name": "model.layers.1.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 4691968 }, { "name": "model.layers.1.self_attn.v_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 7108608 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7370752 }, { "name": "model.layers.10.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 7378944 }, { "name": "model.layers.10.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 16766976 }, { "name": "model.layers.10.mlp.down_proj.BLinear_train.weight", "shape": [ 127, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 17807360 }, { "name": "model.layers.10.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 21448704 } ], "md5sum": "4926c59c64f08349edf908a683b99aa0" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a1ffdace09a300eaf127bd977356f05d" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 31545344, "records": [ { "name": "model.layers.10.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1576, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_proj.BLinear_train.weight", "shape": [ 175, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14344192 }, { "name": "model.layers.10.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 14352384 }, { "name": "model.layers.10.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 14729216 }, { "name": "model.layers.10.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 14770176 }, { "name": "model.layers.10.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 16277504 }, { "name": "model.layers.10.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 16441344 }, { "name": "model.layers.10.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 20217856 }, { "name": "model.layers.10.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 20635648 }, { "name": "model.layers.10.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 24412160 }, { "name": "model.layers.10.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 24829952 }, { "name": "model.layers.10.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 26345472 }, { "name": "model.layers.10.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 26509312 }, { "name": "model.layers.10.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 28024832 }, { "name": "model.layers.10.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 28188672 }, { "name": "model.layers.10.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 28792832 }, { "name": "model.layers.10.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 28858368 }, { "name": "model.layers.10.self_attn.v_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 31275008 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31537152 } ], "md5sum": "a7b54cc1c88426b65e600ccdbd857ad5" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.11.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1576, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "4f9326ec64524ef44c17fa6be318fc82" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.gate_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d2c4232795a68b2c52ef033a00943ba5" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "35f4820abecb3749fe90d8f7864b1b10" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 31942656, "records": [ { "name": "model.layers.11.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.11.mlp.down_proj.BLinear_train.weight", "shape": [ 175, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 14344192 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19361792 }, { "name": "model.layers.11.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 19369984 }, { "name": "model.layers.11.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 19974144 }, { "name": "model.layers.11.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 20039680 }, { "name": "model.layers.11.self_attn.k_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 22456320 }, { "name": "model.layers.11.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 1014 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 22718464 }, { "name": "model.layers.11.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 31025152 } ], "md5sum": "580e3aae6aae8b3b6603a65d57c6ff86" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 22228992, "records": [ { "name": "model.layers.11.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 1014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.BLinear_train.weight", "shape": [ 112, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 8306688 }, { "name": "model.layers.11.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 9224192 }, { "name": "model.layers.11.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 13000704 }, { "name": "model.layers.11.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 13418496 }, { "name": "model.layers.11.self_attn.q_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 17195008 }, { "name": "model.layers.11.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 405 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 17612800 }, { "name": "model.layers.11.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 45 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 92160, "byteOffset": 18442240 }, { "name": "model.layers.11.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 405, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3317760, "byteOffset": 18534400 }, { "name": "model.layers.11.self_attn.v_proj.BLinear_train.weight", "shape": [ 45, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 368640, "byteOffset": 21852160 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22220800 } ], "md5sum": "13e901dd45da8a78e06fad293836967c" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.12.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1576, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "3d009b02ba72c340a972f52b61bfe747" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.12.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "a4163394922f88fe1c02863422faf28e" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ab12c429206a4171d50f5e4108598b0e" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33439744, "records": [ { "name": "model.layers.12.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 12910592 }, { "name": "model.layers.12.mlp.down_proj.BLinear_train.weight", "shape": [ 175, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 14344192 }, { "name": "model.layers.12.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 19361792 }, { "name": "model.layers.12.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 23003136 }, { "name": "model.layers.12.mlp.gate_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 32391168 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33431552 } ], "md5sum": "fd17701cab391573abd07a4b193d1f14" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 30099456, "records": [ { "name": "model.layers.12.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 376832 }, { "name": "model.layers.12.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 417792 }, { "name": "model.layers.12.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 1925120 }, { "name": "model.layers.12.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 1014 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 2088960 }, { "name": "model.layers.12.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 10395648 }, { "name": "model.layers.12.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 1014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 11313152 }, { "name": "model.layers.12.self_attn.o_proj.BLinear_train.weight", "shape": [ 112, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 19619840 }, { "name": "model.layers.12.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 20537344 }, { "name": "model.layers.12.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 24313856 }, { "name": "model.layers.12.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 24731648 }, { "name": "model.layers.12.self_attn.q_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 28508160 }, { "name": "model.layers.12.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 516 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1056768, "byteOffset": 28925952 }, { "name": "model.layers.12.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 57 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 116736, "byteOffset": 29982720 } ], "md5sum": "cf428f9643d40060fe260c0fac0444ee" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "890e1b4a015d001c0f28d0585e7efff2" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.13.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "e68991929d5b1164727e31b7a7f63f89" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 69873664, "records": [ { "name": "model.layers.13.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 2437 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 69873664, "byteOffset": 0 } ], "md5sum": "71a9b28b38171ad54175bb101e09a7a1" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 19963904, "records": [ { "name": "model.layers.13.mlp.up_proj.BLinear_no_train.weight", "shape": [ 2437, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19963904, "byteOffset": 0 } ], "md5sum": "822f21dbb1d257682174b6b3d5fd152a" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 28733440, "records": [ { "name": "model.layers.12.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 516, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4227072, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.v_proj.BLinear_train.weight", "shape": [ 57, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 466944, "byteOffset": 4227072 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4694016 }, { "name": "model.layers.13.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 4702208 }, { "name": "model.layers.13.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 8343552 }, { "name": "model.layers.13.mlp.gate_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 17731584 }, { "name": "model.layers.13.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 270 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7741440, "byteOffset": 18771968 }, { "name": "model.layers.13.mlp.up_proj.BLinear_train.weight", "shape": [ 270, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 26513408 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 28725248 } ], "md5sum": "0b769b15cc215b3cabf4a73c47484afd" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 33122304, "records": [ { "name": "model.layers.13.self_attn.k_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 8388608 }, { "name": "model.layers.13.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 12165120 }, { "name": "model.layers.13.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 16359424 }, { "name": "model.layers.13.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 16777216 }, { "name": "model.layers.13.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 20553728 }, { "name": "model.layers.13.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 20971520 }, { "name": "model.layers.13.self_attn.q_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 24748032 }, { "name": "model.layers.13.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 25165824 }, { "name": "model.layers.13.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 25542656 }, { "name": "model.layers.13.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 25583616 }, { "name": "model.layers.13.self_attn.v_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 27090944 }, { "name": "model.layers.14.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 516 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1056768, "byteOffset": 27254784 }, { "name": "model.layers.14.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 57 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 116736, "byteOffset": 28311552 }, { "name": "model.layers.14.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 516, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4227072, "byteOffset": 28428288 }, { "name": "model.layers.14.self_attn.k_proj.BLinear_train.weight", "shape": [ 57, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 466944, "byteOffset": 32655360 } ], "md5sum": "c582df5670a032422592c8a07089ceea" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7ccc467157705cc0f518d8ef6db0de2c" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.gate_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "84260b9534d9948fc453d0c2b6b3e101" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e971e0d344be756e6913d6837bd137b3" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 32299008, "records": [ { "name": "model.layers.14.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 738, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6045696, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.BLinear_train.weight", "shape": [ 81, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 6045696 }, { "name": "model.layers.14.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 6709248 }, { "name": "model.layers.14.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 10485760 }, { "name": "model.layers.14.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 10903552 }, { "name": "model.layers.14.self_attn.q_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 14680064 }, { "name": "model.layers.14.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 15097856 }, { "name": "model.layers.14.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 15702016 }, { "name": "model.layers.14.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 15767552 }, { "name": "model.layers.14.self_attn.v_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18184192 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18446336 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18454528 }, { "name": "model.layers.2.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 18462720 }, { "name": "model.layers.2.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 18839552 }, { "name": "model.layers.2.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 18880512 }, { "name": "model.layers.2.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 20387840 }, { "name": "model.layers.2.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 20551680 }, { "name": "model.layers.2.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 24328192 }, { "name": "model.layers.2.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 24745984 }, { "name": "model.layers.2.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 28522496 }, { "name": "model.layers.2.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 28940288 }, { "name": "model.layers.2.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 30455808 }, { "name": "model.layers.2.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 30619648 }, { "name": "model.layers.2.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 32135168 } ], "md5sum": "37c1c4f2ba6005dfec583ecaa29e53c3" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "141dd3fe08dd454822eaa6a9370dc565" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.3.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "3dee1b8364c0c715bf297ffcbf100395" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "444e98536501571d05e383025fed9475" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.2.self_attn.v_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.3.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 8396800 }, { "name": "model.layers.3.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 14761984 }, { "name": "model.layers.3.mlp.gate_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 31203328 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33021952 } ], "md5sum": "7f0d5fa1487dc5e3ffff4e1755eae8aa" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 27674624, "records": [ { "name": "model.layers.3.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 516 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1056768, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 57 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 116736, "byteOffset": 1056768 }, { "name": "model.layers.3.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 516, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4227072, "byteOffset": 1173504 }, { "name": "model.layers.3.self_attn.k_proj.BLinear_train.weight", "shape": [ 57, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 466944, "byteOffset": 5400576 }, { "name": "model.layers.3.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 1014 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 5867520 }, { "name": "model.layers.3.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 14174208 }, { "name": "model.layers.3.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 1014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 15091712 }, { "name": "model.layers.3.self_attn.o_proj.BLinear_train.weight", "shape": [ 112, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 23398400 }, { "name": "model.layers.3.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 24315904 }, { "name": "model.layers.3.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 25831424 }, { "name": "model.layers.3.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 25995264 }, { "name": "model.layers.3.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 27510784 } ], "md5sum": "ff2fe6cc6a3673e2b1bbb769f312daa2" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d42a66ee0525e8e1c887b39b0c00a098" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.gate_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f322ec8beb5229f6ede819c908d993f2" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "73d7f78a2fb0a5cfd292c3ada7b18faa" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.3.self_attn.v_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8396800 }, { "name": "model.layers.4.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 8404992 }, { "name": "model.layers.4.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 8781824 }, { "name": "model.layers.4.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 8822784 }, { "name": "model.layers.4.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 10330112 }, { "name": "model.layers.4.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 738 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6045696, "byteOffset": 10493952 }, { "name": "model.layers.4.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 81 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16539648 }, { "name": "model.layers.4.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 738, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6045696, "byteOffset": 17203200 }, { "name": "model.layers.4.self_attn.o_proj.BLinear_train.weight", "shape": [ 81, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23248896 }, { "name": "model.layers.4.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 23912448 }, { "name": "model.layers.4.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 25427968 }, { "name": "model.layers.4.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 25591808 }, { "name": "model.layers.4.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 27107328 } ], "md5sum": "a4fc572b2de583c668cd5fd4e73ab68a" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 45187072, "records": [ { "name": "model.layers.5.mlp.down_proj.BLinear_no_train.weight", "shape": [ 1576, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45187072, "byteOffset": 0 } ], "md5sum": "0b2346298dd57b5255f465bfcf5d5d13" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.gate_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d2ed60850074fc7bf6c57ef49a8d3295" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.5.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "614f99554a2e45e1c8275f8a2508eac9" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 27758592, "records": [ { "name": "model.layers.4.self_attn.v_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.5.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 1576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12910592, "byteOffset": 8396800 }, { "name": "model.layers.5.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 175 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 21307392 }, { "name": "model.layers.5.mlp.down_proj.BLinear_train.weight", "shape": [ 175, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5017600, "byteOffset": 22740992 } ], "md5sum": "a50b1cf814e95969a6476dfca2a1e86e" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 32176128, "records": [ { "name": "model.layers.5.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 0 }, { "name": "model.layers.5.mlp.up_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 6365184 }, { "name": "model.layers.5.mlp.up_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 22806528 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24625152 }, { "name": "model.layers.5.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 295 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 604160, "byteOffset": 24633344 }, { "name": "model.layers.5.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 25237504 }, { "name": "model.layers.5.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 295, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2416640, "byteOffset": 25303040 }, { "name": "model.layers.5.self_attn.k_proj.BLinear_train.weight", "shape": [ 32, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 27719680 }, { "name": "model.layers.5.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 27981824 }, { "name": "model.layers.5.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 31758336 } ], "md5sum": "78e5e2c0c6a034fc681a566942202c71" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "828332d1a70dd05f15df6a066ccdb362" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.gate_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0c431fdd7222f38009954de8c492f5c2" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d9a9b325a60ba62e5ec7eecb5d59a713" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 33478656, "records": [ { "name": "model.layers.5.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 3776512 }, { "name": "model.layers.5.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 4194304 }, { "name": "model.layers.5.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 5709824 }, { "name": "model.layers.5.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 5873664 }, { "name": "model.layers.5.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 7389184 }, { "name": "model.layers.5.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 7553024 }, { "name": "model.layers.5.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 7929856 }, { "name": "model.layers.5.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 7970816 }, { "name": "model.layers.5.self_attn.v_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 9478144 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9641984 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9650176 }, { "name": "model.layers.6.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 9658368 }, { "name": "model.layers.6.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9809920 }, { "name": "model.layers.6.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 9826304 }, { "name": "model.layers.6.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 10432512 }, { "name": "model.layers.6.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 1014 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 10498048 }, { "name": "model.layers.6.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 18804736 }, { "name": "model.layers.6.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 1014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 19722240 }, { "name": "model.layers.6.self_attn.o_proj.BLinear_train.weight", "shape": [ 112, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 28028928 }, { "name": "model.layers.6.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 28946432 }, { "name": "model.layers.6.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 30461952 }, { "name": "model.layers.6.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 30625792 }, { "name": "model.layers.6.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 32141312 }, { "name": "model.layers.6.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 516 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1056768, "byteOffset": 32305152 }, { "name": "model.layers.6.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 57 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 116736, "byteOffset": 33361920 } ], "md5sum": "d1a3a36afa2da5fcedc26a88e1778ee6" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5946324047a49ac7a39954d584cda1bd" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.gate_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "27cd50d1ab1155cc9da376bf4cca9019" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.up_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "687d4f15f426f41a03b7c0ea58c3c40b" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 31547392, "records": [ { "name": "model.layers.6.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 516, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4227072, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.v_proj.BLinear_train.weight", "shape": [ 57, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 466944, "byteOffset": 4227072 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4694016 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4702208 }, { "name": "model.layers.7.self_attn.k_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 4710400 }, { "name": "model.layers.7.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 1014 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 13099008 }, { "name": "model.layers.7.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21405696 }, { "name": "model.layers.7.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 1014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8306688, "byteOffset": 22323200 }, { "name": "model.layers.7.self_attn.o_proj.BLinear_train.weight", "shape": [ 112, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30629888 } ], "md5sum": "906ee48f0cf3d9681386c972931452b1" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 20557824, "records": [ { "name": "model.layers.8.mlp.down_proj.BLinear_no_train.weight", "shape": [ 717, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20557824, "byteOffset": 0 } ], "md5sum": "aa2b852c5117910dd9d82b0b59028a64" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 20557824, "records": [ { "name": "model.layers.8.mlp.gate_proj.ALinear_no_train.weight", "shape": [ 14336, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20557824, "byteOffset": 0 } ], "md5sum": "0855c82f4270a6bd1607d1c2e96a60b0" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 32858112, "records": [ { "name": "model.layers.8.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 1146 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32858112, "byteOffset": 0 } ], "md5sum": "ca8f74997880eb093b3c62f41818f1ac" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 30576640, "records": [ { "name": "model.layers.7.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 3776512 }, { "name": "model.layers.7.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 4194304 }, { "name": "model.layers.7.self_attn.q_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 7970816 }, { "name": "model.layers.7.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 405 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 8388608 }, { "name": "model.layers.7.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 45 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 92160, "byteOffset": 9218048 }, { "name": "model.layers.7.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 405, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3317760, "byteOffset": 9310208 }, { "name": "model.layers.7.self_attn.v_proj.BLinear_train.weight", "shape": [ 45, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 368640, "byteOffset": 12627968 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12996608 }, { "name": "model.layers.8.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5873664, "byteOffset": 13004800 }, { "name": "model.layers.8.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 79 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 647168, "byteOffset": 18878464 }, { "name": "model.layers.8.mlp.down_proj.BLinear_train.weight", "shape": [ 79, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2265088, "byteOffset": 19525632 }, { "name": "model.layers.8.mlp.gate_proj.ALinear_train.weight", "shape": [ 14336, 79 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2265088, "byteOffset": 21790720 }, { "name": "model.layers.8.mlp.gate_proj.BLinear_no_train.weight", "shape": [ 717, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5873664, "byteOffset": 24055808 }, { "name": "model.layers.8.mlp.gate_proj.BLinear_train.weight", "shape": [ 79, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 647168, "byteOffset": 29929472 } ], "md5sum": "81335797c09350f7e7745232dcb87979" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 22482944, "records": [ { "name": "model.layers.8.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 127 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3641344, "byteOffset": 0 }, { "name": "model.layers.8.mlp.up_proj.BLinear_no_train.weight", "shape": [ 1146, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9388032, "byteOffset": 3641344 }, { "name": "model.layers.8.mlp.up_proj.BLinear_train.weight", "shape": [ 127, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1040384, "byteOffset": 13029376 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14069760 }, { "name": "model.layers.8.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 14077952 }, { "name": "model.layers.8.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14229504 }, { "name": "model.layers.8.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 14245888 }, { "name": "model.layers.8.self_attn.k_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 14852096 }, { "name": "model.layers.8.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 14917632 }, { "name": "model.layers.8.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 16433152 }, { "name": "model.layers.8.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 16596992 }, { "name": "model.layers.8.self_attn.o_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 18112512 }, { "name": "model.layers.8.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 185 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 18276352 }, { "name": "model.layers.8.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 19791872 }, { "name": "model.layers.8.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 185, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1515520, "byteOffset": 19955712 }, { "name": "model.layers.8.self_attn.q_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 21471232 }, { "name": "model.layers.8.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 74 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 151552, "byteOffset": 21635072 }, { "name": "model.layers.8.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 8 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 21786624 }, { "name": "model.layers.8.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 74, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 606208, "byteOffset": 21803008 }, { "name": "model.layers.8.self_attn.v_proj.BLinear_train.weight", "shape": [ 8, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 65536, "byteOffset": 22409216 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22474752 } ], "md5sum": "98d0199562c7d2db8b879dfe48f7d9f9" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.9.mlp.down_proj.BLinear_no_train.weight", "shape": [ 2007, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "93cf700ff3cc979cb32a61942464f518" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.gate_proj.weight", "shape": [ 14336, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d30c0a89b1ecd47e3e35a43a9801330a" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 57544704, "records": [ { "name": "model.layers.9.mlp.up_proj.ALinear_no_train.weight", "shape": [ 14336, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57544704, "byteOffset": 0 } ], "md5sum": "3267cbdecb8e57b595018a1ba874fa29" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 30990336, "records": [ { "name": "model.layers.9.mlp.down_proj.ALinear_no_train.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.ALinear_train.weight", "shape": [ 4096, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 16441344 }, { "name": "model.layers.9.mlp.down_proj.BLinear_train.weight", "shape": [ 222, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 18259968 }, { "name": "model.layers.9.mlp.up_proj.ALinear_train.weight", "shape": [ 14336, 222 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6365184, "byteOffset": 24625152 } ], "md5sum": "8704cfc783c8c93c1d123540c3c3a74a" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 32940032, "records": [ { "name": "model.layers.9.mlp.up_proj.BLinear_no_train.weight", "shape": [ 2007, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.9.mlp.up_proj.BLinear_train.weight", "shape": [ 222, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1818624, "byteOffset": 16441344 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18259968 }, { "name": "model.layers.9.self_attn.k_proj.ALinear_no_train.weight", "shape": [ 1024, 184 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 376832, "byteOffset": 18268160 }, { "name": "model.layers.9.self_attn.k_proj.ALinear_train.weight", "shape": [ 1024, 20 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 40960, "byteOffset": 18644992 }, { "name": "model.layers.9.self_attn.k_proj.BLinear_no_train.weight", "shape": [ 184, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1507328, "byteOffset": 18685952 }, { "name": "model.layers.9.self_attn.k_proj.BLinear_train.weight", "shape": [ 20, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 163840, "byteOffset": 20193280 }, { "name": "model.layers.9.self_attn.o_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 20357120 }, { "name": "model.layers.9.self_attn.o_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 24133632 }, { "name": "model.layers.9.self_attn.o_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 24551424 }, { "name": "model.layers.9.self_attn.o_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 28327936 }, { "name": "model.layers.9.self_attn.q_proj.ALinear_no_train.weight", "shape": [ 4096, 461 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 28745728 }, { "name": "model.layers.9.self_attn.q_proj.ALinear_train.weight", "shape": [ 4096, 51 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 32522240 } ], "md5sum": "e00db5cb97371f25a1db8937520487d5" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 10061824, "records": [ { "name": "model.layers.9.self_attn.q_proj.BLinear_no_train.weight", "shape": [ 461, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3776512, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.q_proj.BLinear_train.weight", "shape": [ 51, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 417792, "byteOffset": 3776512 }, { "name": "model.layers.9.self_attn.v_proj.ALinear_no_train.weight", "shape": [ 1024, 516 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1056768, "byteOffset": 4194304 }, { "name": "model.layers.9.self_attn.v_proj.ALinear_train.weight", "shape": [ 1024, 57 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 116736, "byteOffset": 5251072 }, { "name": "model.layers.9.self_attn.v_proj.BLinear_no_train.weight", "shape": [ 516, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4227072, "byteOffset": 5367808 }, { "name": "model.layers.9.self_attn.v_proj.BLinear_train.weight", "shape": [ 57, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 466944, "byteOffset": 9594880 } ], "md5sum": "87a0d2be330aa6485820abc05b8b3512" } ] }