{ "metadata": { "ParamSize": 325, "ParamBytes": 4300707840.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "lm_head.linear.weight", "shape": [ 51200, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "d975f6e26151b33f352a07426b0e0b04" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.31.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f23dbb82fd6dd312dcf6143e22b7ed11" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.31.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1da1926a2468df6a44a5a5b6466f343b" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "transformer.embd.weight", "shape": [ 51200, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "92e1da58eb645448e09ca03417db18f3" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.0.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "06643c4cf1355512c432025490149331" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.0.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "16176aff35a8d755aac87792370132d3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.1.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "159f4e35177c8e668ce734407e469bcf" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.1.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "a5d4b12f35eeb85f2ef3bd17ed299f96" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.10.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "80634adb38332c934930f1b28c1bccfd" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.10.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "90ab9865764843adfbd08975e606c4f6" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29774080, "records": [ { "name": "lm_head.linear.bias", "shape": [ 51200 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 0 }, { "name": "lm_head.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 102400 }, { "name": "lm_head.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 107520 }, { "name": "transformer.h.31.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 112640 }, { "name": "transformer.h.31.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 117760 }, { "name": "transformer.h.31.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 122880 }, { "name": "transformer.h.31.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 143360 }, { "name": "transformer.h.0.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 148480 }, { "name": "transformer.h.0.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 153600 }, { "name": "transformer.h.0.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 158720 }, { "name": "transformer.h.0.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 179200 }, { "name": "transformer.h.0.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 184320 }, { "name": "transformer.h.0.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 189440 }, { "name": "transformer.h.0.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 3466240 }, { "name": "transformer.h.0.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 3470080 }, { "name": "transformer.h.1.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13300480 }, { "name": "transformer.h.1.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13305600 }, { "name": "transformer.h.1.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 13310720 }, { "name": "transformer.h.1.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13331200 }, { "name": "transformer.h.1.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13336320 }, { "name": "transformer.h.1.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13341440 }, { "name": "transformer.h.1.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 16618240 }, { "name": "transformer.h.1.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 16622080 }, { "name": "transformer.h.10.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26452480 }, { "name": "transformer.h.10.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26457600 }, { "name": "transformer.h.10.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 26462720 }, { "name": "transformer.h.10.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26483200 }, { "name": "transformer.h.10.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26488320 }, { "name": "transformer.h.10.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26493440 }, { "name": "transformer.h.10.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 29770240 } ], "md5sum": "3916d5f402a8cac121576445b23853e4" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.11.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "542fdb89d63a08d4e54b3ef4c99e79d7" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.11.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "409edfe7217787b66d516287d16135e4" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.12.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4b54c6cc75911ea29a6fd512bb9a6a85" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.12.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "30a8025916cde9e6bf881befcaba59ba" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.10.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.11.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.11.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.11.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.11.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.11.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.11.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.11.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.11.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.12.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.12.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.12.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.12.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.12.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.12.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.12.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "23ee52fd56c33fe11da594c34969a6f8" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.13.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9713853cc85ec38b24d4b548444ee76b" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.13.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "74130282439ed4aafba2acdf09e04b7f" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.14.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "94cee659b45a7ef5191b2a92f94c6c08" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26288640, "records": [ { "name": "transformer.h.12.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.13.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.13.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.13.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.13.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.13.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.13.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.13.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.13.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.14.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22982400 }, { "name": "transformer.h.14.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23002880 }, { "name": "transformer.h.14.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23008000 }, { "name": "transformer.h.14.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26284800 } ], "md5sum": "08512f46e09cbf3826555c354c9f26ae" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.2.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6ba164fe25bdbc50bf6741eff134b325" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.2.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "aa7186d1325ce49118dadb569e3314d8" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.3.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0b73ad242f4eee635f8cdedc345b9283" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.3.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "007c2eab956fecfd8b5c5de82066fe1f" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.14.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.2.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.2.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.2.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.2.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.2.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.2.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.2.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.2.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.3.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.3.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.3.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.3.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.3.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.3.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.3.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "73ffc8bbdafdf7510685fa4fbf6fe65f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.4.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d9cb3496b819790cfa6d61edf9b5dbe4" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.4.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "82a2a1233c6289df34b78e2133a858fc" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.5.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7dfeae57c9eeb8d6642f80c2230bb5f7" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.5.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "03bcab4f0956bd1bbbe05f16a3389f27" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.3.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.4.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.4.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.4.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.4.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.4.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.4.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.4.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.4.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.5.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.5.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.5.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.5.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.5.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.5.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.5.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "9cc5df02378d5f4e9401c87a828b5f42" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.6.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "fb1ece1d04b2ad5d2551ba295bdb84fc" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.6.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9a05a675400198338c0257076995605a" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.7.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ef5fda286fffac394564c3cd8d58b9ad" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.7.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1865e0949a7c3c44cf5e951b7e1ce459" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.5.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.6.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.6.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.6.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.6.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.6.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.6.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.6.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.6.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.7.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.7.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.7.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.7.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.7.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.7.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.7.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "94ba6f873c00c81cec9389a135172a94" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.8.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "327309c4a1ec9e5f3df05bebde532c70" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.8.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "34fe66a01091b3a28ad3ef1153551b25" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.9.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c7f2bbf214d1edc8b19c2f006af0afdd" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.9.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1a231d574ff8a3826c19b7d9e825f2b4" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.7.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.8.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.8.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.8.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.8.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.8.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.8.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.8.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.8.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.9.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.9.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.9.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.9.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.9.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.9.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.9.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "3c2fafaa741c7ebe478cb19365f52442" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.14.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "5872e8dbd1cdb4845470d1d0474ed2da" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.15.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ebb1819399ea2ba7255684558b22d044" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.15.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "beb90ecf97840c25388feaae5b3a1eeb" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.16.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "64e2abc845f69a976d24f2068cdd477c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.16.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b5e5beed9f784121eaa4b70c78efb76b" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 26319360, "records": [ { "name": "transformer.h.9.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.14.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.14.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.14.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9840640 }, { "name": "transformer.h.15.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9845760 }, { "name": "transformer.h.15.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9850880 }, { "name": "transformer.h.15.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9856000 }, { "name": "transformer.h.15.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9876480 }, { "name": "transformer.h.15.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9881600 }, { "name": "transformer.h.15.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9886720 }, { "name": "transformer.h.15.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13163520 }, { "name": "transformer.h.15.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13167360 }, { "name": "transformer.h.16.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22997760 }, { "name": "transformer.h.16.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23002880 }, { "name": "transformer.h.16.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 23008000 }, { "name": "transformer.h.16.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23028480 }, { "name": "transformer.h.16.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23033600 }, { "name": "transformer.h.16.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23038720 }, { "name": "transformer.h.16.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26315520 } ], "md5sum": "e273300e40456e3a550b4d9a78c48c2b" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.17.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6f3b18780cad91797c93fa45b85a86a7" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.17.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "86e52c30a3f11bf17d863d12761e4b7b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.18.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b913cbe31eb05080139ecff883c69864" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.18.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "fae9535f797876d0666edf53e6db6449" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.16.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.17.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.17.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.17.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.17.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.17.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.17.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.17.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.17.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.18.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.18.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.18.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.18.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.18.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.18.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.18.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "2ba1eafa04c4caf1bf129aa6db290ca6" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.19.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e2c902dcb420f5c8cc55e678376ebdc4" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.19.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "71ea52dea7b1b442a2f1da92156d9d3f" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.20.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "343e7eef37a242ef1719f27c2c662c60" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.20.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "14cdda37b9dcccf904d96cf04280404d" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.18.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.19.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.19.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.19.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.19.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.19.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.19.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.19.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.19.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.20.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.20.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.20.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.20.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.20.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.20.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.20.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "0ad45f216f510ecb47aaf7d28d2c998c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.21.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "99879209630b84ada840c1157bb92324" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.21.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "03b469bafc73b560b206a2cb0211e725" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.22.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "5bd5b0d996dbac57ef1c61ce4f5a774d" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.22.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4c942820a1410dec5bf15cbb15357e11" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.20.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.21.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.21.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.21.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.21.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.21.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.21.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.21.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.21.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.22.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.22.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.22.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.22.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.22.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.22.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.22.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "8b7ebd68a6c0ee436af6566c39af6b42" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.23.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7eaeee3a74339fe092fd73d561b35710" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.23.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "34e8de47310f3d87008c085ba70ff33c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.24.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f57b3e81eb281f16aaa4c9d55203da7f" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.24.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f31736218cfe9ad3479c692756a2aca6" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.22.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.23.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.23.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.23.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.23.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.23.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.23.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.23.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.23.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.24.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.24.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.24.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.24.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.24.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.24.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.24.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "e0364658dc38ff9dd0c11c2f35a2b308" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.25.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "de0b9bfb5aca71fc8645f7f73b0a40f4" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.25.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "344532f2b63189d1207727bb5a0cdceb" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.26.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bd493e86bd7af49ace1c0f5c59b5e16d" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.26.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c09105f6dc6fa66c08e3904452773660" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.24.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.25.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.25.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.25.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.25.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.25.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.25.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.25.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.25.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.26.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.26.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.26.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.26.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.26.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.26.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.26.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "67cc5b7deb7a6ab658bc8a61d5d3218c" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.27.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9c70aa55124f7ac2583c3ad70846eabd" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.27.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "accc65787d53e2825f2db36dfd12ae20" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.28.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d1ab9190ecc43be15da3e2f39326651f" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.28.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c25a5b813451672307ac3ec8487a9fca" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.26.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.27.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.27.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.27.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.27.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.27.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.27.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.27.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.27.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.28.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.28.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.28.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.28.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.28.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.28.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.28.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "112f1bdd0398b672709c03aa83cfe677" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.29.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2bf8fdd040827bf1ce443d974b155ea2" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.29.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "14fd5ce1ad0eaaaa45ecc7173169af95" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.30.mlp.fc1.weight", "shape": [ 10240, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "97b87e746f7d162db5bef69b670d83d9" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "transformer.h.30.mlp.fc2.weight", "shape": [ 2560, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e43c1723fdb3ea46ffa7b6c0b9b65d7b" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 26304000, "records": [ { "name": "transformer.h.28.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.29.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.29.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9835520 }, { "name": "transformer.h.29.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 9840640 }, { "name": "transformer.h.29.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9861120 }, { "name": "transformer.h.29.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9866240 }, { "name": "transformer.h.29.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9871360 }, { "name": "transformer.h.29.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13148160 }, { "name": "transformer.h.29.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13152000 }, { "name": "transformer.h.30.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22982400 }, { "name": "transformer.h.30.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 22987520 }, { "name": "transformer.h.30.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 22992640 }, { "name": "transformer.h.30.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23013120 }, { "name": "transformer.h.30.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 23018240 }, { "name": "transformer.h.30.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 23023360 }, { "name": "transformer.h.30.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 26300160 } ], "md5sum": "108cc3c01c0da74fedf752f3cd37a088" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 22946560, "records": [ { "name": "transformer.h.30.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "transformer.h.31.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9830400 }, { "name": "transformer.h.31.mixer.out_proj.weight", "shape": [ 2560, 640 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "transformer.h.31.mixer.Wqkv.bias", "shape": [ 1920 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840, "byteOffset": 13112320 }, { "name": "transformer.h.31.mixer.Wqkv.weight", "shape": [ 1920, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 13116160 } ], "md5sum": "af432cb8e09a87fa3dda25cd617d0ae7" } ] }