{ "metadata": { "ParamSize": 195, "ParamBytes": 4351336448.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1050673152, "records": [ { "name": "lm_head.weight", "shape": [ 128256, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1050673152, "byteOffset": 0 } ], "md5sum": "a6945d49d5d076ecd0be132b67ce3cc2" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 1050673152, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1050673152, "byteOffset": 0 } ], "md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "4218ddc28a6c0151e5458ba7060bc329" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29040640, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8192 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16449536 }, { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16457728 } ], "md5sum": "090ec36bde00654f6601bc09247a1b65" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "e7759d97a1d4cc522112872d2c8161a2" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "9809570a155894469f3cc79405dc5a8d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "291eda3b1e4b9f31532375e52c9407e7" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "561cf7d055b90f2cfd15bdc5db58a259" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "c1a523898a3dfb8bfef948dde4cf7470" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "9c53f05ac65676773f007f265304887c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "c653f88e4a80f89211dbe7fd5ff3c4d6" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "a06d69662fe87e6d7a19eb249b498fb1" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "f84ebe5cbed27d71559a7e534acdc581" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "9ebb199a29e44baa71cfff1e721fa134" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 8388608 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 20971520 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29360128 } ], "md5sum": "0bb56185c02b5b8c711e2425d8025d01" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "aef8de5e993758b15eb5b6bd644e9e2c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "1f955ff730579ec27c0ba9f2725af6a8" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "fb91b80d103ac9c19a0fe9014c8906f3" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "d8afc57a17e14926dfed09115af611ea" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "7f9df5d17af89ac535a773afa6ed0aad" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "8fbbae308233d5b8dcefdf356b59ac36" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "7b84900cc51857acc42b967e3d17436d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "5782254d44ead3738e149c25e65c894e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "1bbb109c73616ee7dc5ad55db9278bec" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "5a9ff46dec05f8dfed5ec08c03df20a4" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "c556c40103406d7b625b4cef4b6e4e0c" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "8b2c3718766480f0b7ead3c0aa0f3194" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "7e5fcef182a8fc479eafd028327f3029" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "a09ce869e92a907c189ac390c43cdb42" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "fe059ab06443ff0d20b60761f2947c05" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "0f1ccdced576f38fd1984f81f4e6a86e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "14b13e11322c24b9d2fb5d91cbbc1bcb" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "7ce20ffcea44113c0e4da6eff81ab645" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "3e5c42660a330f61d5ea023f9ff21112" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "b0e99aa7cf74a0f7059e845dd938a7c3" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "329bfb01649ae4fd1572e51c24fae37a" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "9ead9b7622937a99bfc19fb6add515e5" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32907264, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16449536 }, { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 16457728 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32899072 } ], "md5sum": "334a516d63f4ecc4b90a282302f7e344" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "12b8fd93a259e49cc1da9888bff7dcb9" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "eeb68f51f4a8fd42ef0b1bbf6bbc0445" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "4f51374c1bdaaa7ea44f1ce54ab93ed5" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "b7df8e3f453390e6639429ed65796e9e" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "10490442548d03e5bdd4794992f558e1" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "d888f03e1c6f16df666fd1573e62117b" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "c6acc6f3919997f064b0d3e13d43f89e" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "aa361d5bf8afbb6d768c6717ea06b172" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "2a775fb2ffb4d399f6f2504b1ee9eab1" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "76a7fbef1c176da272685519016863aa" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "3e6f4454d15d71856777d4c48f4ecc09" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "2b65d9043261724c95b5508a0fef14c5" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "44a2689a1c921c4d80b8e6684f25cd8f" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "738c9fed2a8d44d354553619baa2ddee" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "ef142e9f5988f1daab6502bd02c5f07d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "a5c7a2cf1b431a400ac8101f6f940f5e" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "460612c5574cb3060b7218c717120af1" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "c70b0882804570266a24b2df87e4384a" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "6a1174b9cd0306313f1d4f823d2fa5f7" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "a629ec45894de8675367d79a49af73df" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "d4c35f96a6ff96d674a1592ab87a128d" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "29bbbc27a9ce703927cc98bcbdab12f4" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "5c803b0d1c1099f21ce2a8b6ad1c8d74" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "5127783bfc9f7c2a33dc77583fa40856" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "7eda786cee32e8afef5af73a4fa4d5ff" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "bba3a157abf834b900707a51b31da797" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "a7a7026dbe640ad0ce0e57590607999c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "c9f61caa7b4abfd58f59e155b52d9cfc" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "de1e6c2f8abdb1f5e4140d9136c6601c" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "4bd1412e5f2385ef7b522688f1a5bfe2" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "74eec2216834e1dc51ac51fc2231f274" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "83df1774febbc8c39919872ccd3db2e9" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "54eba7af61e9fef7f41ec1930d34db26" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "25b9a6790c83cf06cc5912d3df0885f3" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "7c599262b59fc75d06bb1944aba844b7" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "aceb741d6e80f0951a4192738aa79822" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "d2c5e9964d28e1cb9aadde6a73b27e30" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.29.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "400237f6c3166c8be47ce13a6a65fff4" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "623ddc2870e252822e9857b22f2a7b8f" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 24846336, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 8396800 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24838144 } ], "md5sum": "3cc73317e96fc604524871e150a15676" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20971520 } ], "md5sum": "94c1e7223dab086b2a0e221ce7372dce" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 32882688, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 4014, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32882688, "byteOffset": 0 } ], "md5sum": "4f58d86fd51763a2812ee020c952c543" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 29032448, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 4096, 2007 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16441344, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16441344 }, { "name": "model.layers.31.self_attn.qkv_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16449536 } ], "md5sum": "e5be9dbae7631dd8d285a9261efd7ea5" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 8396800, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 } ], "md5sum": "d622b6c2b03f084636e78fc408f5a3b4" } ] }