| { | |
| "metadata": { | |
| "ParamSize": 195, | |
| "ParamBytes": 4351336448.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1050673152, | |
| "records": [ | |
| { | |
| "name": "lm_head.weight", | |
| "shape": [ | |
| 128256, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1050673152, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a6945d49d5d076ecd0be132b67ce3cc2" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1050673152, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.weight", | |
| "shape": [ | |
| 128256, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1050673152, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4218ddc28a6c0151e5458ba7060bc329" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29040640, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8192 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16449536 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16457728 | |
| } | |
| ], | |
| "md5sum": "090ec36bde00654f6601bc09247a1b65" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e7759d97a1d4cc522112872d2c8161a2" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "9809570a155894469f3cc79405dc5a8d" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "291eda3b1e4b9f31532375e52c9407e7" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "561cf7d055b90f2cfd15bdc5db58a259" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "c1a523898a3dfb8bfef948dde4cf7470" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9c53f05ac65676773f007f265304887c" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "c653f88e4a80f89211dbe7fd5ff3c4d6" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "a06d69662fe87e6d7a19eb249b498fb1" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f84ebe5cbed27d71559a7e534acdc581" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "9ebb199a29e44baa71cfff1e721fa134" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29368320, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 20971520 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 29360128 | |
| } | |
| ], | |
| "md5sum": "0bb56185c02b5b8c711e2425d8025d01" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aef8de5e993758b15eb5b6bd644e9e2c" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "1f955ff730579ec27c0ba9f2725af6a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fb91b80d103ac9c19a0fe9014c8906f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "d8afc57a17e14926dfed09115af611ea" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "7f9df5d17af89ac535a773afa6ed0aad" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8fbbae308233d5b8dcefdf356b59ac36" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "7b84900cc51857acc42b967e3d17436d" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5782254d44ead3738e149c25e65c894e" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "1bbb109c73616ee7dc5ad55db9278bec" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "5a9ff46dec05f8dfed5ec08c03df20a4" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c556c40103406d7b625b4cef4b6e4e0c" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "8b2c3718766480f0b7ead3c0aa0f3194" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7e5fcef182a8fc479eafd028327f3029" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "a09ce869e92a907c189ac390c43cdb42" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "fe059ab06443ff0d20b60761f2947c05" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0f1ccdced576f38fd1984f81f4e6a86e" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "14b13e11322c24b9d2fb5d91cbbc1bcb" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7ce20ffcea44113c0e4da6eff81ab645" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "3e5c42660a330f61d5ea023f9ff21112" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "b0e99aa7cf74a0f7059e845dd938a7c3" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "329bfb01649ae4fd1572e51c24fae37a" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9ead9b7622937a99bfc19fb6add515e5" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32907264, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16449536 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 16457728 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32899072 | |
| } | |
| ], | |
| "md5sum": "334a516d63f4ecc4b90a282302f7e344" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "12b8fd93a259e49cc1da9888bff7dcb9" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eeb68f51f4a8fd42ef0b1bbf6bbc0445" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "4f51374c1bdaaa7ea44f1ce54ab93ed5" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b7df8e3f453390e6639429ed65796e9e" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "10490442548d03e5bdd4794992f558e1" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "d888f03e1c6f16df666fd1573e62117b" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6acc6f3919997f064b0d3e13d43f89e" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "aa361d5bf8afbb6d768c6717ea06b172" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2a775fb2ffb4d399f6f2504b1ee9eab1" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "76a7fbef1c176da272685519016863aa" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "3e6f4454d15d71856777d4c48f4ecc09" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2b65d9043261724c95b5508a0fef14c5" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "44a2689a1c921c4d80b8e6684f25cd8f" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "738c9fed2a8d44d354553619baa2ddee" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "ef142e9f5988f1daab6502bd02c5f07d" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "a5c7a2cf1b431a400ac8101f6f940f5e" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "460612c5574cb3060b7218c717120af1" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "c70b0882804570266a24b2df87e4384a" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6a1174b9cd0306313f1d4f823d2fa5f7" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "a629ec45894de8675367d79a49af73df" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "d4c35f96a6ff96d674a1592ab87a128d" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "29bbbc27a9ce703927cc98bcbdab12f4" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "5c803b0d1c1099f21ce2a8b6ad1c8d74" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5127783bfc9f7c2a33dc77583fa40856" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "7eda786cee32e8afef5af73a4fa4d5ff" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "bba3a157abf834b900707a51b31da797" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a7a7026dbe640ad0ce0e57590607999c" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "c9f61caa7b4abfd58f59e155b52d9cfc" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "de1e6c2f8abdb1f5e4140d9136c6601c" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "4bd1412e5f2385ef7b522688f1a5bfe2" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "74eec2216834e1dc51ac51fc2231f274" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "83df1774febbc8c39919872ccd3db2e9" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "54eba7af61e9fef7f41ec1930d34db26" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "25b9a6790c83cf06cc5912d3df0885f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "7c599262b59fc75d06bb1944aba844b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "aceb741d6e80f0951a4192738aa79822" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d2c5e9964d28e1cb9aadde6a73b27e30" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "400237f6c3166c8be47ce13a6a65fff4" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "623ddc2870e252822e9857b22f2a7b8f" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24846336, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24838144 | |
| } | |
| ], | |
| "md5sum": "3cc73317e96fc604524871e150a15676" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20979712, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.31.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20971520 | |
| } | |
| ], | |
| "md5sum": "94c1e7223dab086b2a0e221ce7372dce" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32882688, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 4014, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32882688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4f58d86fd51763a2812ee020c952c543" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29032448, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 2007 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16441344, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16441344 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 1536, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 16449536 | |
| } | |
| ], | |
| "md5sum": "e5be9dbae7631dd8d285a9261efd7ea5" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 8396800, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| } | |
| ], | |
| "md5sum": "d622b6c2b03f084636e78fc408f5a3b4" | |
| } | |
| ] | |
| } |