| { | |
| "metadata": { | |
| "ParamSize": 110, | |
| "ParamBytes": 5012344832.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1048576000, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.weight", | |
| "shape": [ | |
| 256000, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "206485d87a22f62128d5b2494bafe7db" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0af16f8f82b18456b292f26c2e72f63b" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ba31ef002c5e642855d91dbaf959171" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c7d1d36c62d66bea4696e965fff1917e" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "362ff38d501c9881a63569d0cfa88605" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29376512, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 4096 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 8192 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 10493952 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18882560 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18886656 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 18890752 | |
| } | |
| ], | |
| "md5sum": "5d30e8d1452ab145a0564794c26cd791" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fe63b3dbe2543296f8112fac2ade5290" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "629331e7d43d063ae03d036060febb5f" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cd3f99f63fc39b1170222cb6f4717b72" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c9193e83b6a383f990b1d7d703a6f80f" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18882560 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27271168 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27275264 | |
| } | |
| ], | |
| "md5sum": "59d0b4f1bd55ceeda6e5ab96633e9451" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "820b534e89d40aa248659ca0081f0674" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "256dabcc684ebacdfdbbcfa0adc035e1" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29368320, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 10485760 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18878464 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 18882560 | |
| } | |
| ], | |
| "md5sum": "7d790e2cfa6142474ac62a0ec5aea10c" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "908e1855eb654380dfcd3cfd85a1e631" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ea87b38d7fde1e77b55f18aa11c0ed31" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f795202d6edb9f663561fa0cf03fa27a" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "88378b8b2905d8b91937c001c73403cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18882560 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27271168 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27275264 | |
| } | |
| ], | |
| "md5sum": "071c71fc33170a2d3a1a2ae1f03b140d" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b58913c0a9d042f1469b2c4929efb8d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7f3594a3e54595cf7f92b5901e0f6465" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29368320, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 10485760 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18878464 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 18882560 | |
| } | |
| ], | |
| "md5sum": "b7c100b48dd50547c5c74f03a68863aa" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "497e575ba76a1ae6e33393fd7c9c4c12" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7e13a326187148b0ea54878998af1dbe" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3e8d823c128094965bc0b63996bd30e0" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27271168, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18882560 | |
| } | |
| ], | |
| "md5sum": "c51345fdd445c9c55206b86ee31d704a" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "708872d176d1deddd88435c817e93d2f" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0eef3929b28890b79faf9d02dfa2487f" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29368320, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 10485760 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18878464 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 18882560 | |
| } | |
| ], | |
| "md5sum": "6328df0ffee1d1160e0fc6fa300f4c9a" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "30f748096a7ec18a6e349c49a7d14822" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a15a4fe50c6c5b78f36ec6351732a345" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5b697bbb99453187d0eee6b6127d2008" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "76327282d65a9c80614e17129cb9e5f2" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18882560 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27271168 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27275264 | |
| } | |
| ], | |
| "md5sum": "1f73ecde85ffc44fa98fba7534c55356" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c983dfde03cb18ee85a7d80d50f4d165" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "afb45d7056b90f804e2c75489be5792e" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29368320, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 10485760 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18878464 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 18882560 | |
| } | |
| ], | |
| "md5sum": "a8c9cd2cc4d6179ffa0d181051ef4f7e" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "89e90749de11be2bc1ee29c5450a4f3d" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bde36e490b2c3a905f40e1285f4873dd" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eefc32a2ccf311c1a95e01ca979aaa9f" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c31b3371a8c2294135e993653a25d153" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18882560 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27271168 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27275264 | |
| } | |
| ], | |
| "md5sum": "a9a738d8b541d5d1517049e641a72c6f" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b4d55621d295f8693b39a501ea581847" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1031e7308cb30a67747b65b4fdaa94af" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29368320, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 10485760 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18878464 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 18882560 | |
| } | |
| ], | |
| "md5sum": "f74050232fb038c2c717ca99af7cdada" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2639e2fc57901dac06e9d4183ce2ee34" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 134217728, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 32768, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 134217728, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cdcce0c795b9df58125fb24aa22f62e8" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67108864, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67108864, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "135969d29acd56ae5e9ac553cb476528" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27283456, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10485760, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2048, | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18882560 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27271168 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27275264 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 27279360 | |
| } | |
| ], | |
| "md5sum": "7bca8fca53ef5ba8a977e4733d66824a" | |
| } | |
| ] | |
| } |