| { |
| "metadata": { |
| "ParamSize": 325, |
| "ParamBytes": 3790815232.0, |
| "BitsPerParam": 4.5004543650347415 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 65568768, |
| "records": [ |
| { |
| "name": "lm_head.q_weight", |
| "shape": [ |
| 32016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 65568768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0b40c4f40719f541ac1d1ce8664c05a6" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 30748672, |
| "records": [ |
| { |
| "name": "lm_head.q_scale", |
| "shape": [ |
| 32016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8196096, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 8196096 |
| }, |
| { |
| "name": "model.layers.23.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 8204288 |
| } |
| ], |
| "md5sum": "b033ee68c284f58ec94e0fd5c3f9074a" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b537fdcc746d385fc23c662c255e7875" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 28196864, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 2818048 |
| }, |
| { |
| "name": "model.layers.24.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 2826240 |
| }, |
| { |
| "name": "model.layers.24.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 2834432 |
| }, |
| { |
| "name": "model.layers.24.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 25378816 |
| } |
| ], |
| "md5sum": "21ebc7956b910f02288bc82c2c524d0f" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 30810112, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 5636096 |
| }, |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 5644288 |
| } |
| ], |
| "md5sum": "eee8d28ed5fbbec56da84e207177b03a" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3d1a8cbd4569f1557d365da03beba032" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e982dab41729e0629307d3958c37198c" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6953d552fff006243bf12ae86f2fb1e0" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.25.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.25.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.25.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "04763dd1fc3c8dc325b242c3454803f2" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f01292364644db047ca9f33343365783" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9e6a3989385106de5de09728679a1859" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 32063488, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.26.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.layers.26.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.26.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 23601152 |
| }, |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 26419200 |
| }, |
| { |
| "name": "model.layers.26.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32055296 |
| } |
| ], |
| "md5sum": "fc2cc41a16f88ad1c58f2861a15a06e9" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "87ec7dc9a2824a6fe850e085d6918c45" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2860483be8d8f37820432faf319bff0f" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "86e2e0144bf09700d351a886ea430744" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.27.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.27.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.27.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "a6eed458a8c9e0e225abb31c02a87584" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "76aaed755fc3871a247f76a6a8d55494" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0975bae092526cff8507553b5e3afcf1" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 32063488, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.28.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.layers.28.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.28.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 23601152 |
| }, |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 26419200 |
| }, |
| { |
| "name": "model.layers.28.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32055296 |
| } |
| ], |
| "md5sum": "4968ee74f90636db5f830a26b4fafe6d" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b1b9545696960a317656bbf4a9d8128a" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e51a4319945c8d5ce052a50fd66f87b4" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "935b4f43432433166630a4fed8bf329b" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.29.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.29.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.29.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "40aba240b3cd2340aca41ff63b69ed01" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f0b0e31b2135abea8e45e5a47f5070ea" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "598701b5958f27ce8e301bd857cd9389" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 32063488, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.layers.30.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.30.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 23601152 |
| }, |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 26419200 |
| }, |
| { |
| "name": "model.layers.30.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32055296 |
| } |
| ], |
| "md5sum": "106a0f14f43fb3422dbca252fb2a4d3a" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "148102f5c2296b49bf82fc88b0f01e5a" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2e21f134ac8aae17f39a6ba05e0590fb" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e51446a00c00de35dc06d62c72715890" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.31.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.31.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.31.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "e8f234a0d13c97a2e7665fb9601b6767" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 65568768, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_weight", |
| "shape": [ |
| 32016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 65568768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8101e906dc2754662c17d738a420f4df" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 31805440, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.embed_tokens.q_scale", |
| "shape": [ |
| 32016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8196096, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 9252864 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 9261056 |
| } |
| ], |
| "md5sum": "3d1a49fefa6eae3b6b122357e64e2c83" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "34ce63d6c534e49f09d1c1690080beb6" |
| }, |
| { |
| "dataPath": "params_shard_33.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cd2aa732097919700614786ab02c622e" |
| }, |
| { |
| "dataPath": "params_shard_34.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3a25d819356fc440bfd6e89aec0f0ab8" |
| }, |
| { |
| "dataPath": "params_shard_35.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ee08a0284ac09431fa1d97e55f19ccf9" |
| }, |
| { |
| "dataPath": "params_shard_36.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0b1e23429a6d65ced6bc9a12bd2b0530" |
| }, |
| { |
| "dataPath": "params_shard_37.bin", |
| "format": "raw-shard", |
| "nbytes": 32661504, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 2818048 |
| }, |
| { |
| "name": "model.layers.0.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 8454144 |
| }, |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 8462336 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 11608064 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 19996672 |
| }, |
| { |
| "name": "model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 23871488 |
| }, |
| { |
| "name": "model.layers.1.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 29507584 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 29515776 |
| } |
| ], |
| "md5sum": "b5317cc011b1bb8a851c01aea36e0949" |
| }, |
| { |
| "dataPath": "params_shard_38.bin", |
| "format": "raw-shard", |
| "nbytes": 31989760, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.10.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 9445376 |
| } |
| ], |
| "md5sum": "b762636bed4234802b415c102d0ccd58" |
| }, |
| { |
| "dataPath": "params_shard_39.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d96ec7d6aea42dcf48fabf382c9bac3a" |
| }, |
| { |
| "dataPath": "params_shard_40.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9b7301e5be3f8f32f8cec838f264a9c7" |
| }, |
| { |
| "dataPath": "params_shard_41.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "54008fa41d165568577e3e6721879d50" |
| }, |
| { |
| "dataPath": "params_shard_42.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6d2e3d84a0b1276ab0d6b723219b37f5" |
| }, |
| { |
| "dataPath": "params_shard_43.bin", |
| "format": "raw-shard", |
| "nbytes": 29827072, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 2818048 |
| }, |
| { |
| "name": "model.layers.10.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 8454144 |
| }, |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 8462336 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 11608064 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 19996672 |
| }, |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 26681344 |
| } |
| ], |
| "md5sum": "8b5c5080696e61635aaeb715d63c5d1b" |
| }, |
| { |
| "dataPath": "params_shard_44.bin", |
| "format": "raw-shard", |
| "nbytes": 31989760, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 9445376 |
| } |
| ], |
| "md5sum": "945f16c6a649dfc4ee9fe7e4d09f4dfd" |
| }, |
| { |
| "dataPath": "params_shard_45.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "59eaf7cb937aa43acf4b1fcc8b4ce219" |
| }, |
| { |
| "dataPath": "params_shard_46.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2ac9fb02e1bd94a3fbaab1de9fcafa49" |
| }, |
| { |
| "dataPath": "params_shard_47.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "108bc77847ca8b1260e1501234489c78" |
| }, |
| { |
| "dataPath": "params_shard_48.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6b0f55a328a7e76d8ecc8ba5bb823a5d" |
| }, |
| { |
| "dataPath": "params_shard_49.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7efde6a522a8aeff8878037d2be4f51b" |
| }, |
| { |
| "dataPath": "params_shard_50.bin", |
| "format": "raw-shard", |
| "nbytes": 32661504, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 2818048 |
| }, |
| { |
| "name": "model.layers.2.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 8454144 |
| }, |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 8462336 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 11608064 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 19996672 |
| }, |
| { |
| "name": "model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 23871488 |
| }, |
| { |
| "name": "model.layers.3.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 29507584 |
| }, |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 29515776 |
| } |
| ], |
| "md5sum": "449e5d923b5d5b8c4d558382f6989157" |
| }, |
| { |
| "dataPath": "params_shard_51.bin", |
| "format": "raw-shard", |
| "nbytes": 31989760, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 9445376 |
| } |
| ], |
| "md5sum": "5a0c6db67004cf1aaa0b06a0ad22ede1" |
| }, |
| { |
| "dataPath": "params_shard_52.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bd8c642d86406a1cf9819b5092b9ff09" |
| }, |
| { |
| "dataPath": "params_shard_53.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "828bd4a7c40bcfc426a60ddc8c907582" |
| }, |
| { |
| "dataPath": "params_shard_54.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "107059db812767c3a22ca4c9fce7f137" |
| }, |
| { |
| "dataPath": "params_shard_55.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "389535ef554e39c5aebf9ac5d70b9146" |
| }, |
| { |
| "dataPath": "params_shard_56.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "41a42a71ed053fa0fa9654a3534efde7" |
| }, |
| { |
| "dataPath": "params_shard_57.bin", |
| "format": "raw-shard", |
| "nbytes": 32661504, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 2818048 |
| }, |
| { |
| "name": "model.layers.4.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 8454144 |
| }, |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 8462336 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 11608064 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 19996672 |
| }, |
| { |
| "name": "model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 23871488 |
| }, |
| { |
| "name": "model.layers.5.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 29507584 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 29515776 |
| } |
| ], |
| "md5sum": "ff4d378e4c925c454389961f885dfcd1" |
| }, |
| { |
| "dataPath": "params_shard_58.bin", |
| "format": "raw-shard", |
| "nbytes": 31989760, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.6.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 9445376 |
| } |
| ], |
| "md5sum": "cd72af0c54cf8e47ba3afdcaff90092e" |
| }, |
| { |
| "dataPath": "params_shard_59.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "60784475ffab01a96c54624b83bbf48e" |
| }, |
| { |
| "dataPath": "params_shard_60.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "99a700327658aa604df7b87bf6d64dad" |
| }, |
| { |
| "dataPath": "params_shard_61.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "53305d115ee1412aa68ef5695fcc6b70" |
| }, |
| { |
| "dataPath": "params_shard_62.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c4ac9ca8d824583a811b1142e5b3a4d9" |
| }, |
| { |
| "dataPath": "params_shard_63.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7546468d71840060cbca9b3f80945e61" |
| }, |
| { |
| "dataPath": "params_shard_64.bin", |
| "format": "raw-shard", |
| "nbytes": 32661504, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 2818048 |
| }, |
| { |
| "name": "model.layers.6.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 8454144 |
| }, |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 8462336 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 11608064 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 19996672 |
| }, |
| { |
| "name": "model.layers.7.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 23871488 |
| }, |
| { |
| "name": "model.layers.7.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 29507584 |
| }, |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 29515776 |
| } |
| ], |
| "md5sum": "f589c06eacfff997ccbfbbbb53c72ecb" |
| }, |
| { |
| "dataPath": "params_shard_65.bin", |
| "format": "raw-shard", |
| "nbytes": 31989760, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.8.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 9445376 |
| } |
| ], |
| "md5sum": "180a26e3aecbee023d2859ef1c60a8de" |
| }, |
| { |
| "dataPath": "params_shard_66.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dd52db494c1e8e5b8c39fb15b5a21ac5" |
| }, |
| { |
| "dataPath": "params_shard_67.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7b0e81b062e2adaaf278724a1b215ebf" |
| }, |
| { |
| "dataPath": "params_shard_68.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "48e5126e6b09caabe441021b58323a7e" |
| }, |
| { |
| "dataPath": "params_shard_69.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "503697be28d835545739b59807afc5bc" |
| }, |
| { |
| "dataPath": "params_shard_70.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9dab6833897e606bdd9e76b3fb08e57d" |
| }, |
| { |
| "dataPath": "params_shard_71.bin", |
| "format": "raw-shard", |
| "nbytes": 32661504, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 2818048 |
| }, |
| { |
| "name": "model.layers.8.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 8454144 |
| }, |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 8462336 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 11608064 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 19996672 |
| }, |
| { |
| "name": "model.layers.9.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 23871488 |
| }, |
| { |
| "name": "model.layers.9.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 29507584 |
| }, |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 29515776 |
| } |
| ], |
| "md5sum": "47889f68be08133e72830433fb9a3694" |
| }, |
| { |
| "dataPath": "params_shard_72.bin", |
| "format": "raw-shard", |
| "nbytes": 31989760, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.11.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 9445376 |
| } |
| ], |
| "md5sum": "ef59be6703b90c86384bd51581cc10ca" |
| }, |
| { |
| "dataPath": "params_shard_73.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9290491153ebb3b82db62ec9585fc64e" |
| }, |
| { |
| "dataPath": "params_shard_74.bin", |
| "format": "raw-shard", |
| "nbytes": 28196864, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 2818048 |
| }, |
| { |
| "name": "model.layers.12.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 2826240 |
| }, |
| { |
| "name": "model.layers.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 2834432 |
| }, |
| { |
| "name": "model.layers.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 25378816 |
| } |
| ], |
| "md5sum": "91472b27f3809c6feb3dc8b4a90664b9" |
| }, |
| { |
| "dataPath": "params_shard_75.bin", |
| "format": "raw-shard", |
| "nbytes": 30810112, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 5636096 |
| }, |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 5644288 |
| } |
| ], |
| "md5sum": "15c060738bb3eaa68851a5615e36a55c" |
| }, |
| { |
| "dataPath": "params_shard_76.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b1d08aaf2e32dc1850211c1cf1edd35c" |
| }, |
| { |
| "dataPath": "params_shard_77.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a2defa5e8d7bd70e197deb42ddd0653e" |
| }, |
| { |
| "dataPath": "params_shard_78.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "24d78516eaca36081a6da477a0fd4b94" |
| }, |
| { |
| "dataPath": "params_shard_79.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.13.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.13.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "6f2ad755a550a0710a898621b3d727b8" |
| }, |
| { |
| "dataPath": "params_shard_80.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b66ade8cb0a4e44fcc563ef0e2e99d56" |
| }, |
| { |
| "dataPath": "params_shard_81.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5b36ef50f129ba546be80fbf567ca1fc" |
| }, |
| { |
| "dataPath": "params_shard_82.bin", |
| "format": "raw-shard", |
| "nbytes": 32063488, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 23601152 |
| }, |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 26419200 |
| }, |
| { |
| "name": "model.layers.14.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32055296 |
| } |
| ], |
| "md5sum": "9d8f6e6eb0d9bdf634471ebfb1e0644b" |
| }, |
| { |
| "dataPath": "params_shard_83.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cf1b2fb24fe201540c0382307861eb37" |
| }, |
| { |
| "dataPath": "params_shard_84.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d3c98d6991ea03e26083d191d2195058" |
| }, |
| { |
| "dataPath": "params_shard_85.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "68dbd6def606dd5ecdbbff4be1c216b8" |
| }, |
| { |
| "dataPath": "params_shard_86.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.15.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.15.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "369663e203571a17fe97c4b5a3142709" |
| }, |
| { |
| "dataPath": "params_shard_87.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "75d588500b4624f36e55b51843fa8050" |
| }, |
| { |
| "dataPath": "params_shard_88.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7781fb7ea99b2de93626de5213d4838b" |
| }, |
| { |
| "dataPath": "params_shard_89.bin", |
| "format": "raw-shard", |
| "nbytes": 32063488, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 23601152 |
| }, |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 26419200 |
| }, |
| { |
| "name": "model.layers.16.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32055296 |
| } |
| ], |
| "md5sum": "fa2ff5773cbee17c3da6103bc1e4a79d" |
| }, |
| { |
| "dataPath": "params_shard_90.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d8c75a3f9b830c0c5ff7ba11216dcd6e" |
| }, |
| { |
| "dataPath": "params_shard_91.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "191a5468fa6c9be7841aa9366602add6" |
| }, |
| { |
| "dataPath": "params_shard_92.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fd46ce5bd54acfc77cb7f360ee4250d9" |
| }, |
| { |
| "dataPath": "params_shard_93.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.17.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.17.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "dbf9201f96d13c8613661ee536f3fb18" |
| }, |
| { |
| "dataPath": "params_shard_94.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4237711229f1aed000a573f34f10e4b1" |
| }, |
| { |
| "dataPath": "params_shard_95.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f42c4f93c7562f1e4b781ce4090416f2" |
| }, |
| { |
| "dataPath": "params_shard_96.bin", |
| "format": "raw-shard", |
| "nbytes": 32063488, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 23601152 |
| }, |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 26419200 |
| }, |
| { |
| "name": "model.layers.18.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32055296 |
| } |
| ], |
| "md5sum": "af00ffd1c300c9ad545b60c64b9499e1" |
| }, |
| { |
| "dataPath": "params_shard_97.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fe00aadffbcc54611daceab3d38fbf81" |
| }, |
| { |
| "dataPath": "params_shard_98.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c0026cd4684d26a3a596fafde253e1e5" |
| }, |
| { |
| "dataPath": "params_shard_99.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "765cf3ffafdc95de5a9932c392101100" |
| }, |
| { |
| "dataPath": "params_shard_100.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.19.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.19.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.19.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "ff708222e994ebfce5708307b3a81330" |
| }, |
| { |
| "dataPath": "params_shard_101.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "32499d421068eb684d449d9964f1f1b7" |
| }, |
| { |
| "dataPath": "params_shard_102.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d6e228aba82ba0550991d12ea06c73f1" |
| }, |
| { |
| "dataPath": "params_shard_103.bin", |
| "format": "raw-shard", |
| "nbytes": 32063488, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.20.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.layers.20.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.20.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 23601152 |
| }, |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 26419200 |
| }, |
| { |
| "name": "model.layers.20.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32055296 |
| } |
| ], |
| "md5sum": "701d2834932da687d852bc4b46d24be2" |
| }, |
| { |
| "dataPath": "params_shard_104.bin", |
| "format": "raw-shard", |
| "nbytes": 22544384, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d2d8732e895ae195defe0452ce2fca7f" |
| }, |
| { |
| "dataPath": "params_shard_105.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "74729a932c45a392582cb0eb5b5835c3" |
| }, |
| { |
| "dataPath": "params_shard_106.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a12e93aa4c320e267ae8f35e862e353c" |
| }, |
| { |
| "dataPath": "params_shard_107.bin", |
| "format": "raw-shard", |
| "nbytes": 32587776, |
| "records": [ |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.21.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.21.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 12591104 |
| }, |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 15409152 |
| }, |
| { |
| "name": "model.layers.21.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 21045248 |
| }, |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 21053440 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 24199168 |
| } |
| ], |
| "md5sum": "8f83f02ce8a0e6dec6000af034d60446" |
| }, |
| { |
| "dataPath": "params_shard_108.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2a55782fccb8ced40079f23c0a41d8f5" |
| }, |
| { |
| "dataPath": "params_shard_109.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "07549e960a01e34073fc9b6b201d68b5" |
| }, |
| { |
| "dataPath": "params_shard_110.bin", |
| "format": "raw-shard", |
| "nbytes": 32063488, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 1048576 |
| }, |
| { |
| "name": "model.layers.22.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1376 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 22544384, |
| "byteOffset": 1056768 |
| }, |
| { |
| "name": "model.layers.22.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 344 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2818048, |
| "byteOffset": 23601152 |
| }, |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 26419200 |
| }, |
| { |
| "name": "model.layers.22.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32055296 |
| } |
| ], |
| "md5sum": "933fa09367fcf753695a040ed77b79b4" |
| }, |
| { |
| "dataPath": "params_shard_111.bin", |
| "format": "raw-shard", |
| "nbytes": 45088768, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 45088768, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "707f597e1f52aef3c9ba4530fe12103a" |
| }, |
| { |
| "dataPath": "params_shard_112.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "90a00953625dc74930c54be3ba3af7df" |
| }, |
| { |
| "dataPath": "params_shard_113.bin", |
| "format": "raw-shard", |
| "nbytes": 30801920, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 3145728 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 5636096, |
| "byteOffset": 12582912 |
| }, |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 18219008 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 512 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 21364736 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 128 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 29753344 |
| } |
| ], |
| "md5sum": "f012f4370457724fa7788b95fc39a5b1" |
| } |
| ] |
| } |