| { |
| "metadata": { |
| "ParamSize": 405, |
| "ParamBytes": 7322112000.0, |
| "BitsPerParam": 4.500366415925148 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 81960960, |
| "records": [ |
| { |
| "name": "lm_head.q_weight", |
| "shape": [ |
| 32016, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81960960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1ee5584d6bc8fb156ff2fddb0fa80888" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "872920e1368ef0fe5513228bf0c5a884" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "50c07c16e0ce77f89d1d34c99aa30f47" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "176753fc0fd8a7f30d7d6a655d76699a" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0ab2736ca5c83a4a424cda64a9021af8" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 32896000, |
| "records": [ |
| { |
| "name": "lm_head.q_scale", |
| "shape": [ |
| 32016, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10245120, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 10245120 |
| }, |
| { |
| "name": "model.layers.30.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 10255360 |
| }, |
| { |
| "name": "model.layers.30.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14679040 |
| }, |
| { |
| "name": "model.layers.31.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14689280 |
| }, |
| { |
| "name": "model.layers.31.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14699520 |
| }, |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19123200 |
| }, |
| { |
| "name": "model.layers.31.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 27970560 |
| }, |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 27980800 |
| } |
| ], |
| "md5sum": "313ed96fa0680cef4277431c5c0baffa" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7b2d3276e074b499750cf5dc5d612723" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.32.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9122ebff4e374949685e3b9bd0bc47f3" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.32.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "237cbb17dc22d0521702e2b061fe9032" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.layers.32.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.layers.32.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.32.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19179520 |
| }, |
| { |
| "name": "model.layers.32.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.32.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "b04854d7836eb6e1fefe45d94f280212" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "316407403954a508a12f48d78769d7f4" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.33.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8bd5bbdbc1cb0f39336612d36ac2a6f7" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.33.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9bdefc7ac0895cb98a2bc95a1a3063ec" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.32.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.32.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.layers.33.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.layers.33.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.33.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19179520 |
| }, |
| { |
| "name": "model.layers.33.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.33.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "5c2583e487bf119fe89a428f006f4643" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2bd542abeb0a7c4d1389fd58a426f5a5" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.34.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4a4a15ba51f71c1ccb0527da9574f18f" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.34.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "63a806ca5b4b39fccdab2b865f67acd6" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.33.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.33.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.layers.34.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.layers.34.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.34.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19179520 |
| }, |
| { |
| "name": "model.layers.34.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.34.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "0457e85a25f72e9bb19fd9da7483492a" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "73904f45a8a3780001487c0312fb4fbe" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.35.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3106c7a759efc5f8c8685e81f025caf7" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.35.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f7b87441fc42112552f6ce3ff1c31bdb" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.34.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.34.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.layers.35.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.layers.35.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.35.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19179520 |
| }, |
| { |
| "name": "model.layers.35.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.35.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "8a1789fe42158381b9dca4d10dbe9c1e" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "21ee4ef4999f2a43f31bb271900eaf7a" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.36.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8b954a3ad8d4492660fcf04544634db0" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.36.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f8ae6544e9c97673f4e57e65dcff2c83" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.35.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.35.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.layers.36.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.layers.36.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.36.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19179520 |
| }, |
| { |
| "name": "model.layers.36.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.36.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "79cb0e594ced339f9812e8410ddb3bcb" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f27b12411768a0a959eaaf5225af440a" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.37.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "679a112240eb41a00e35394eb5ca9175" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.37.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1b1e0ff1ffadaf2d93a370461699e8bd" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.36.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.36.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.layers.37.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.layers.37.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.37.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19179520 |
| }, |
| { |
| "name": "model.layers.37.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.37.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "eafa13feda0b66c0b4380c7c0d75226a" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2ddac2d36f20b7ab2d78b305e8c20ec1" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.38.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cf997ac80b190a6e8bf8bf5751a3825f" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.38.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2c91311789034234dba6b10b07f2c699" |
| }, |
| { |
| "dataPath": "params_shard_33.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.37.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.37.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.layers.38.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.layers.38.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.38.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19179520 |
| }, |
| { |
| "name": "model.layers.38.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.38.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "217d6119b081a3d683e59fc609028d3e" |
| }, |
| { |
| "dataPath": "params_shard_34.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1546070d210f92ecfe96d1e1bf4c882f" |
| }, |
| { |
| "dataPath": "params_shard_35.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.39.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "50eda23e767ec60bdf33be4207da5ba3" |
| }, |
| { |
| "dataPath": "params_shard_36.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.39.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "591d80a0e586dd47f7efb8398ba75d7b" |
| }, |
| { |
| "dataPath": "params_shard_37.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.38.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.38.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.layers.39.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.layers.39.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.39.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 19179520 |
| }, |
| { |
| "name": "model.layers.39.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.39.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "d4e6e3edabdea0ed8f307b7c2b45e4e5" |
| }, |
| { |
| "dataPath": "params_shard_38.bin", |
| "format": "raw-shard", |
| "nbytes": 81960960, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_weight", |
| "shape": [ |
| 32016, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 81960960, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "795fa29990b5244e1bc27ecf24bbcf33" |
| }, |
| { |
| "dataPath": "params_shard_39.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ba27bd41c620a9567f9fa876b772ca78" |
| }, |
| { |
| "dataPath": "params_shard_40.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "15f62f3658369092c31728122022370d" |
| }, |
| { |
| "dataPath": "params_shard_41.bin", |
| "format": "raw-shard", |
| "nbytes": 29434880, |
| "records": [ |
| { |
| "name": "model.layers.39.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.39.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 13107200 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 14745600 |
| }, |
| { |
| "name": "model.embed_tokens.q_scale", |
| "shape": [ |
| 32016, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10245120, |
| "byteOffset": 14755840 |
| }, |
| { |
| "name": "model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 25000960 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 25011200 |
| } |
| ], |
| "md5sum": "d877799325d9c37d1ec162add123333c" |
| }, |
| { |
| "dataPath": "params_shard_42.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c5839076a3ea6ddb3586ade97c446ca4" |
| }, |
| { |
| "dataPath": "params_shard_43.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "eb2db0d9c73f82fab4d58c7d64d5ecc0" |
| }, |
| { |
| "dataPath": "params_shard_44.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f9608ee17b01149bbcd02780cb3967ea" |
| }, |
| { |
| "dataPath": "params_shard_45.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "916fe2a04238995fd46733880f02b8b0" |
| }, |
| { |
| "dataPath": "params_shard_46.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6a81d76aed8c4be99188f53063747e1b" |
| }, |
| { |
| "dataPath": "params_shard_47.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9973e65d76a6162c8b6510f4581a20d0" |
| }, |
| { |
| "dataPath": "params_shard_48.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f3fac4a6c10b8c6992c628292cf84685" |
| }, |
| { |
| "dataPath": "params_shard_49.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.1.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.10.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "21cf9ee3153d7e7c21001b4e6b9c4682" |
| }, |
| { |
| "dataPath": "params_shard_50.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "18be1f5f9c067430103f22f914a4f330" |
| }, |
| { |
| "dataPath": "params_shard_51.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4c67ff62dfa597f686fe1d3e4b3c1734" |
| }, |
| { |
| "dataPath": "params_shard_52.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a5710c9cc2f2ce4638390e9acfdde382" |
| }, |
| { |
| "dataPath": "params_shard_53.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.10.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.11.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "534bfd42cae9e32a5e7258041049351e" |
| }, |
| { |
| "dataPath": "params_shard_54.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4a5e0c3d7d729680768241502e30c58d" |
| }, |
| { |
| "dataPath": "params_shard_55.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a528e1916109ff3875ce57bfb00c1810" |
| }, |
| { |
| "dataPath": "params_shard_56.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f8aa347d3faa95f84414de2e20261271" |
| }, |
| { |
| "dataPath": "params_shard_57.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.12.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "cecbf3748b28799f6b0caaa6b1a46dd4" |
| }, |
| { |
| "dataPath": "params_shard_58.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c780f3f6b60588e1697aceecbf299d74" |
| }, |
| { |
| "dataPath": "params_shard_59.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9003ebc48086557fe0af00e4378f0dd1" |
| }, |
| { |
| "dataPath": "params_shard_60.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c3a6cf527e4f0ed89f0e76b60b281876" |
| }, |
| { |
| "dataPath": "params_shard_61.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.13.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "e674e8115268b2c59bc529c3ca801032" |
| }, |
| { |
| "dataPath": "params_shard_62.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a1cf96068bf1438e828a0e3eeb39c3a3" |
| }, |
| { |
| "dataPath": "params_shard_63.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "825860bc11b986f3277ea7b8547603ab" |
| }, |
| { |
| "dataPath": "params_shard_64.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "26498bc5ef758991d484d0d27a0e03d4" |
| }, |
| { |
| "dataPath": "params_shard_65.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.13.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.14.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "b4150084627c28f9a1f18af9410bb25a" |
| }, |
| { |
| "dataPath": "params_shard_66.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "19bf9c1b139d2f6bed6d80f413a36b70" |
| }, |
| { |
| "dataPath": "params_shard_67.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a93aa2744747f8388ad8a6a217a9eaab" |
| }, |
| { |
| "dataPath": "params_shard_68.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "da113ad156f2e22c603d91173f7bb4b9" |
| }, |
| { |
| "dataPath": "params_shard_69.bin", |
| "format": "raw-shard", |
| "nbytes": 33443840, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 33433600 |
| } |
| ], |
| "md5sum": "3b163eb911d6a1dbef1efb171d81caa6" |
| }, |
| { |
| "dataPath": "params_shard_70.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e48fadcfd07d6954e02aecff5979de06" |
| }, |
| { |
| "dataPath": "params_shard_71.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7656cd9147f96cfb7522699e910c664f" |
| }, |
| { |
| "dataPath": "params_shard_72.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "47385a1254de5cc22ae07887ae0def05" |
| }, |
| { |
| "dataPath": "params_shard_73.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.2.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 18196480 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 31303680 |
| }, |
| { |
| "name": "model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32942080 |
| } |
| ], |
| "md5sum": "71fed8b3560cdff61a3d98e703c2bb10" |
| }, |
| { |
| "dataPath": "params_shard_74.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "80af7dcbffb1a201450773ff7e65c785" |
| }, |
| { |
| "dataPath": "params_shard_75.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cad4507b0270d1e1ecc30c8bdf1fa29d" |
| }, |
| { |
| "dataPath": "params_shard_76.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c54e73b37b7d6a5bc60b56a804576983" |
| }, |
| { |
| "dataPath": "params_shard_77.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.3.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 18196480 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 31303680 |
| }, |
| { |
| "name": "model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32942080 |
| } |
| ], |
| "md5sum": "00241179a017cd028794fd6fe9f25c80" |
| }, |
| { |
| "dataPath": "params_shard_78.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3dfdd8d2134fe700a0da3c26ca7eb976" |
| }, |
| { |
| "dataPath": "params_shard_79.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6188a3a18e80e84aa92d302e538c49a6" |
| }, |
| { |
| "dataPath": "params_shard_80.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fd25224e4946c0b45a6a8d6b45bdadca" |
| }, |
| { |
| "dataPath": "params_shard_81.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.4.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 18196480 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 31303680 |
| }, |
| { |
| "name": "model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32942080 |
| } |
| ], |
| "md5sum": "42d865e8841172fa767b3c2ad13fd4d1" |
| }, |
| { |
| "dataPath": "params_shard_82.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "41cf35517a46ea15749912d291fcf746" |
| }, |
| { |
| "dataPath": "params_shard_83.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fa62bcfca55dc17fde0687ee19a459ee" |
| }, |
| { |
| "dataPath": "params_shard_84.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3dad4cefa6dfca219ec48982999e075d" |
| }, |
| { |
| "dataPath": "params_shard_85.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.5.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 18196480 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 31303680 |
| }, |
| { |
| "name": "model.layers.6.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32942080 |
| } |
| ], |
| "md5sum": "67187eb99228a999fd846f25c1a706dc" |
| }, |
| { |
| "dataPath": "params_shard_86.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "23593325e134cabeb0f2ba9002a2ebf7" |
| }, |
| { |
| "dataPath": "params_shard_87.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "27c7038c60a941ec87ba73452b5ff278" |
| }, |
| { |
| "dataPath": "params_shard_88.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2e2683ad98ddeb98fab4caad3c51a3e3" |
| }, |
| { |
| "dataPath": "params_shard_89.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.6.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 18196480 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 31303680 |
| }, |
| { |
| "name": "model.layers.7.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32942080 |
| } |
| ], |
| "md5sum": "59a3a554f24e6de14ed4109cb2183b7b" |
| }, |
| { |
| "dataPath": "params_shard_90.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "05bb290d573e7d59b3b3efad7f5c048d" |
| }, |
| { |
| "dataPath": "params_shard_91.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "18172379ea89fef1dbbc4d69b02a7862" |
| }, |
| { |
| "dataPath": "params_shard_92.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "071d500a84457f73e006646919b7c064" |
| }, |
| { |
| "dataPath": "params_shard_93.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.7.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 18196480 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 31303680 |
| }, |
| { |
| "name": "model.layers.8.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32942080 |
| } |
| ], |
| "md5sum": "9af4534abd1bfc6464b1e4cfe96e4049" |
| }, |
| { |
| "dataPath": "params_shard_94.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "24bb515cdb92761744c6154bea0cd0d8" |
| }, |
| { |
| "dataPath": "params_shard_95.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1a3dda178604bcc303b3e9a1a33bc113" |
| }, |
| { |
| "dataPath": "params_shard_96.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bdc7966667ffa480a56d17cfd56a652a" |
| }, |
| { |
| "dataPath": "params_shard_97.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.8.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 18196480 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 31303680 |
| }, |
| { |
| "name": "model.layers.9.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32942080 |
| } |
| ], |
| "md5sum": "b4a3775c098837b39b3eedeb1b1af1fe" |
| }, |
| { |
| "dataPath": "params_shard_98.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f47ec4b5c2c590e1e961c6211aa5bdb9" |
| }, |
| { |
| "dataPath": "params_shard_99.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ab30bbde8cc5ec3ee8fafab6c4adddf5" |
| }, |
| { |
| "dataPath": "params_shard_100.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "de328e28102f0aab497ef9e4612ccbd0" |
| }, |
| { |
| "dataPath": "params_shard_101.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.9.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 18196480 |
| }, |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 31303680 |
| }, |
| { |
| "name": "model.layers.15.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 32942080 |
| } |
| ], |
| "md5sum": "422817698af67a0c6d92416ed536c5be" |
| }, |
| { |
| "dataPath": "params_shard_102.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ee52d35e46205d9771b79ec3564c7630" |
| }, |
| { |
| "dataPath": "params_shard_103.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4d6908282a5ab37a5a1a02b164cc8ea5" |
| }, |
| { |
| "dataPath": "params_shard_104.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b79c68016b45f0d29fb2b97329eae8c8" |
| }, |
| { |
| "dataPath": "params_shard_105.bin", |
| "format": "raw-shard", |
| "nbytes": 32460800, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 4423680 |
| }, |
| { |
| "name": "model.layers.15.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 13271040 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13281280 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26388480 |
| }, |
| { |
| "name": "model.layers.16.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28026880 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28037120 |
| } |
| ], |
| "md5sum": "bf922abf8ea108ff862ec47b5f281b6b" |
| }, |
| { |
| "dataPath": "params_shard_106.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0506ee784f67976cfca8861315d05218" |
| }, |
| { |
| "dataPath": "params_shard_107.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d12cd93cb2fa621d0c1c469d6e7127c3" |
| }, |
| { |
| "dataPath": "params_shard_108.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2cd1da0d7b093f0327551f8218ec4436" |
| }, |
| { |
| "dataPath": "params_shard_109.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.17.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "c760a404204463709728ffa9dd1b7c04" |
| }, |
| { |
| "dataPath": "params_shard_110.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f496440e29baed5d8a916f1211cef85a" |
| }, |
| { |
| "dataPath": "params_shard_111.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9896a10a9cb456cac27010f852b1e91b" |
| }, |
| { |
| "dataPath": "params_shard_112.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "613693d9d0794bb21f9fa9c3ed1da511" |
| }, |
| { |
| "dataPath": "params_shard_113.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.17.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.18.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "ae6026e796c7134c8884289cc845156d" |
| }, |
| { |
| "dataPath": "params_shard_114.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e842948a4e37942d161ce8ca587eba31" |
| }, |
| { |
| "dataPath": "params_shard_115.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b6e1f01dd55b6e2e87eb843166538ffd" |
| }, |
| { |
| "dataPath": "params_shard_116.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6638478024eda0a857d74c7ac5d40784" |
| }, |
| { |
| "dataPath": "params_shard_117.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.19.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.19.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "66a45f4f94eca313306345f092360a6a" |
| }, |
| { |
| "dataPath": "params_shard_118.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a0baba61732a39e5f8b13ffafeb2ff97" |
| }, |
| { |
| "dataPath": "params_shard_119.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1c5830d64fa7f8f3953b81155a81f378" |
| }, |
| { |
| "dataPath": "params_shard_120.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bfc5ec33aa28a36302cd2c3891899160" |
| }, |
| { |
| "dataPath": "params_shard_121.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.19.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.20.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.20.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "fe4d03f5ee49bb8ac264f0be8e7f6317" |
| }, |
| { |
| "dataPath": "params_shard_122.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4bde820a6fe565a394c1c6eca968e041" |
| }, |
| { |
| "dataPath": "params_shard_123.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "24ead7a8bea7e447ce117691d7a6744d" |
| }, |
| { |
| "dataPath": "params_shard_124.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8f109345cc3f3f790c8e44dbcf2e1cfd" |
| }, |
| { |
| "dataPath": "params_shard_125.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.20.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.21.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.21.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "5ecda72e18ae27e13f5ab018e7a2982c" |
| }, |
| { |
| "dataPath": "params_shard_126.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b0ca167a8f2642b8b6771142b2d65834" |
| }, |
| { |
| "dataPath": "params_shard_127.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "84f62d1a8b4c1a9761cc1ecd23ccfe51" |
| }, |
| { |
| "dataPath": "params_shard_128.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5134cf20ab36e7ff6daef78ffe9c8c76" |
| }, |
| { |
| "dataPath": "params_shard_129.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.21.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.22.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.22.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "520cc8e461bca714b4ca93942ebb6ab6" |
| }, |
| { |
| "dataPath": "params_shard_130.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5e8dbcdd70fdc6cb3a874281ead6dc43" |
| }, |
| { |
| "dataPath": "params_shard_131.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9d9c64c871e6f3bd9bd83a19c983849d" |
| }, |
| { |
| "dataPath": "params_shard_132.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "48c2d2c157e066d79d0b65193624c384" |
| }, |
| { |
| "dataPath": "params_shard_133.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.23.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.23.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "2142caaba6268a7db571bab5355b3ff0" |
| }, |
| { |
| "dataPath": "params_shard_134.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "57cd2023b475da062ea1aa6c0d61c00a" |
| }, |
| { |
| "dataPath": "params_shard_135.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a8a66fa5bfa5c3e3c9838e99a1c2f0ec" |
| }, |
| { |
| "dataPath": "params_shard_136.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5250cdb8aa0aca7c1543d79ef4b765e3" |
| }, |
| { |
| "dataPath": "params_shard_137.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.24.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.24.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "54fa9e781b1a98f7f41fc5b5ec229408" |
| }, |
| { |
| "dataPath": "params_shard_138.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0573252e2337e29d47b74f3cd0fc4b3e" |
| }, |
| { |
| "dataPath": "params_shard_139.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "24a8b1311b99a91b9d6ecb9a93e918c5" |
| }, |
| { |
| "dataPath": "params_shard_140.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "66423c34e01d988da874cda187b1e900" |
| }, |
| { |
| "dataPath": "params_shard_141.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.25.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.25.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "3a2291adadc1fd862e2ffcb7c00d6348" |
| }, |
| { |
| "dataPath": "params_shard_142.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e1d30bf7c2381128d5569bea7ace4485" |
| }, |
| { |
| "dataPath": "params_shard_143.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "39a43cbf30d2a4b05b0f6b4702b47a13" |
| }, |
| { |
| "dataPath": "params_shard_144.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "63fab7f8d3eac92054e2bf7ee68c0ee5" |
| }, |
| { |
| "dataPath": "params_shard_145.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.25.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.26.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.26.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "3a338b9e6f74024f947a76f305c549bb" |
| }, |
| { |
| "dataPath": "params_shard_146.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8d87a4a84c9bf35f8dd315fea165d3d4" |
| }, |
| { |
| "dataPath": "params_shard_147.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "45b3dd1febf075f6e577de73a88665bc" |
| }, |
| { |
| "dataPath": "params_shard_148.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d56520602ac72730b05f55ca71cbce7b" |
| }, |
| { |
| "dataPath": "params_shard_149.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.26.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.27.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.27.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "4961545ed5094b6dd1c5038e82122b53" |
| }, |
| { |
| "dataPath": "params_shard_150.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "aa13428b14251071cdbdd5473613b0b7" |
| }, |
| { |
| "dataPath": "params_shard_151.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7f4b7874795fe04fc5f64fdf0ba7c8b9" |
| }, |
| { |
| "dataPath": "params_shard_152.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5b7378055b998f8426d8c082adf25dd2" |
| }, |
| { |
| "dataPath": "params_shard_153.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.27.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.28.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.28.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "e3b24cb121e19c39b060fb7c32ddebb2" |
| }, |
| { |
| "dataPath": "params_shard_154.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "aabf23effcfa2d11bd11a773565d8af6" |
| }, |
| { |
| "dataPath": "params_shard_155.bin", |
| "format": "raw-shard", |
| "nbytes": 35389440, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.down_proj.q_weight", |
| "shape": [ |
| 5120, |
| 1728 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 35389440, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "766a0437064e16b072349a0955900188" |
| }, |
| { |
| "dataPath": "params_shard_156.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "55c812b972c9ed970dad14ac9710e6c0" |
| }, |
| { |
| "dataPath": "params_shard_157.bin", |
| "format": "raw-shard", |
| "nbytes": 32952320, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.28.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| }, |
| { |
| "name": "model.layers.29.input_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 28518400 |
| }, |
| { |
| "name": "model.layers.29.mlp.down_proj.q_scale", |
| "shape": [ |
| 5120, |
| 432 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4423680, |
| "byteOffset": 28528640 |
| } |
| ], |
| "md5sum": "d8acccff587e89db392f154a6ab930ba" |
| }, |
| { |
| "dataPath": "params_shard_158.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1b9258631d02be4a6ab29cf122d53253" |
| }, |
| { |
| "dataPath": "params_shard_159.bin", |
| "format": "raw-shard", |
| "nbytes": 70778880, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 27648, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 70778880, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "447cc8fbb7d6652faf282c2767169b10" |
| }, |
| { |
| "dataPath": "params_shard_160.bin", |
| "format": "raw-shard", |
| "nbytes": 28518400, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.29.post_attention_layernorm.weight", |
| "shape": [ |
| 5120 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 10240, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8857600 |
| }, |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13772800 |
| }, |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26880000 |
| } |
| ], |
| "md5sum": "2cc674bc9d0babeb07e99d03d19691a8" |
| }, |
| { |
| "dataPath": "params_shard_161.bin", |
| "format": "raw-shard", |
| "nbytes": 39321600, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 15360, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 39321600, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "471d1134b710fde119c0ac04270fc5b6" |
| }, |
| { |
| "dataPath": "params_shard_162.bin", |
| "format": "raw-shard", |
| "nbytes": 28508160, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 27648, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8847360, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 15360, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4915200, |
| "byteOffset": 8847360 |
| }, |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_weight", |
| "shape": [ |
| 5120, |
| 640 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 13107200, |
| "byteOffset": 13762560 |
| }, |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_scale", |
| "shape": [ |
| 5120, |
| 160 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 1638400, |
| "byteOffset": 26869760 |
| } |
| ], |
| "md5sum": "64cf2ac035fc977d1e1acfcf06a3af4e" |
| } |
| ] |
| } |