| { |
| "metadata": { |
| "ParamSize": 325, |
| "ParamBytes": 3048608704.0, |
| "BitsPerParam": 3.6193070644493246 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 52762368, |
| "records": [ |
| { |
| "name": "lm_head.q_weight", |
| "shape": [ |
| 32016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 52762368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c42f676c9e5f47490cb57a7fa536f694" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 18087936, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c067dcd8780e97d599ca80e753e2c57d" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9e8a50b5ecc922cce9eb48867534cd16" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 29229792, |
| "records": [ |
| { |
| "name": "lm_head.q_scale", |
| "shape": [ |
| 32016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6595296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 6595296 |
| }, |
| { |
| "name": "model.layers.23.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 6603488 |
| }, |
| { |
| "name": "model.layers.23.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 24691424 |
| }, |
| { |
| "name": "model.layers.23.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 26952416 |
| }, |
| { |
| "name": "model.layers.24.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 26960608 |
| }, |
| { |
| "name": "model.layers.24.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 26968800 |
| } |
| ], |
| "md5sum": "b072a6e5e7080a6cae4005607f58a517" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "277c16f3c5389153228bb452dd148e10" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4feabbb67d458a3d150ee1fea6a3a8f2" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "24a8a99c0dcbe0f2677d1457b512695b" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.24.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.25.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.25.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.25.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.25.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "048a401bc493124ce9f6eec2d832c707" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "26483f534695e5cdf2cced21468c094d" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.25.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.26.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.26.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.26.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "f244e12abda99b8809535a949234bd35" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.26.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "4eecff9501161d53f6fce252cee212d1" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f0f5c7ed15f75b054af873fa63eee509" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e97829650f3c8721c388d731b73fbafa" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.26.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.27.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.27.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.27.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.27.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "2050177792a8236e0047b412ee524e4f" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a08c1bb00d1e2eec0273f552f2cd6faf" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.27.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.28.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.28.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.28.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "347222f8a54e21d12d9c4971706764aa" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.28.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "21d838e5972d65e57d377cf5b1d829e7" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "962679ab0b2d704986583c2f3f7ea413" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "82951d1f80bf53c473acc399da7443ad" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.28.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.29.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.29.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.29.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.29.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "eb68eb89fb4cbd61d6fab0083de4df60" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "885f56f7c9ab3fd45c26ddfd0da53ff0" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.29.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.30.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.30.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.30.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "930d00de9dbc39e534a7ec8df9ab68de" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "bbe4ef8aad23c71e94b7b902310c94e0" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "941a59910a086587db11fb763728cdbb" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e24567428615d2bf62f1ebfbf0605c12" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.30.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.31.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.31.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.31.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.31.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "7336357368bcb4274db82b65e23de17d" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 52762368, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_weight", |
| "shape": [ |
| 32016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 52762368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1d1614174bc4a70dc676649ef0cf84ec" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 18087936, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6e525d0171f5618e41854582fd485bc7" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2b5b58653c44540297af00ab24b537c0" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "603bc99c36aa33fc4ae4047bb56435fe" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 32823008, |
| "records": [ |
| { |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.31.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.embed_tokens.q_scale", |
| "shape": [ |
| 32016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 6595296, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 16728800 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 16736992 |
| }, |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 18997984 |
| }, |
| { |
| "name": "model.layers.0.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 23533280 |
| }, |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 23541472 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 26072800 |
| } |
| ], |
| "md5sum": "7036177483c594d4da78a290a35c6a52" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a611babe0770a56968a3c32b3427813b" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "96c9330bcc4840ba88d7aaf298f1eaa2" |
| }, |
| { |
| "dataPath": "params_shard_33.bin", |
| "format": "raw-shard", |
| "nbytes": 28275712, |
| "records": [ |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 843776 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 851968 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 18939904 |
| }, |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 21200896 |
| }, |
| { |
| "name": "model.layers.1.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 25736192 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 25744384 |
| } |
| ], |
| "md5sum": "38faa2a8b7e96d8398dd5eda9bb1b706" |
| }, |
| { |
| "dataPath": "params_shard_34.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0edb382d54c7d2502ef69164bfec488c" |
| }, |
| { |
| "dataPath": "params_shard_35.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "20a9a9260732ae00c75fb2a171630319" |
| }, |
| { |
| "dataPath": "params_shard_36.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.10.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.10.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.10.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.10.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "dd558445e29df14d4037d186d3f57f0e" |
| }, |
| { |
| "dataPath": "params_shard_37.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c8c99bee6f67c027b25bb4a923e5fb3c" |
| }, |
| { |
| "dataPath": "params_shard_38.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6bacad932c8a4112861e6e21f78ba937" |
| }, |
| { |
| "dataPath": "params_shard_39.bin", |
| "format": "raw-shard", |
| "nbytes": 18087936, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "45d44b3b510333c69397d4ca9107186a" |
| }, |
| { |
| "dataPath": "params_shard_40.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "262bc0c5c936f17b4610e53115140023" |
| }, |
| { |
| "dataPath": "params_shard_41.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "04be911667ee2098a1d176bda9886be6" |
| }, |
| { |
| "dataPath": "params_shard_42.bin", |
| "format": "raw-shard", |
| "nbytes": 31598592, |
| "records": [ |
| { |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.10.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 14660608 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 17191936 |
| }, |
| { |
| "name": "model.layers.11.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 23942144 |
| }, |
| { |
| "name": "model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 24785920 |
| }, |
| { |
| "name": "model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 24794112 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27055104 |
| }, |
| { |
| "name": "model.layers.2.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 31590400 |
| } |
| ], |
| "md5sum": "3115dff22dd960fad4384e0e6a3b817f" |
| }, |
| { |
| "dataPath": "params_shard_43.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bc78756aaa3ec55bbf470c646bd71ea8" |
| }, |
| { |
| "dataPath": "params_shard_44.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "dc6a89012786d8676efdb810024a8689" |
| }, |
| { |
| "dataPath": "params_shard_45.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "ac28d8a035de99b3ad22a8130d57346c" |
| }, |
| { |
| "dataPath": "params_shard_46.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0f686f19c2a767b68c1e17aa89eac75e" |
| }, |
| { |
| "dataPath": "params_shard_47.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "092745b8bada9c3125e73b3015ea323c" |
| }, |
| { |
| "dataPath": "params_shard_48.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.4.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "72b13633599fb635840b250f34ee356c" |
| }, |
| { |
| "dataPath": "params_shard_49.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5a1b12c6a3361b53b0338b022ee41664" |
| }, |
| { |
| "dataPath": "params_shard_50.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "7b8a775bd17fe337fa1b70a035e431e9" |
| }, |
| { |
| "dataPath": "params_shard_51.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "3f45e135040d2b26212afc5be4b652dc" |
| }, |
| { |
| "dataPath": "params_shard_52.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e73a714bfa1e330a116ae06db26615f3" |
| }, |
| { |
| "dataPath": "params_shard_53.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "174da339b801d0598df115dc0ce6298e" |
| }, |
| { |
| "dataPath": "params_shard_54.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.6.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.6.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.6.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.6.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "6b60564cba534ba05e2c7bf6cdcb0934" |
| }, |
| { |
| "dataPath": "params_shard_55.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7cd85c1fea22d34bcedc2d2fabe90192" |
| }, |
| { |
| "dataPath": "params_shard_56.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.6.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.7.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.7.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.7.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "f03183185a1cf712d6bfdb5c7aa52ece" |
| }, |
| { |
| "dataPath": "params_shard_57.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.7.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "aebef4ef4e339a55eae64a7fbd4f3686" |
| }, |
| { |
| "dataPath": "params_shard_58.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "82aa0d6b0636543fb941fa2fb4ffc77b" |
| }, |
| { |
| "dataPath": "params_shard_59.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a7c8e9b7b375314f9aaca8ebd0f5b3f1" |
| }, |
| { |
| "dataPath": "params_shard_60.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.7.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.8.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.8.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.8.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.8.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "5b387f7572d912149dc686668262b48e" |
| }, |
| { |
| "dataPath": "params_shard_61.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0eb95391aa379e869c35cd14556496bf" |
| }, |
| { |
| "dataPath": "params_shard_62.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.8.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.9.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.9.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.9.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "abbf6bfda48b80ef50cd9e25ac29ac2e" |
| }, |
| { |
| "dataPath": "params_shard_63.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "07b193489ced74929f7fcdc82f958a4a" |
| }, |
| { |
| "dataPath": "params_shard_64.bin", |
| "format": "raw-shard", |
| "nbytes": 18087936, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2e4da4822aeaab318c2d54d9c4643bc1" |
| }, |
| { |
| "dataPath": "params_shard_65.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9aca5114ff3a1b357f2b5b05c4af222a" |
| }, |
| { |
| "dataPath": "params_shard_66.bin", |
| "format": "raw-shard", |
| "nbytes": 30228480, |
| "records": [ |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.11.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.11.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.11.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.11.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.12.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 27959296 |
| }, |
| { |
| "name": "model.layers.12.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 27967488 |
| } |
| ], |
| "md5sum": "8c9ab6d16819923eccb8090141d51c67" |
| }, |
| { |
| "dataPath": "params_shard_67.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "117535dceb8b584a58092e9208ecfbec" |
| }, |
| { |
| "dataPath": "params_shard_68.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1f0ef21ca224866bc0924b31e91a0a4c" |
| }, |
| { |
| "dataPath": "params_shard_69.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5d672dec703c9c67b9d7d320ad01877e" |
| }, |
| { |
| "dataPath": "params_shard_70.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.13.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.13.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.13.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.13.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "34abe335812521835de8036c0347919e" |
| }, |
| { |
| "dataPath": "params_shard_71.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4acc3cbaa0a5745a4156f8f1491f5f8e" |
| }, |
| { |
| "dataPath": "params_shard_72.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.13.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.14.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.14.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "0281707177c4cac3a060bc79d1b1ebaf" |
| }, |
| { |
| "dataPath": "params_shard_73.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "fdb361bf93f8aae85591b099be716673" |
| }, |
| { |
| "dataPath": "params_shard_74.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7e873d528db069e0294d498b3440eaab" |
| }, |
| { |
| "dataPath": "params_shard_75.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d9e7409319de11beda88f7a5ba0ce4ec" |
| }, |
| { |
| "dataPath": "params_shard_76.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.15.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.15.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.15.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.15.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "1d863bdb32db3ab7dd4cc87f10d6c9c6" |
| }, |
| { |
| "dataPath": "params_shard_77.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c4e7024a0cc590271d29643873aa6083" |
| }, |
| { |
| "dataPath": "params_shard_78.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.15.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.16.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.16.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "252ed76ef5350bd5cb16f6317ae169ba" |
| }, |
| { |
| "dataPath": "params_shard_79.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "2b155f1cfb2032ec568b0d5ac16f0e21" |
| }, |
| { |
| "dataPath": "params_shard_80.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4fe4fe5436cf8d476abfc3e793978404" |
| }, |
| { |
| "dataPath": "params_shard_81.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "40d103985b3403eca30c43cd48c7f61f" |
| }, |
| { |
| "dataPath": "params_shard_82.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.16.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.17.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.17.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.17.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.17.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "ba91937dfe279833780de73d00c938fc" |
| }, |
| { |
| "dataPath": "params_shard_83.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3ecfa5960f8b1eb1b7f2c7a96556c8ab" |
| }, |
| { |
| "dataPath": "params_shard_84.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.17.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.18.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.18.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "e052d401e6f6ce26d66cb8cfbcb68dab" |
| }, |
| { |
| "dataPath": "params_shard_85.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "2b6d165e2d94ab4c087643d24dbabeb0" |
| }, |
| { |
| "dataPath": "params_shard_86.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b36d3a45d81daada3a69ce03e0fd5fee" |
| }, |
| { |
| "dataPath": "params_shard_87.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cffe3d3a5003bc3d0f6e82954b206a0e" |
| }, |
| { |
| "dataPath": "params_shard_88.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.19.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.19.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.19.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.19.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "a7304e582f577245630da10a9cc700eb" |
| }, |
| { |
| "dataPath": "params_shard_89.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "07910504c2de5db257470f3204bf041c" |
| }, |
| { |
| "dataPath": "params_shard_90.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.19.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.20.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.20.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.20.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "aba6d51a21cd643d47d11e7d563b4eb4" |
| }, |
| { |
| "dataPath": "params_shard_91.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.20.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "27a00b2f6eb28c29e66fdadd72e73482" |
| }, |
| { |
| "dataPath": "params_shard_92.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bd2fc33def6bbed6a4f7897385442a92" |
| }, |
| { |
| "dataPath": "params_shard_93.bin", |
| "format": "raw-shard", |
| "nbytes": 20250624, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ff37f556795e1b0b827de40bd933bbd7" |
| }, |
| { |
| "dataPath": "params_shard_94.bin", |
| "format": "raw-shard", |
| "nbytes": 32494592, |
| "records": [ |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.20.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.21.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.21.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 7602176 |
| }, |
| { |
| "name": "model.layers.21.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 25690112 |
| }, |
| { |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 27951104 |
| }, |
| { |
| "name": "model.layers.21.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 32486400 |
| } |
| ], |
| "md5sum": "1e57b9294a634d5e407c799d3e97ffac" |
| }, |
| { |
| "dataPath": "params_shard_95.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c454f4ab785a50bf983b2eb6c3c3e988" |
| }, |
| { |
| "dataPath": "params_shard_96.bin", |
| "format": "raw-shard", |
| "nbytes": 30482432, |
| "records": [ |
| { |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.21.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| }, |
| { |
| "name": "model.layers.22.input_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 10125312 |
| }, |
| { |
| "name": "model.layers.22.mlp.down_proj.q_weight", |
| "shape": [ |
| 4096, |
| 1104 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18087936, |
| "byteOffset": 10133504 |
| }, |
| { |
| "name": "model.layers.22.mlp.down_proj.q_scale", |
| "shape": [ |
| 4096, |
| 276 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2260992, |
| "byteOffset": 28221440 |
| } |
| ], |
| "md5sum": "e021046f69e02bd28538124aa1c780dc" |
| }, |
| { |
| "dataPath": "params_shard_97.bin", |
| "format": "raw-shard", |
| "nbytes": 27325440, |
| "records": [ |
| { |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.post_attention_layernorm.weight", |
| "shape": [ |
| 4096 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 8192, |
| "byteOffset": 4535296 |
| }, |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 4543488 |
| }, |
| { |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 24794112 |
| } |
| ], |
| "md5sum": "381cf00ee63e55b1799f520ae810de38" |
| }, |
| { |
| "dataPath": "params_shard_98.bin", |
| "format": "raw-shard", |
| "nbytes": 36282368, |
| "records": [ |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 22016, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 36282368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "07442b06cf2153625e8529aa83481219" |
| }, |
| { |
| "dataPath": "params_shard_99.bin", |
| "format": "raw-shard", |
| "nbytes": 32379904, |
| "records": [ |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 6750208 |
| }, |
| { |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 22016, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 4535296, |
| "byteOffset": 7593984 |
| }, |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 12288, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 20250624, |
| "byteOffset": 12129280 |
| } |
| ], |
| "md5sum": "fb009dee4ed1ba7950f57bf7005a5a28" |
| }, |
| { |
| "dataPath": "params_shard_100.bin", |
| "format": "raw-shard", |
| "nbytes": 10125312, |
| "records": [ |
| { |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 12288, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2531328, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_weight", |
| "shape": [ |
| 4096, |
| 412 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 6750208, |
| "byteOffset": 2531328 |
| }, |
| { |
| "name": "model.layers.23.self_attn.o_proj.q_scale", |
| "shape": [ |
| 4096, |
| 103 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 843776, |
| "byteOffset": 9281536 |
| } |
| ], |
| "md5sum": "6f5a496325f5343cb90e78ae7dbe0101" |
| } |
| ] |
| } |