| { | |
| "metadata": { | |
| "ParamSize": 325, | |
| "ParamBytes": 4526981120.0, | |
| "BitsPerParam": 5.000992646497372 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 65536000, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 32000, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2b57581148c833b363b3a46155d6c7a0" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6ca6a9ebee27547da33fca7f9a5db8b8" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31784960, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 32000, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192000, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 8192000 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 20774912 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 22347776 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 30736384 | |
| } | |
| ], | |
| "md5sum": "b9ad634c3a1133ba2239edbebecbcaaf" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "44107b290919f876e6154c068bd661ba" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25182208, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11026432 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 23609344 | |
| } | |
| ], | |
| "md5sum": "717ec17e1227154411c1a820dc047261" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "38c6b4269ae350b9282fa431c846ed23" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b7239cac09cb8273a156c003010be479" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 16777216 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20447232 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "de1f018181fbbdcd4474bd967e6bde47" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7d5f9af4aa13184e1d199522f522e369" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6687708501605032ee921bee4a2253a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22020096 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "f484c7d54447e8fa3a60554b31576b39" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4255e2d37ee1260ea398fdb377da60fc" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dacfa3f4e70f731902865d8bd771e2ad" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "6bfed2448f571e67ffc4fb0ea22d7ec4" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bb5427d504bac40c1d589078f9764a05" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 16269312 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 26230784 | |
| } | |
| ], | |
| "md5sum": "6ea7d8604ec7fd9d5ed88ac8c72a8b85" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c01d31ab5e41daccd65013bbb2142aa8" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25182208, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11026432 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 23609344 | |
| } | |
| ], | |
| "md5sum": "b56702e3404bdc2aafa82160c94f47ff" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "83520f6b4fa3b4348fc151aada5a9f50" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8aa79d26e4b00dbec3c96452f6f30adf" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 16777216 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20447232 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "0b42566d825abc55ea0ee8dd55060446" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "419b16bd7d0d37291266b7f48fa5dcb6" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b070ae27e587e4a482d3411535572b70" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22020096 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "5b0d59357e5021db580dff05fe7814bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "86689d860fbb0e16097a7e1765f2c36e" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0b09ac9c8fa7c38bb0654c860b221bf" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "7373b71e9a1a0b89d25dcd6e1d0ebab3" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ba4811273bd1f67f87acce4123c0a4d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 16269312 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 26230784 | |
| } | |
| ], | |
| "md5sum": "0c3cd1a038246ac969e16fd27fbee63e" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ac3e5c45718c63c58b1ff2b468511930" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25182208, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11026432 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 23609344 | |
| } | |
| ], | |
| "md5sum": "4a92b0010f7779fe8146c7e119ad1e38" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aa25f9417b4b647b4dd585481684096d" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "644979ebe6bc7ddad8e041fd6643c64f" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 16777216 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20447232 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "3ddc1e7858e2835bae793e1b398af8b8" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7eaaf6173a0875d16f06151daf42b60e" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "77fb7d5486f709c3d1cca18426db011b" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22020096 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "0d4346ce38f66e41d89d7ed4784833e6" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "088efa948932204d94d80b7a6875cfcc" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "85aa88ee63924ccc34f955bdecc749cc" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "31f7733650c0a63d200a9c940be44555" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b32e72d14b38e6ceb04698c3ad891d01" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 16269312 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 26230784 | |
| } | |
| ], | |
| "md5sum": "175c665ccd16fe72caf13df0f77391ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "076b3e469d8d6ea5e6a58ca061429ff0" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25182208, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11026432 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 23609344 | |
| } | |
| ], | |
| "md5sum": "d3ff4b7e1f19f50fc687bbeae49715b2" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "82060f5401635818d122cddc5b5c185d" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "33ca18a531504a44eb61575d013da59d" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 16777216 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20447232 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "12539be0707daf80af062fe537e0f4f6" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "20a90cec41929343164315d90554e302" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "030432ae84253006e40b4fb977074b31" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22020096 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "50bc5999a488a9ad297f82a1486a8f4e" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6ea07ab5a38db124863e509b07acd32d" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "626c8b2a505460b91b5c4b58fc9b7875" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "88700075416df65b850ba48c131725b4" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "842a56a0f7e583ae89e23cb4d099a7c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 16269312 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 26230784 | |
| } | |
| ], | |
| "md5sum": "ad2bd38575599ee2a36f279b6225f2fa" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "68cab40b14caeb9255c28b704803c5e6" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25182208, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11026432 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 23609344 | |
| } | |
| ], | |
| "md5sum": "d84be5520aee019e832d40fcf36822a2" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5511bec942d3afdd65936d0da857722f" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "83f667fd608df6263e5ed7473a6faa23" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 16777216 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20447232 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "589bb0a1ab6cc09bdd82783882a55694" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "27335bb9a3b5fa4324e4ff3ad39cf993" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "44add29b1c56f7f02b89e6e06b07195b" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22020096 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "dc4afe30101e1f2325138af45e2c5292" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "219aba7b043ca595d412f23a6bf5a8c5" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cbecf041fe891ef327960cb09b1dc37b" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "8d9c6d6891df20fcff00f2ee710557a1" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2c87245d5fc6cc7c5f49bcd9bfa02bc5" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 16269312 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 26230784 | |
| } | |
| ], | |
| "md5sum": "e2dec404e99446470a2054abf6d2a1b6" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3f66d2e950e68589fcc70bab8da254ff" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25182208, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11026432 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 23609344 | |
| } | |
| ], | |
| "md5sum": "60afeee7b1ffb6a5fa5e2cae76a1e6fb" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bc241bf3bfcb43f07068f9c84e66c662" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "09d8eddef2e30603a5a53e3157b5a0ae" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 16777216 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20447232 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "4e21374fb169864295a70e05bf77843e" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "798084215b2a4c8c3bc834c22c163af7" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6d3227175ca9bf4b5f4c7bae892fcebd" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22020096 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "f3f2f2eb894ad1c498aa31c0308a2e60" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "06f878061a9ca1a9db307c125b2dec8a" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3b83323aad9509de69b17d9cefcf9eac" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "fcd03ffc2f4913ad199d5e25258af100" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "47073c6581c2dfe54d9c45bbc04a8d48" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 16269312 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 26230784 | |
| } | |
| ], | |
| "md5sum": "11390ad421d38afd76fec058952acd92" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7954727fec023ef6f9dde7af71633b6a" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25182208, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11026432 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 23609344 | |
| } | |
| ], | |
| "md5sum": "289eadf9123efca05ffd0055094bc8ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "295195734f58e386adfc264052f4ff35" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c63f2862167a32705ea76d71dd290f0a" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 16777216 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20447232 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "f82105042c25e5b687c1ea11c7ec338b" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "407b6153d6a415f31ecf1869afdb7131" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "766d17b7a621aaebacd10f3773214514" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22020096 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "987ca7537e2e73af6ad2bc5b3a7d9449" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "017b5837c3992a60457a79e72cacc626" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f22a1a0fd6dfa905299a65eaecc03129" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "91019f28d13461e2926f5281d5bcb71b" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7e5180ff86b725845b2c249c166ebe37" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27279360, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 16269312 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 26230784 | |
| } | |
| ], | |
| "md5sum": "962c6b434bb9c2fa4f9188dd4de17f06" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2371a17cce67065e7dcd215e5efa35c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25182208, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11026432 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 23609344 | |
| } | |
| ], | |
| "md5sum": "049232b26fd661de4215be5883642d29" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d1e12289d3542e2b5717b2f2208b0a59" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "283339ebdfe83ed00863c24ddcb7c0cf" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 16777216 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20447232 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "61edde125d206ee4068511cba24ed61f" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "16c3f02e527dfe7039d5c21f7b73634b" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "281560e4bb0abb04c74c6ef6e1edc8db" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.30.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22020096 | |
| }, | |
| { | |
| "name": "model.layers.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "b4b58c5e010350402e3ad9022fca7fee" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c7dc9bb31b7d0aecc5615c46a96dc2a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c97e4fb122eb88ace1310a156b335459" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "db9ade35db53aa953d2a5b9b40faf437" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 65536000, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 32000, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d0c2e9348270fd3413eecf40cc515729" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 11886592, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 32000, | |
| 128 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 8192000, | |
| "byteOffset": 3694592 | |
| } | |
| ], | |
| "md5sum": "6a2926f35133e3f72911941f64f8330b" | |
| } | |
| ] | |
| } |