lex-ar-qwen35-webgpu / model /tensor-cache.json
Christ0pher's picture
Upload folder using huggingface_hub
4f7953b verified
{
"metadata": {
"ParamSize": 435,
"ParamBytes": 423938816.0,
"BitsPerParam": 4.507631543378053
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 127139840,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
248320,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 127139840,
"byteOffset": 0
}
],
"md5sum": "965719745e8bff8aee1dcdaaee99d0b9"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 32844416,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
248320,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15892480,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 15892480
},
{
"name": "model.layers.0.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 15894528
},
{
"name": "model.layers.0.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 15894560
},
{
"name": "model.layers.0.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 15943712
},
{
"name": "model.layers.0.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 15943744
},
{
"name": "model.layers.0.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15951936
},
{
"name": "model.layers.0.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 15952960
},
{
"name": "model.layers.0.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15961152
},
{
"name": "model.layers.0.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 15962176
},
{
"name": "model.layers.0.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 19107904
},
{
"name": "model.layers.0.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19501120
},
{
"name": "model.layers.0.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 20549696
},
{
"name": "model.layers.0.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 20680768
},
{
"name": "model.layers.0.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20681024
},
{
"name": "model.layers.0.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 21729600
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21860672
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 23695680
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23925056
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 27595072
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 28053824
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 28055872
},
{
"name": "model.layers.1.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 28057920
},
{
"name": "model.layers.1.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 28057952
},
{
"name": "model.layers.1.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 28107104
},
{
"name": "model.layers.1.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 28107136
},
{
"name": "model.layers.1.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 28115328
},
{
"name": "model.layers.1.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 28116352
},
{
"name": "model.layers.1.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 28124544
},
{
"name": "model.layers.1.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 28125568
},
{
"name": "model.layers.1.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 31271296
},
{
"name": "model.layers.1.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 31664512
},
{
"name": "model.layers.1.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 32713088
},
{
"name": "model.layers.1.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 32844160
}
],
"md5sum": "bace72e1b58ffe9a497e9b5a7eaffa6b"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33473920,
"records": [
{
"name": "model.layers.1.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.1.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 1048576
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 1179648
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 3014656
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 3244032
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 6914048
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 7372800
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 7374848
},
{
"name": "model.layers.10.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 7376896
},
{
"name": "model.layers.10.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 7376928
},
{
"name": "model.layers.10.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 7426080
},
{
"name": "model.layers.10.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7426112
},
{
"name": "model.layers.10.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 7434304
},
{
"name": "model.layers.10.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7435328
},
{
"name": "model.layers.10.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 7443520
},
{
"name": "model.layers.10.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 7444544
},
{
"name": "model.layers.10.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 10590272
},
{
"name": "model.layers.10.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 10983488
},
{
"name": "model.layers.10.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 12032064
},
{
"name": "model.layers.10.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 12163136
},
{
"name": "model.layers.10.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 12163392
},
{
"name": "model.layers.10.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 13211968
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 13343040
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 15178048
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 15407424
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 19077440
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 19536192
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 19538240
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 19540288
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 21375296
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 21604672
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 25274688
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 25733440
},
{
"name": "model.layers.11.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 25735488
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
5120,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 25736000
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
5120,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 28357440
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28685120
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 29733696
},
{
"name": "model.layers.11.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 29864768
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 29865280
},
{
"name": "model.layers.12.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 29867328
},
{
"name": "model.layers.12.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 29867360
},
{
"name": "model.layers.12.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 29916512
},
{
"name": "model.layers.12.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29916544
},
{
"name": "model.layers.12.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 29924736
},
{
"name": "model.layers.12.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29925760
},
{
"name": "model.layers.12.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 29933952
},
{
"name": "model.layers.12.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29934976
},
{
"name": "model.layers.12.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 33080704
}
],
"md5sum": "4326282962d3cccd386c6ec6480dae26"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 32883584,
"records": [
{
"name": "model.layers.12.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.12.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 1048576
},
{
"name": "model.layers.12.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 1179648
},
{
"name": "model.layers.12.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 1179904
},
{
"name": "model.layers.12.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 2228480
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 2359552
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 4194560
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 4423936
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 8093952
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8552704
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8554752
},
{
"name": "model.layers.13.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 8556800
},
{
"name": "model.layers.13.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 8556832
},
{
"name": "model.layers.13.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 8605984
},
{
"name": "model.layers.13.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8606016
},
{
"name": "model.layers.13.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 8614208
},
{
"name": "model.layers.13.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8615232
},
{
"name": "model.layers.13.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 8623424
},
{
"name": "model.layers.13.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8624448
},
{
"name": "model.layers.13.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 11770176
},
{
"name": "model.layers.13.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 12163392
},
{
"name": "model.layers.13.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 13211968
},
{
"name": "model.layers.13.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 13343040
},
{
"name": "model.layers.13.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 13343296
},
{
"name": "model.layers.13.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 14391872
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 14522944
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 16357952
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16587328
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 20257344
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 20716096
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 20718144
},
{
"name": "model.layers.14.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 20720192
},
{
"name": "model.layers.14.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 20720224
},
{
"name": "model.layers.14.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 20769376
},
{
"name": "model.layers.14.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20769408
},
{
"name": "model.layers.14.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 20777600
},
{
"name": "model.layers.14.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20778624
},
{
"name": "model.layers.14.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 20786816
},
{
"name": "model.layers.14.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 20787840
},
{
"name": "model.layers.14.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 23933568
},
{
"name": "model.layers.14.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 24326784
},
{
"name": "model.layers.14.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 25375360
},
{
"name": "model.layers.14.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 25506432
},
{
"name": "model.layers.14.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 25506688
},
{
"name": "model.layers.14.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 26555264
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 26686336
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 28521344
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 28750720
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 32420736
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 32879488
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 32881536
}
],
"md5sum": "11507c7398d6ba6237ff7698fd1ad943"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 30520960,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 1835008
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 2064384
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 5734400
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 6193152
},
{
"name": "model.layers.15.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 6195200
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
5120,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 6195712
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
5120,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 8817152
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9144832
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 10193408
},
{
"name": "model.layers.15.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 10324480
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10324992
},
{
"name": "model.layers.16.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 10327040
},
{
"name": "model.layers.16.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 10327072
},
{
"name": "model.layers.16.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 10376224
},
{
"name": "model.layers.16.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10376256
},
{
"name": "model.layers.16.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 10384448
},
{
"name": "model.layers.16.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10385472
},
{
"name": "model.layers.16.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 10393664
},
{
"name": "model.layers.16.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 10394688
},
{
"name": "model.layers.16.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 13540416
},
{
"name": "model.layers.16.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 13933632
},
{
"name": "model.layers.16.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 14982208
},
{
"name": "model.layers.16.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 15113280
},
{
"name": "model.layers.16.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15113536
},
{
"name": "model.layers.16.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16162112
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 16293184
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 18128192
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18357568
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 22027584
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 22486336
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 22488384
},
{
"name": "model.layers.17.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 22490432
},
{
"name": "model.layers.17.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 22490464
},
{
"name": "model.layers.17.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 22539616
},
{
"name": "model.layers.17.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22539648
},
{
"name": "model.layers.17.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 22547840
},
{
"name": "model.layers.17.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22548864
},
{
"name": "model.layers.17.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 22557056
},
{
"name": "model.layers.17.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 22558080
},
{
"name": "model.layers.17.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 25703808
},
{
"name": "model.layers.17.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26097024
},
{
"name": "model.layers.17.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 27145600
},
{
"name": "model.layers.17.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 27276672
},
{
"name": "model.layers.17.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 27276928
},
{
"name": "model.layers.17.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 28325504
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 28456576
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 30291584
}
],
"md5sum": "d98f48b1c21e58e1f0e80cc08fd7deed"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 32589440,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 3670016
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 4128768
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 4130816
},
{
"name": "model.layers.18.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 4132864
},
{
"name": "model.layers.18.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 4132896
},
{
"name": "model.layers.18.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 4182048
},
{
"name": "model.layers.18.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4182080
},
{
"name": "model.layers.18.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 4190272
},
{
"name": "model.layers.18.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4191296
},
{
"name": "model.layers.18.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 4199488
},
{
"name": "model.layers.18.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 4200512
},
{
"name": "model.layers.18.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 7346240
},
{
"name": "model.layers.18.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 7739456
},
{
"name": "model.layers.18.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 8788032
},
{
"name": "model.layers.18.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8919104
},
{
"name": "model.layers.18.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8919360
},
{
"name": "model.layers.18.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 9967936
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 10099008
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 11934016
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 12163392
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 15833408
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16292160
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16294208
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 16296256
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 18131264
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18360640
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 22030656
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 22489408
},
{
"name": "model.layers.19.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 22491456
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
5120,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 22491968
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
5120,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 25113408
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 25441088
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 26489664
},
{
"name": "model.layers.19.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 26620736
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26621248
},
{
"name": "model.layers.2.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 26623296
},
{
"name": "model.layers.2.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26623328
},
{
"name": "model.layers.2.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 26672480
},
{
"name": "model.layers.2.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26672512
},
{
"name": "model.layers.2.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26680704
},
{
"name": "model.layers.2.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26681728
},
{
"name": "model.layers.2.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26689920
},
{
"name": "model.layers.2.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26690944
},
{
"name": "model.layers.2.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 29836672
},
{
"name": "model.layers.2.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 30229888
},
{
"name": "model.layers.2.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 31278464
},
{
"name": "model.layers.2.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31409536
},
{
"name": "model.layers.2.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 31409792
},
{
"name": "model.layers.2.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 32458368
}
],
"md5sum": "2f15bf82023905de99ee433bc410b6cd"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 30591680,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 1835008
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 2064384
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 5734400
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 6193152
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 6195200
},
{
"name": "model.layers.20.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 6197248
},
{
"name": "model.layers.20.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 6197280
},
{
"name": "model.layers.20.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 6246432
},
{
"name": "model.layers.20.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6246464
},
{
"name": "model.layers.20.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 6254656
},
{
"name": "model.layers.20.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6255680
},
{
"name": "model.layers.20.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 6263872
},
{
"name": "model.layers.20.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 6264896
},
{
"name": "model.layers.20.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 9410624
},
{
"name": "model.layers.20.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9803840
},
{
"name": "model.layers.20.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 10852416
},
{
"name": "model.layers.20.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 10983488
},
{
"name": "model.layers.20.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 10983744
},
{
"name": "model.layers.20.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 12032320
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 12163392
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 13998400
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 14227776
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 17897792
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 18356544
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 18358592
},
{
"name": "model.layers.21.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 18360640
},
{
"name": "model.layers.21.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 18360672
},
{
"name": "model.layers.21.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 18409824
},
{
"name": "model.layers.21.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18409856
},
{
"name": "model.layers.21.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 18418048
},
{
"name": "model.layers.21.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18419072
},
{
"name": "model.layers.21.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 18427264
},
{
"name": "model.layers.21.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 18428288
},
{
"name": "model.layers.21.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 21574016
},
{
"name": "model.layers.21.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 21967232
},
{
"name": "model.layers.21.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 23015808
},
{
"name": "model.layers.21.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23146880
},
{
"name": "model.layers.21.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 23147136
},
{
"name": "model.layers.21.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 24195712
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24326784
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 26161792
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 26391168
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 30061184
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 30519936
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 30521984
},
{
"name": "model.layers.22.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 30524032
},
{
"name": "model.layers.22.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 30524064
},
{
"name": "model.layers.22.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 30573216
},
{
"name": "model.layers.22.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30573248
},
{
"name": "model.layers.22.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 30581440
},
{
"name": "model.layers.22.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30582464
},
{
"name": "model.layers.22.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 30590656
}
],
"md5sum": "36b6f955709784df769c59208b708993"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32817472,
"records": [
{
"name": "model.layers.22.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.22.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 3145728
},
{
"name": "model.layers.22.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 3538944
},
{
"name": "model.layers.22.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 4587520
},
{
"name": "model.layers.22.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 4718592
},
{
"name": "model.layers.22.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 4718848
},
{
"name": "model.layers.22.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 5767424
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 5898496
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 7733504
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7962880
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 11632896
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12091648
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12093696
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 12095744
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 13930752
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 14160128
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 17830144
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 18288896
},
{
"name": "model.layers.23.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 18290944
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
5120,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 18291456
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
5120,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 20912896
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 21240576
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 22289152
},
{
"name": "model.layers.23.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 22420224
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 22420736
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22422784
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 24257792
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 24487168
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 28157184
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 28615936
},
{
"name": "model.layers.3.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 28617984
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
5120,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 28618496
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
5120,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 31239936
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 31567616
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 32616192
},
{
"name": "model.layers.3.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 32747264
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 32747776
},
{
"name": "model.layers.4.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 32749824
},
{
"name": "model.layers.4.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 32749856
},
{
"name": "model.layers.4.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 32799008
},
{
"name": "model.layers.4.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32799040
},
{
"name": "model.layers.4.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 32807232
},
{
"name": "model.layers.4.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32808256
},
{
"name": "model.layers.4.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 32816448
}
],
"md5sum": "e5c3bb181a1b2410f00104e82a97095b"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32289664,
"records": [
{
"name": "model.layers.4.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.4.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 3145728
},
{
"name": "model.layers.4.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 3538944
},
{
"name": "model.layers.4.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 4587520
},
{
"name": "model.layers.4.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 4718592
},
{
"name": "model.layers.4.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 4718848
},
{
"name": "model.layers.4.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 5767424
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 5898496
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 7733504
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7962880
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 11632896
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12091648
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12093696
},
{
"name": "model.layers.5.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 12095744
},
{
"name": "model.layers.5.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 12095776
},
{
"name": "model.layers.5.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 12144928
},
{
"name": "model.layers.5.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12144960
},
{
"name": "model.layers.5.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 12153152
},
{
"name": "model.layers.5.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12154176
},
{
"name": "model.layers.5.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 12162368
},
{
"name": "model.layers.5.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 12163392
},
{
"name": "model.layers.5.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 15309120
},
{
"name": "model.layers.5.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15702336
},
{
"name": "model.layers.5.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16750912
},
{
"name": "model.layers.5.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 16881984
},
{
"name": "model.layers.5.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 16882240
},
{
"name": "model.layers.5.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 17930816
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 18061888
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 19896896
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 20126272
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 23796288
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24255040
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24257088
},
{
"name": "model.layers.6.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 24259136
},
{
"name": "model.layers.6.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 24259168
},
{
"name": "model.layers.6.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 24308320
},
{
"name": "model.layers.6.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24308352
},
{
"name": "model.layers.6.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 24316544
},
{
"name": "model.layers.6.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24317568
},
{
"name": "model.layers.6.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 24325760
},
{
"name": "model.layers.6.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 24326784
},
{
"name": "model.layers.6.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27472512
},
{
"name": "model.layers.6.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 27865728
},
{
"name": "model.layers.6.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 28914304
},
{
"name": "model.layers.6.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 29045376
},
{
"name": "model.layers.6.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29045632
},
{
"name": "model.layers.6.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 30094208
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 30225280
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 32060288
}
],
"md5sum": "9062a4946203bfdf30f6a07a1cc91692"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32589440,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 3670016
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 4128768
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 4130816
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 4132864
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 5967872
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 6197248
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 9867264
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10326016
},
{
"name": "model.layers.7.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 10328064
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
5120,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 10328576
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
5120,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 12950016
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 13277696
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 14326272
},
{
"name": "model.layers.7.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 512,
"byteOffset": 14457344
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 14457856
},
{
"name": "model.layers.8.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 14459904
},
{
"name": "model.layers.8.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 14459936
},
{
"name": "model.layers.8.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 14509088
},
{
"name": "model.layers.8.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 14509120
},
{
"name": "model.layers.8.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 14517312
},
{
"name": "model.layers.8.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 14518336
},
{
"name": "model.layers.8.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 14526528
},
{
"name": "model.layers.8.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14527552
},
{
"name": "model.layers.8.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 17673280
},
{
"name": "model.layers.8.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 18066496
},
{
"name": "model.layers.8.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 19115072
},
{
"name": "model.layers.8.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 19246144
},
{
"name": "model.layers.8.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19246400
},
{
"name": "model.layers.8.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 20294976
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 20426048
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 22261056
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 22490432
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 26160448
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26619200
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26621248
},
{
"name": "model.layers.9.linear_attn.A_log",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 26623296
},
{
"name": "model.layers.9.linear_attn.conv1d_weight",
"shape": [
6144,
1,
4
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26623328
},
{
"name": "model.layers.9.linear_attn.dt_bias",
"shape": [
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 32,
"byteOffset": 26672480
},
{
"name": "model.layers.9.linear_attn.in_proj_a.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26672512
},
{
"name": "model.layers.9.linear_attn.in_proj_a.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26680704
},
{
"name": "model.layers.9.linear_attn.in_proj_b.q_weight",
"shape": [
16,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26681728
},
{
"name": "model.layers.9.linear_attn.in_proj_b.q_scale",
"shape": [
16,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 26689920
},
{
"name": "model.layers.9.linear_attn.in_proj_qkv.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26690944
},
{
"name": "model.layers.9.linear_attn.in_proj_qkv.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 29836672
},
{
"name": "model.layers.9.linear_attn.in_proj_z.q_weight",
"shape": [
2048,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 30229888
},
{
"name": "model.layers.9.linear_attn.in_proj_z.q_scale",
"shape": [
2048,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 31278464
},
{
"name": "model.layers.9.linear_attn.norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31409536
},
{
"name": "model.layers.9.linear_attn.out_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 31409792
},
{
"name": "model.layers.9.linear_attn.out_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 32458368
}
],
"md5sum": "3ce64a1408d65588ea658122f93b94fe"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 6197248,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1024,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1024,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 229376,
"byteOffset": 1835008
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
7168,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 2064384
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
7168,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 458752,
"byteOffset": 5734400
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 6193152
},
{
"name": "model.norm.weight",
"shape": [
1024
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 6195200
}
],
"md5sum": "d619f57e9a9104b6683591eb1e5826bf"
}
]
}