LJS_personalized-q4f16_0-MLC / ndarray-cache.json
JoonKimLang's picture
m
fee6451
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 2149644288.0,
"BitsPerParam": 4.500600961055312
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 49250304,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
384,
32064
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 49250304,
"byteOffset": 0
}
],
"md5sum": "15573574494a1c2a922401367f6e7f3b"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 49250304,
"records": [
{
"name": "transformer.embd.q_weight",
"shape": [
32064,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 49250304,
"byteOffset": 0
}
],
"md5sum": "e0811e2336da65b98ffc27b4f7a39c39"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.0.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "57cce677d212b33b86002a26f0290b9a"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29626368,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
96,
32064
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6156288,
"byteOffset": 0
},
{
"name": "transformer.embd.q_scale",
"shape": [
32064,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6156288,
"byteOffset": 6156288
},
{
"name": "transformer.h.0.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 12312576
},
{
"name": "transformer.h.0.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12318720
},
{
"name": "transformer.h.0.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 24901632
},
{
"name": "transformer.h.0.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26474496
},
{
"name": "transformer.h.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 29620224
}
],
"md5sum": "c7945d73cf6510708e40d9bd270fafe2"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.0.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.0.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.0.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.0.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "0973a8ea7d5f830946eeefac179c32c2"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.1.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6d8a9abf39555dd35b415d9d4d7e7101"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.1.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.1.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.1.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.1.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.1.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "ad5c85ffaca9d0d19662b4cac8ca5a07"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.1.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.1.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.10.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15925248
},
{
"name": "transformer.h.10.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 20643840
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "b4e6cb3260e02980fd9a419a1efce9be"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.2.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7ce305a61dda26f7f2998fcb6fc97fa9"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.2.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.2.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.2.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.2.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.2.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "f731e2da5d3fa94b814aecec32633eb7"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.3.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e7cb25234d45f026724fb3e2e5af1e95"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.2.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.2.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.3.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.3.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.3.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "16ac4f10e573817faac0358da57da368"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.3.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.3.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.3.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.3.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "39337c844d6c74d95d95ee550dd16eba"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.4.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "22176cdd0d0a787148d29824bac9beba"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.4.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.4.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.4.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.4.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.4.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "66b53469588bb5fba4e9f7f04975d195"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.5.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b9a886819c9351787d018daf594cfbfb"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.4.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.4.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.5.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.5.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.5.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "d1293de91162eb14bdc56f3cb1b154d6"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.5.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.5.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.5.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.5.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "03fb6728a789d04c832a62153700a0fa"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.6.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d569473b4cb895c97c983bc6689e2005"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.6.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.6.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.6.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.6.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.6.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "259697ad495fe0450b17a2fa433d4e20"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.7.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f92443ab0121ad3d052d29c4627ae773"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.6.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.6.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.7.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.7.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.7.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "41d8eba87badfa6068d0bc21bcd91283"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.7.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.7.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.7.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.7.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "93d602e1ab68537c64b6d22b99cc0a16"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.8.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9e5923d87038b2d83c2a0b713d5d0fad"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.8.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.8.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.8.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.8.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.8.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "b2cd3de4a61dca4d61b4087ff646d397"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.9.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "778b37b70f7d2ddce8fc9c2f6161487e"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.8.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.8.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.9.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.9.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.9.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "5c26e35f7a5d61e7b2032757ca061bc2"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.9.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.9.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.9.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.9.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "810074db81be7c76cbc305125964349e"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.10.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f2654d0ac6b5dfeee31e7e3baf20f3a8"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.10.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.10.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.10.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.10.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 17307648
},
{
"name": "transformer.h.10.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 31463424
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "32cd966e5f218b8d68a20041c29fb6ea"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.11.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "50bb2232262acc5ffd10a3e15527c1c2"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.11.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.11.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.11.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.11.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.11.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "d0570c8ed2877ba18be0c349c0040b3c"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.12.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "037148981118ea0af09c13713ef9a6c6"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.11.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.11.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.12.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.12.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.12.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "8f36ace95203b8f0f2c909d555828401"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.12.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.12.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.12.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.12.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "da870c7e39f363bf7db506fc30dad7ac"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.13.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "606143a7382a22d851a119fecb78e13a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.13.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.13.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.13.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.13.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.13.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "c9ffba635aa2250f444a4bf49a68ba29"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.14.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b19fa817d1ca04693bcea95e1c826d60"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.13.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.13.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.14.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.14.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.14.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "8df125dd76e1250ab80e29a1157542e0"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.14.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.14.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.14.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.14.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "e395bf392a91418df0471920fa6c16ce"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.15.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1d5166ea8d4c980891830d2e32669ed5"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.15.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.15.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.15.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.15.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.15.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "7bb3d6e2c2b6e044139d1092cc0a314d"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.16.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "19b9258916fda726e3d4bce7a1bcd9d7"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.15.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.15.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.16.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.16.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.16.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "c09e12b4bb167451d6cbcd724dafe4bc"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.16.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.16.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.16.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.16.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "ef184de8023d63d4bf2439594989a206"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.17.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8335ed38e0c95d452dca3359b2552697"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.17.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.17.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.17.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.17.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.17.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "a2b0c8ef4197aa85a49c3ef5e76c2e3e"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.18.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f7a8cdc82546dd050feb8fc2a1c1f0ae"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.17.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.17.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.18.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.18.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.18.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "cadb90ccceaac98b8afbc74a34d551ac"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.18.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.18.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.18.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.18.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "fdd3ee02c80722a9621f30fc19710099"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.19.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "67ccfdf8554ddeefe7c1ccba24ff3bd9"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.19.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.19.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.19.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.19.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.19.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "89fb0a1117b0bb1f237ef366639288d5"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.20.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2e7902c3c7ccc11280982bf79a2b98f2"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.19.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.19.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.20.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.20.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.20.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.20.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "55560d55c6b59f6cca31ff3bd8c8d30a"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 26548224,
"records": [
{
"name": "transformer.h.20.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.20.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.20.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.20.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.21.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 21233664
},
{
"name": "transformer.h.21.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 25952256
},
{
"name": "transformer.h.21.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 26542080
}
],
"md5sum": "46c100046bb09cff36b8d7a256c6d0ab"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.21.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8816fbc05ee9a74f5892c265bd3dc2c4"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.21.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.21.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.21.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.21.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 17307648
},
{
"name": "transformer.h.21.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 31463424
},
{
"name": "transformer.h.22.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "bcbc3352e09e474ccca9d86f33ac29f3"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.22.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8d702eacf26050fb080bc5cd14e19d57"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.22.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.22.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.22.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.22.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.22.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "fa20a67df7969bc8dad3f21654c0752c"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.23.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3e07a8344b4b92175ecd0bc4bab1aec3"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.22.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.22.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.23.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.23.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.23.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.23.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "375c44afc0c4ebe5836349e8d8ac8a8f"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.23.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.23.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.23.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.23.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.24.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "68b02c9a4c80b038e183fe307085895a"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.24.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8276fc82581b00b6b6407e1b0b0dec78"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.24.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.24.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.24.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.24.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.24.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "50f295bc5b7dc74eb65a7dfcfdb5d567"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.25.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c6beb8450f10c815f8d8fb341868aafa"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.24.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.24.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.25.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.25.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.25.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.25.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "eea14bd61b23509a9681a8aea9d0f88f"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.25.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.25.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.25.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.25.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.26.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "e2d9c3c554f18832a5f68986e1226fa2"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.26.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f43c540d5081e9b723c65609c6da35f9"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.26.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.26.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.26.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.26.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.26.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "2159b9922b85d2bebb4d2b412e099201"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.27.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a43e60b6b9d8f2fd040d8ef99b8d64db"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.26.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.26.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.27.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.27.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.27.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.27.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "6a3c6248a18aa152c7bb25619ea6768e"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.27.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.27.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.27.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.27.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.28.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "d173e8c7ccda6dc7954eedd9ed32e710"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.28.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e4ddbedb9da323ef1ca6bad4418dba84"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.28.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.28.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.28.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.28.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.28.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "e2267ec7de4f7e6448e700b69afe2ac0"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.29.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4731380fc4e6fa2cd05780f97c37e196"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.28.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.28.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.29.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.29.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.29.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.29.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "6fbefb8f267a24d2dbbe3692ac8938df"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.29.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.29.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.29.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.29.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.30.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "20326e21f01c9acec60fe07813a88106"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.30.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1cf52771f676084daa2f52db40a66a15"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.30.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.30.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.30.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.30.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.30.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.30.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "599b6c83579422db40a7901197cea68d"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.31.mlp.gate_up_proj.q_weight",
"shape": [
384,
16384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "281ef6455f25be49db81cbc3ffdbed75"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.30.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.30.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.31.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.31.mlp.down_proj.q_weight",
"shape": [
1024,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.31.mlp.down_proj.q_scale",
"shape": [
256,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.31.mlp.gate_up_proj.q_scale",
"shape": [
96,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.31.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "60a3fd15c38c7944538eb65e85dee95c"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.31.mixer.out_proj.q_weight",
"shape": [
384,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.31.mixer.out_proj.q_scale",
"shape": [
96,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.31.mixer.qkv_proj.q_weight",
"shape": [
384,
9216
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.31.mixer.qkv_proj.q_scale",
"shape": [
96,
9216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "f2f09439ad76038cd0956a7cb98943b6"
}
]
}