phi2_final-q0f16 / ndarray-cache.json
Ericao's picture
Upload 89 files
a70ebc9 verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4300707840.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "lm_head.linear.weight",
"shape": [
51200,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "d975f6e26151b33f352a07426b0e0b04"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.31.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f23dbb82fd6dd312dcf6143e22b7ed11"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.31.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1da1926a2468df6a44a5a5b6466f343b"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "transformer.embd.weight",
"shape": [
51200,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "92e1da58eb645448e09ca03417db18f3"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.0.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "06643c4cf1355512c432025490149331"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.0.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "16176aff35a8d755aac87792370132d3"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.1.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "159f4e35177c8e668ce734407e469bcf"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.1.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a5d4b12f35eeb85f2ef3bd17ed299f96"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.10.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "80634adb38332c934930f1b28c1bccfd"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.10.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "90ab9865764843adfbd08975e606c4f6"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 29774080,
"records": [
{
"name": "lm_head.linear.bias",
"shape": [
51200
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 0
},
{
"name": "lm_head.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 102400
},
{
"name": "lm_head.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 107520
},
{
"name": "transformer.h.31.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 112640
},
{
"name": "transformer.h.31.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 117760
},
{
"name": "transformer.h.31.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 122880
},
{
"name": "transformer.h.31.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 143360
},
{
"name": "transformer.h.0.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 148480
},
{
"name": "transformer.h.0.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 153600
},
{
"name": "transformer.h.0.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 158720
},
{
"name": "transformer.h.0.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 179200
},
{
"name": "transformer.h.0.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 184320
},
{
"name": "transformer.h.0.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 189440
},
{
"name": "transformer.h.0.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 3466240
},
{
"name": "transformer.h.0.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 3470080
},
{
"name": "transformer.h.1.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13300480
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13305600
},
{
"name": "transformer.h.1.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13310720
},
{
"name": "transformer.h.1.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13331200
},
{
"name": "transformer.h.1.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13336320
},
{
"name": "transformer.h.1.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 13341440
},
{
"name": "transformer.h.1.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 16618240
},
{
"name": "transformer.h.1.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 16622080
},
{
"name": "transformer.h.10.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26452480
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26457600
},
{
"name": "transformer.h.10.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26462720
},
{
"name": "transformer.h.10.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26483200
},
{
"name": "transformer.h.10.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26488320
},
{
"name": "transformer.h.10.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 26493440
},
{
"name": "transformer.h.10.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 29770240
}
],
"md5sum": "3916d5f402a8cac121576445b23853e4"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.11.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "542fdb89d63a08d4e54b3ef4c99e79d7"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.11.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "409edfe7217787b66d516287d16135e4"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.12.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4b54c6cc75911ea29a6fd512bb9a6a85"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.12.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "30a8025916cde9e6bf881befcaba59ba"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.10.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.11.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.11.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.11.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.11.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.11.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.11.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.11.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.12.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.12.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.12.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.12.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.12.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.12.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "23ee52fd56c33fe11da594c34969a6f8"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.13.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9713853cc85ec38b24d4b548444ee76b"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.13.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "74130282439ed4aafba2acdf09e04b7f"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.14.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "94cee659b45a7ef5191b2a92f94c6c08"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 26288640,
"records": [
{
"name": "transformer.h.12.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.13.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.13.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.13.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.13.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.13.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.13.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.13.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.14.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22982400
},
{
"name": "transformer.h.14.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23002880
},
{
"name": "transformer.h.14.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23008000
},
{
"name": "transformer.h.14.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26284800
}
],
"md5sum": "08512f46e09cbf3826555c354c9f26ae"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.2.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6ba164fe25bdbc50bf6741eff134b325"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.2.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "aa7186d1325ce49118dadb569e3314d8"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.3.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0b73ad242f4eee635f8cdedc345b9283"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.3.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "007c2eab956fecfd8b5c5de82066fe1f"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.14.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.2.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.2.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.2.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.2.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.2.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.2.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.2.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.3.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.3.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.3.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.3.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.3.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.3.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "73ffc8bbdafdf7510685fa4fbf6fe65f"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.4.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d9cb3496b819790cfa6d61edf9b5dbe4"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.4.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "82a2a1233c6289df34b78e2133a858fc"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.5.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7dfeae57c9eeb8d6642f80c2230bb5f7"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.5.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "03bcab4f0956bd1bbbe05f16a3389f27"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.3.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.4.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.4.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.4.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.4.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.4.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.4.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.4.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.5.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.5.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.5.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.5.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.5.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.5.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "9cc5df02378d5f4e9401c87a828b5f42"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.6.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "fb1ece1d04b2ad5d2551ba295bdb84fc"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.6.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9a05a675400198338c0257076995605a"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.7.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ef5fda286fffac394564c3cd8d58b9ad"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.7.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1865e0949a7c3c44cf5e951b7e1ce459"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.5.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.6.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.6.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.6.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.6.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.6.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.6.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.6.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.7.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.7.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.7.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.7.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.7.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.7.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "94ba6f873c00c81cec9389a135172a94"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.8.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "327309c4a1ec9e5f3df05bebde532c70"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.8.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "34fe66a01091b3a28ad3ef1153551b25"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.9.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c7f2bbf214d1edc8b19c2f006af0afdd"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.9.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1a231d574ff8a3826c19b7d9e825f2b4"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.7.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.8.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.8.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.8.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.8.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.8.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.8.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.8.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.9.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.9.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.9.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.9.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.9.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.9.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "3c2fafaa741c7ebe478cb19365f52442"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.14.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5872e8dbd1cdb4845470d1d0474ed2da"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.15.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ebb1819399ea2ba7255684558b22d044"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.15.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "beb90ecf97840c25388feaae5b3a1eeb"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.16.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "64e2abc845f69a976d24f2068cdd477c"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.16.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b5e5beed9f784121eaa4b70c78efb76b"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 26319360,
"records": [
{
"name": "transformer.h.9.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.14.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.14.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9840640
},
{
"name": "transformer.h.15.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9845760
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9850880
},
{
"name": "transformer.h.15.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9856000
},
{
"name": "transformer.h.15.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9876480
},
{
"name": "transformer.h.15.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9881600
},
{
"name": "transformer.h.15.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9886720
},
{
"name": "transformer.h.15.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13163520
},
{
"name": "transformer.h.15.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13167360
},
{
"name": "transformer.h.16.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22997760
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23002880
},
{
"name": "transformer.h.16.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 23008000
},
{
"name": "transformer.h.16.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23028480
},
{
"name": "transformer.h.16.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23033600
},
{
"name": "transformer.h.16.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23038720
},
{
"name": "transformer.h.16.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26315520
}
],
"md5sum": "e273300e40456e3a550b4d9a78c48c2b"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.17.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6f3b18780cad91797c93fa45b85a86a7"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.17.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "86e52c30a3f11bf17d863d12761e4b7b"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.18.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b913cbe31eb05080139ecff883c69864"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.18.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "fae9535f797876d0666edf53e6db6449"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.16.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.17.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.17.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.17.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.17.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.17.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.17.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.17.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.18.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.18.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.18.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.18.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.18.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.18.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "2ba1eafa04c4caf1bf129aa6db290ca6"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.19.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e2c902dcb420f5c8cc55e678376ebdc4"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.19.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "71ea52dea7b1b442a2f1da92156d9d3f"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.20.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "343e7eef37a242ef1719f27c2c662c60"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.20.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "14cdda37b9dcccf904d96cf04280404d"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.18.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.19.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.19.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.19.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.19.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.19.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.19.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.19.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.20.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.20.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.20.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.20.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.20.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.20.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.20.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "0ad45f216f510ecb47aaf7d28d2c998c"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.21.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "99879209630b84ada840c1157bb92324"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.21.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "03b469bafc73b560b206a2cb0211e725"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.22.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5bd5b0d996dbac57ef1c61ce4f5a774d"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.22.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4c942820a1410dec5bf15cbb15357e11"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.20.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.21.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.21.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.21.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.21.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.21.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.21.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.21.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.21.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.22.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.22.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.22.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.22.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.22.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.22.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.22.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "8b7ebd68a6c0ee436af6566c39af6b42"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.23.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7eaeee3a74339fe092fd73d561b35710"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.23.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "34e8de47310f3d87008c085ba70ff33c"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.24.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f57b3e81eb281f16aaa4c9d55203da7f"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.24.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f31736218cfe9ad3479c692756a2aca6"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.22.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.23.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.23.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.23.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.23.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.23.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.23.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.23.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.23.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.24.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.24.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.24.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.24.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.24.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.24.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.24.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "e0364658dc38ff9dd0c11c2f35a2b308"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.25.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "de0b9bfb5aca71fc8645f7f73b0a40f4"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.25.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "344532f2b63189d1207727bb5a0cdceb"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.26.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "bd493e86bd7af49ace1c0f5c59b5e16d"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.26.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c09105f6dc6fa66c08e3904452773660"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.24.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.25.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.25.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.25.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.25.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.25.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.25.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.25.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.25.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.26.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.26.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.26.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.26.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.26.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.26.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.26.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "67cc5b7deb7a6ab658bc8a61d5d3218c"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.27.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9c70aa55124f7ac2583c3ad70846eabd"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.27.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "accc65787d53e2825f2db36dfd12ae20"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.28.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d1ab9190ecc43be15da3e2f39326651f"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.28.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c25a5b813451672307ac3ec8487a9fca"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.26.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.27.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.27.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.27.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.27.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.27.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.27.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.27.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.27.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.28.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.28.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.28.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.28.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.28.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.28.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.28.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "112f1bdd0398b672709c03aa83cfe677"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.29.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2bf8fdd040827bf1ce443d974b155ea2"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.29.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "14fd5ce1ad0eaaaa45ecc7173169af95"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.30.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "97b87e746f7d162db5bef69b670d83d9"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.30.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e43c1723fdb3ea46ffa7b6c0b9b65d7b"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 26304000,
"records": [
{
"name": "transformer.h.28.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.29.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.29.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9835520
},
{
"name": "transformer.h.29.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 9840640
},
{
"name": "transformer.h.29.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9861120
},
{
"name": "transformer.h.29.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9866240
},
{
"name": "transformer.h.29.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9871360
},
{
"name": "transformer.h.29.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13148160
},
{
"name": "transformer.h.29.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13152000
},
{
"name": "transformer.h.30.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22982400
},
{
"name": "transformer.h.30.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 22987520
},
{
"name": "transformer.h.30.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 22992640
},
{
"name": "transformer.h.30.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23013120
},
{
"name": "transformer.h.30.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23018240
},
{
"name": "transformer.h.30.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 23023360
},
{
"name": "transformer.h.30.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 26300160
}
],
"md5sum": "108cc3c01c0da74fedf752f3cd37a088"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 22946560,
"records": [
{
"name": "transformer.h.30.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "transformer.h.31.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 9830400
},
{
"name": "transformer.h.31.mixer.out_proj.weight",
"shape": [
2560,
640
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 9835520
},
{
"name": "transformer.h.31.mixer.Wqkv.bias",
"shape": [
1920
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3840,
"byteOffset": 13112320
},
{
"name": "transformer.h.31.mixer.Wqkv.weight",
"shape": [
1920,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 13116160
}
],
"md5sum": "af432cb8e09a87fa3dda25cd617d0ae7"
}
]
}