scigemma_fine_tuned_quantized_MLC / ndarray-cache.json
congthien's picture
Fine-tuned quantized Gemma 2b-it model.
b760d28 verified
{
"metadata": {
"ParamSize": 183,
"ParamBytes": 1409830912.0,
"BitsPerParam": 4.500347711112945
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
256000,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "4500be88779baf43abfc8597bf8cf1fe"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "38b83948d3c53ae7adbdd39ae9b0d0ae"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32772096,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
256000,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768000,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 32768000
}
],
"md5sum": "f379b208c95b8217c994896c7b6f2498"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0a3f68c7b3b6f52b44c5d9b20ac77a39"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b2cff37297312a43839c99d907191f4f"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 2097152
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 6291456
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 6295552
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 8916992
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 9244672
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 11341824
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11603968
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 11608064
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 28385280
}
],
"md5sum": "d5a1e548387f4eeccb286f32acc06912"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4b3e5aaa0689fabeb1a3ed693c711cab"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32583680,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4194304
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 4198400
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 6819840
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 7147520
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 9244672
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 9506816
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 9510912
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26288128
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 28385280
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 32579584
}
],
"md5sum": "9d38ef147a2cec00b866f4ac8e794093"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "92f2e31e4087301785c89481fbbaf8a7"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33431552,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 2621440
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 2949120
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 5046272
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 5308416
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 5312512
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 22089728
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24186880
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28381184
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 28385280
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 31006720
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 31334400
}
],
"md5sum": "91d188a050a683a22005344db4bae131"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2777281b787c40027bb55ceaf3cf0a12"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "0421d576b5c3902d8ba05c55c5b40f73"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b9f802b9670264b05cffac5f4a6c7771"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 30748672,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 262144
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 266240
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17043456
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19140608
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23334912
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23339008
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 25960448
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26288128
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28385280
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28647424
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 28651520
}
],
"md5sum": "269ace3f93d312dfe2a20d4e9319f958"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ec3c9c987df95dc1c093df1d775a6387"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32583680,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4194304
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 4198400
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 6819840
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 7147520
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 9244672
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 9506816
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 9510912
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26288128
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 28385280
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 32579584
}
],
"md5sum": "3e788f26fed33cf073e70f549b2352e7"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5481cab12af77a8b3c8a3e95ced9dac0"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33431552,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 2621440
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 2949120
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 5046272
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 5308416
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 5312512
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 22089728
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24186880
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28381184
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 28385280
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 31006720
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 31334400
}
],
"md5sum": "dcd0914382879300c806526fc1b11d9b"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2bca8e3c0105370bb26567dd960f1817"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2d42afab792b4e68fdbef8e24f570618"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 32841728,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 262144
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 266240
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17043456
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19140608
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23334912
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23339008
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 25960448
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26288128
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28385280
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 28647424
}
],
"md5sum": "dfea8cc17425b18f12e9b380d3b83afc"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a6fda00b44983456688deaf3260db086"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33431552,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 2621440
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 2949120
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 5046272
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 5308416
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 5312512
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 22089728
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24186880
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28381184
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 28385280
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 31006720
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 31334400
}
],
"md5sum": "322cca4b881fc857e6bb88708cb45a2a"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1867886dbb9e220bdd09b76260ffdbfa"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "12dc6ec2879cc02154c1b39da83ca6d4"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d5dfdc2802eea11f09dd70a868384be8"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 30748672,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 262144
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 266240
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17043456
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19140608
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23334912
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23339008
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 25960448
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26288128
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28385280
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28647424
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 28651520
}
],
"md5sum": "06b384baa082e9596acae6f5557a3b7b"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e9530a52888412eecf7c229d1cf349ed"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 32583680,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4194304
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 4198400
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 6819840
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 7147520
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 9244672
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 9506816
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 9510912
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26288128
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 28385280
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 32579584
}
],
"md5sum": "3c4e108837fe16fe2a9cc10c82cb382c"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "dbf327059484344e672afc5ff4409bb7"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33431552,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 2621440
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 2949120
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 5046272
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 5308416
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 5312512
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 22089728
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24186880
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28381184
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 28385280
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 31006720
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 31334400
}
],
"md5sum": "8073a80137594cb7989b9a2fa82dac8c"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "abf0c9807e37d44233396f6123632a32"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "c45c83f7e06c1a97ffdcc5b7b061f66b"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "de8c35220aee0229f943ac6aa6dff81f"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 30748672,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 262144
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 266240
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17043456
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19140608
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23334912
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23339008
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 25960448
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26288128
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28385280
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28647424
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 28651520
}
],
"md5sum": "578f24087be1c5071b0ebd6ec3f0c1a7"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
32768,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6d6e2a52dee3864fb0dc7460fc2f0bdf"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32583680,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4194304
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 4198400
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 6819840
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 7147520
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 9244672
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 9506816
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 9510912
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26288128
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
32768,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 28385280
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 32579584
}
],
"md5sum": "035a8f14159215951cb33a2ce473e3d4"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 24195072,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
2560,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 2621440
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 2949120
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 5046272
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 5308416
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2048,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 5312512
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2048,
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 22089728
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 24186880
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 24190976
}
],
"md5sum": "671b05e460bd7cd2b80687b97051c91b"
}
]
}