{
"metadata": {
"ParamSize": 278,
"ParamBytes": 70289572.0,
"BitsPerParam": 4.511266288677072
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 32950948,
"records": [
{
"name": "classifier.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 0
},
{
"name": "classifier.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 1536
},
{
"name": "classifier.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 296448
},
{
"name": "classifier.out_proj.bias",
"shape": [
2
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4,
"byteOffset": 333312
},
{
"name": "classifier.out_proj.q_weight",
"shape": [
2,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 768,
"byteOffset": 333316
},
{
"name": "classifier.out_proj.q_scale",
"shape": [
2,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 96,
"byteOffset": 334084
},
{
"name": "roberta.embeddings.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 334180
},
{
"name": "roberta.embeddings.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 335716
},
{
"name": "roberta.embeddings.position_embeddings.q_weight",
"shape": [
514,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 197376,
"byteOffset": 337252
},
{
"name": "roberta.embeddings.position_embeddings.q_scale",
"shape": [
514,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 24672,
"byteOffset": 534628
},
{
"name": "roberta.embeddings.token_type_embeddings.q_weight",
"shape": [
1,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 384,
"byteOffset": 559300
},
{
"name": "roberta.embeddings.token_type_embeddings.q_scale",
"shape": [
1,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 48,
"byteOffset": 559684
},
{
"name": "roberta.embeddings.word_embeddings.q_weight",
"shape": [
50265,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19301760,
"byteOffset": 559732
},
{
"name": "roberta.embeddings.word_embeddings.q_scale",
"shape": [
50265,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2412720,
"byteOffset": 19861492
},
{
"name": "roberta.encoder.layer.0.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22274212
},
{
"name": "roberta.encoder.layer.0.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22275748
},
{
"name": "roberta.encoder.layer.0.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22277284
},
{
"name": "roberta.encoder.layer.0.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 22278820
},
{
"name": "roberta.encoder.layer.0.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 22573732
},
{
"name": "roberta.encoder.layer.0.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22610596
},
{
"name": "roberta.encoder.layer.0.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 22612132
},
{
"name": "roberta.encoder.layer.0.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 22907044
},
{
"name": "roberta.encoder.layer.0.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22943908
},
{
"name": "roberta.encoder.layer.0.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 22945444
},
{
"name": "roberta.encoder.layer.0.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 23240356
},
{
"name": "roberta.encoder.layer.0.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23277220
},
{
"name": "roberta.encoder.layer.0.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 23278756
},
{
"name": "roberta.encoder.layer.0.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 23573668
},
{
"name": "roberta.encoder.layer.0.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23610532
},
{
"name": "roberta.encoder.layer.0.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 23616676
},
{
"name": "roberta.encoder.layer.0.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 24796324
},
{
"name": "roberta.encoder.layer.0.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24943780
},
{
"name": "roberta.encoder.layer.0.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24945316
},
{
"name": "roberta.encoder.layer.0.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24946852
},
{
"name": "roberta.encoder.layer.0.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24948388
},
{
"name": "roberta.encoder.layer.0.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 26128036
},
{
"name": "roberta.encoder.layer.1.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26275492
},
{
"name": "roberta.encoder.layer.1.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26277028
},
{
"name": "roberta.encoder.layer.1.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26278564
},
{
"name": "roberta.encoder.layer.1.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 26280100
},
{
"name": "roberta.encoder.layer.1.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 26575012
},
{
"name": "roberta.encoder.layer.1.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26611876
},
{
"name": "roberta.encoder.layer.1.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 26613412
},
{
"name": "roberta.encoder.layer.1.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 26908324
},
{
"name": "roberta.encoder.layer.1.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26945188
},
{
"name": "roberta.encoder.layer.1.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 26946724
},
{
"name": "roberta.encoder.layer.1.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 27241636
},
{
"name": "roberta.encoder.layer.1.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 27278500
},
{
"name": "roberta.encoder.layer.1.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 27280036
},
{
"name": "roberta.encoder.layer.1.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 27574948
},
{
"name": "roberta.encoder.layer.1.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 27611812
},
{
"name": "roberta.encoder.layer.1.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 27617956
},
{
"name": "roberta.encoder.layer.1.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 28797604
},
{
"name": "roberta.encoder.layer.1.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28945060
},
{
"name": "roberta.encoder.layer.1.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28946596
},
{
"name": "roberta.encoder.layer.1.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28948132
},
{
"name": "roberta.encoder.layer.1.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 28949668
},
{
"name": "roberta.encoder.layer.1.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 30129316
},
{
"name": "roberta.encoder.layer.10.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30276772
},
{
"name": "roberta.encoder.layer.10.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30278308
},
{
"name": "roberta.encoder.layer.10.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30279844
},
{
"name": "roberta.encoder.layer.10.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 30281380
},
{
"name": "roberta.encoder.layer.10.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 30576292
},
{
"name": "roberta.encoder.layer.10.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30613156
},
{
"name": "roberta.encoder.layer.10.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 30614692
},
{
"name": "roberta.encoder.layer.10.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 30909604
},
{
"name": "roberta.encoder.layer.10.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30946468
},
{
"name": "roberta.encoder.layer.10.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 30948004
},
{
"name": "roberta.encoder.layer.10.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 31242916
},
{
"name": "roberta.encoder.layer.10.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 31279780
},
{
"name": "roberta.encoder.layer.10.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 31281316
},
{
"name": "roberta.encoder.layer.10.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 31576228
},
{
"name": "roberta.encoder.layer.10.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31613092
},
{
"name": "roberta.encoder.layer.10.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 31619236
},
{
"name": "roberta.encoder.layer.10.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 32798884
},
{
"name": "roberta.encoder.layer.10.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 32946340
},
{
"name": "roberta.encoder.layer.10.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 32947876
},
{
"name": "roberta.encoder.layer.10.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 32949412
}
],
"md5sum": "e8e1b5159d63050d7bbc3a3cff38c00e"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33341952,
"records": [
{
"name": "roberta.encoder.layer.10.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 0
},
{
"name": "roberta.encoder.layer.10.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 1179648
},
{
"name": "roberta.encoder.layer.11.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1327104
},
{
"name": "roberta.encoder.layer.11.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1328640
},
{
"name": "roberta.encoder.layer.11.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1330176
},
{
"name": "roberta.encoder.layer.11.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 1331712
},
{
"name": "roberta.encoder.layer.11.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 1626624
},
{
"name": "roberta.encoder.layer.11.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1663488
},
{
"name": "roberta.encoder.layer.11.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 1665024
},
{
"name": "roberta.encoder.layer.11.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 1959936
},
{
"name": "roberta.encoder.layer.11.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1996800
},
{
"name": "roberta.encoder.layer.11.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 1998336
},
{
"name": "roberta.encoder.layer.11.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 2293248
},
{
"name": "roberta.encoder.layer.11.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 2330112
},
{
"name": "roberta.encoder.layer.11.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 2331648
},
{
"name": "roberta.encoder.layer.11.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 2626560
},
{
"name": "roberta.encoder.layer.11.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 2663424
},
{
"name": "roberta.encoder.layer.11.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 2669568
},
{
"name": "roberta.encoder.layer.11.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 3849216
},
{
"name": "roberta.encoder.layer.11.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 3996672
},
{
"name": "roberta.encoder.layer.11.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 3998208
},
{
"name": "roberta.encoder.layer.11.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 3999744
},
{
"name": "roberta.encoder.layer.11.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 4001280
},
{
"name": "roberta.encoder.layer.11.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 5180928
},
{
"name": "roberta.encoder.layer.2.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 5328384
},
{
"name": "roberta.encoder.layer.2.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 5329920
},
{
"name": "roberta.encoder.layer.2.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 5331456
},
{
"name": "roberta.encoder.layer.2.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 5332992
},
{
"name": "roberta.encoder.layer.2.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 5627904
},
{
"name": "roberta.encoder.layer.2.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 5664768
},
{
"name": "roberta.encoder.layer.2.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 5666304
},
{
"name": "roberta.encoder.layer.2.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 5961216
},
{
"name": "roberta.encoder.layer.2.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 5998080
},
{
"name": "roberta.encoder.layer.2.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 5999616
},
{
"name": "roberta.encoder.layer.2.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 6294528
},
{
"name": "roberta.encoder.layer.2.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 6331392
},
{
"name": "roberta.encoder.layer.2.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 6332928
},
{
"name": "roberta.encoder.layer.2.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 6627840
},
{
"name": "roberta.encoder.layer.2.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 6664704
},
{
"name": "roberta.encoder.layer.2.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 6670848
},
{
"name": "roberta.encoder.layer.2.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 7850496
},
{
"name": "roberta.encoder.layer.2.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 7997952
},
{
"name": "roberta.encoder.layer.2.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 7999488
},
{
"name": "roberta.encoder.layer.2.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 8001024
},
{
"name": "roberta.encoder.layer.2.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 8002560
},
{
"name": "roberta.encoder.layer.2.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 9182208
},
{
"name": "roberta.encoder.layer.3.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9329664
},
{
"name": "roberta.encoder.layer.3.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9331200
},
{
"name": "roberta.encoder.layer.3.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9332736
},
{
"name": "roberta.encoder.layer.3.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9334272
},
{
"name": "roberta.encoder.layer.3.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 9629184
},
{
"name": "roberta.encoder.layer.3.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9666048
},
{
"name": "roberta.encoder.layer.3.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9667584
},
{
"name": "roberta.encoder.layer.3.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 9962496
},
{
"name": "roberta.encoder.layer.3.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9999360
},
{
"name": "roberta.encoder.layer.3.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 10000896
},
{
"name": "roberta.encoder.layer.3.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 10295808
},
{
"name": "roberta.encoder.layer.3.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 10332672
},
{
"name": "roberta.encoder.layer.3.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 10334208
},
{
"name": "roberta.encoder.layer.3.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 10629120
},
{
"name": "roberta.encoder.layer.3.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 10665984
},
{
"name": "roberta.encoder.layer.3.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 10672128
},
{
"name": "roberta.encoder.layer.3.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 11851776
},
{
"name": "roberta.encoder.layer.3.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 11999232
},
{
"name": "roberta.encoder.layer.3.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12000768
},
{
"name": "roberta.encoder.layer.3.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 12002304
},
{
"name": "roberta.encoder.layer.3.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 12003840
},
{
"name": "roberta.encoder.layer.3.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 13183488
},
{
"name": "roberta.encoder.layer.4.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 13330944
},
{
"name": "roberta.encoder.layer.4.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 13332480
},
{
"name": "roberta.encoder.layer.4.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 13334016
},
{
"name": "roberta.encoder.layer.4.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 13335552
},
{
"name": "roberta.encoder.layer.4.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 13630464
},
{
"name": "roberta.encoder.layer.4.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 13667328
},
{
"name": "roberta.encoder.layer.4.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 13668864
},
{
"name": "roberta.encoder.layer.4.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 13963776
},
{
"name": "roberta.encoder.layer.4.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14000640
},
{
"name": "roberta.encoder.layer.4.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 14002176
},
{
"name": "roberta.encoder.layer.4.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 14297088
},
{
"name": "roberta.encoder.layer.4.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14333952
},
{
"name": "roberta.encoder.layer.4.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 14335488
},
{
"name": "roberta.encoder.layer.4.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 14630400
},
{
"name": "roberta.encoder.layer.4.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14667264
},
{
"name": "roberta.encoder.layer.4.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 14673408
},
{
"name": "roberta.encoder.layer.4.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 15853056
},
{
"name": "roberta.encoder.layer.4.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 16000512
},
{
"name": "roberta.encoder.layer.4.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 16002048
},
{
"name": "roberta.encoder.layer.4.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 16003584
},
{
"name": "roberta.encoder.layer.4.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 16005120
},
{
"name": "roberta.encoder.layer.4.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 17184768
},
{
"name": "roberta.encoder.layer.5.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 17332224
},
{
"name": "roberta.encoder.layer.5.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 17333760
},
{
"name": "roberta.encoder.layer.5.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 17335296
},
{
"name": "roberta.encoder.layer.5.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 17336832
},
{
"name": "roberta.encoder.layer.5.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 17631744
},
{
"name": "roberta.encoder.layer.5.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 17668608
},
{
"name": "roberta.encoder.layer.5.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 17670144
},
{
"name": "roberta.encoder.layer.5.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 17965056
},
{
"name": "roberta.encoder.layer.5.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18001920
},
{
"name": "roberta.encoder.layer.5.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 18003456
},
{
"name": "roberta.encoder.layer.5.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 18298368
},
{
"name": "roberta.encoder.layer.5.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18335232
},
{
"name": "roberta.encoder.layer.5.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 18336768
},
{
"name": "roberta.encoder.layer.5.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 18631680
},
{
"name": "roberta.encoder.layer.5.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18668544
},
{
"name": "roberta.encoder.layer.5.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 18674688
},
{
"name": "roberta.encoder.layer.5.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 19854336
},
{
"name": "roberta.encoder.layer.5.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 20001792
},
{
"name": "roberta.encoder.layer.5.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 20003328
},
{
"name": "roberta.encoder.layer.5.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 20004864
},
{
"name": "roberta.encoder.layer.5.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 20006400
},
{
"name": "roberta.encoder.layer.5.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 21186048
},
{
"name": "roberta.encoder.layer.6.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 21333504
},
{
"name": "roberta.encoder.layer.6.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 21335040
},
{
"name": "roberta.encoder.layer.6.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 21336576
},
{
"name": "roberta.encoder.layer.6.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 21338112
},
{
"name": "roberta.encoder.layer.6.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 21633024
},
{
"name": "roberta.encoder.layer.6.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 21669888
},
{
"name": "roberta.encoder.layer.6.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 21671424
},
{
"name": "roberta.encoder.layer.6.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 21966336
},
{
"name": "roberta.encoder.layer.6.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22003200
},
{
"name": "roberta.encoder.layer.6.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 22004736
},
{
"name": "roberta.encoder.layer.6.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 22299648
},
{
"name": "roberta.encoder.layer.6.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 22336512
},
{
"name": "roberta.encoder.layer.6.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 22338048
},
{
"name": "roberta.encoder.layer.6.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 22632960
},
{
"name": "roberta.encoder.layer.6.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 22669824
},
{
"name": "roberta.encoder.layer.6.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 22675968
},
{
"name": "roberta.encoder.layer.6.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23855616
},
{
"name": "roberta.encoder.layer.6.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24003072
},
{
"name": "roberta.encoder.layer.6.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24004608
},
{
"name": "roberta.encoder.layer.6.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24006144
},
{
"name": "roberta.encoder.layer.6.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24007680
},
{
"name": "roberta.encoder.layer.6.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25187328
},
{
"name": "roberta.encoder.layer.7.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 25334784
},
{
"name": "roberta.encoder.layer.7.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 25336320
},
{
"name": "roberta.encoder.layer.7.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 25337856
},
{
"name": "roberta.encoder.layer.7.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 25339392
},
{
"name": "roberta.encoder.layer.7.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 25634304
},
{
"name": "roberta.encoder.layer.7.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 25671168
},
{
"name": "roberta.encoder.layer.7.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 25672704
},
{
"name": "roberta.encoder.layer.7.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 25967616
},
{
"name": "roberta.encoder.layer.7.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26004480
},
{
"name": "roberta.encoder.layer.7.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 26006016
},
{
"name": "roberta.encoder.layer.7.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 26300928
},
{
"name": "roberta.encoder.layer.7.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 26337792
},
{
"name": "roberta.encoder.layer.7.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 26339328
},
{
"name": "roberta.encoder.layer.7.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 26634240
},
{
"name": "roberta.encoder.layer.7.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 26671104
},
{
"name": "roberta.encoder.layer.7.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 26677248
},
{
"name": "roberta.encoder.layer.7.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 27856896
},
{
"name": "roberta.encoder.layer.7.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28004352
},
{
"name": "roberta.encoder.layer.7.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28005888
},
{
"name": "roberta.encoder.layer.7.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28007424
},
{
"name": "roberta.encoder.layer.7.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 28008960
},
{
"name": "roberta.encoder.layer.7.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 29188608
},
{
"name": "roberta.encoder.layer.8.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29336064
},
{
"name": "roberta.encoder.layer.8.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29337600
},
{
"name": "roberta.encoder.layer.8.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29339136
},
{
"name": "roberta.encoder.layer.8.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 29340672
},
{
"name": "roberta.encoder.layer.8.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 29635584
},
{
"name": "roberta.encoder.layer.8.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29672448
},
{
"name": "roberta.encoder.layer.8.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 29673984
},
{
"name": "roberta.encoder.layer.8.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 29968896
},
{
"name": "roberta.encoder.layer.8.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30005760
},
{
"name": "roberta.encoder.layer.8.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 30007296
},
{
"name": "roberta.encoder.layer.8.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 30302208
},
{
"name": "roberta.encoder.layer.8.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 30339072
},
{
"name": "roberta.encoder.layer.8.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 30340608
},
{
"name": "roberta.encoder.layer.8.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 30635520
},
{
"name": "roberta.encoder.layer.8.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30672384
},
{
"name": "roberta.encoder.layer.8.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 30678528
},
{
"name": "roberta.encoder.layer.8.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 31858176
},
{
"name": "roberta.encoder.layer.8.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 32005632
},
{
"name": "roberta.encoder.layer.8.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 32007168
},
{
"name": "roberta.encoder.layer.8.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 32008704
},
{
"name": "roberta.encoder.layer.8.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 32010240
},
{
"name": "roberta.encoder.layer.8.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33189888
},
{
"name": "roberta.encoder.layer.9.attention.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33337344
},
{
"name": "roberta.encoder.layer.9.attention.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33338880
},
{
"name": "roberta.encoder.layer.9.attention.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33340416
}
],
"md5sum": "8b0fd20e5a0855fa66c9117ece20708e"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 3996672,
"records": [
{
"name": "roberta.encoder.layer.9.attention.output.dense.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 0
},
{
"name": "roberta.encoder.layer.9.attention.output.dense.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 294912
},
{
"name": "roberta.encoder.layer.9.attention.self.key.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 331776
},
{
"name": "roberta.encoder.layer.9.attention.self.key.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 333312
},
{
"name": "roberta.encoder.layer.9.attention.self.key.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 628224
},
{
"name": "roberta.encoder.layer.9.attention.self.query.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 665088
},
{
"name": "roberta.encoder.layer.9.attention.self.query.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 666624
},
{
"name": "roberta.encoder.layer.9.attention.self.query.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 961536
},
{
"name": "roberta.encoder.layer.9.attention.self.value.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 998400
},
{
"name": "roberta.encoder.layer.9.attention.self.value.q_weight",
"shape": [
768,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 999936
},
{
"name": "roberta.encoder.layer.9.attention.self.value.q_scale",
"shape": [
768,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 36864,
"byteOffset": 1294848
},
{
"name": "roberta.encoder.layer.9.intermediate.dense.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 1331712
},
{
"name": "roberta.encoder.layer.9.intermediate.dense.q_weight",
"shape": [
3072,
96
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 1337856
},
{
"name": "roberta.encoder.layer.9.intermediate.dense.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 2517504
},
{
"name": "roberta.encoder.layer.9.output.LayerNorm.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 2664960
},
{
"name": "roberta.encoder.layer.9.output.LayerNorm.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 2666496
},
{
"name": "roberta.encoder.layer.9.output.dense.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 2668032
},
{
"name": "roberta.encoder.layer.9.output.dense.q_weight",
"shape": [
768,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 2669568
},
{
"name": "roberta.encoder.layer.9.output.dense.q_scale",
"shape": [
768,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 3849216
}
],
"md5sum": "1866e62a76c020b89886cd93720b6256"
}
]
}