{ "metadata": { "ParamSize": 278, "ParamBytes": 70289572.0, "BitsPerParam": 4.511266288677072 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 32950948, "records": [ { "name": "classifier.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 0 }, { "name": "classifier.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1536 }, { "name": "classifier.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 296448 }, { "name": "classifier.out_proj.bias", "shape": [ 2 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4, "byteOffset": 333312 }, { "name": "classifier.out_proj.q_weight", "shape": [ 2, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 333316 }, { "name": "classifier.out_proj.q_scale", "shape": [ 2, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 96, "byteOffset": 334084 }, { "name": "roberta.embeddings.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 334180 }, { "name": "roberta.embeddings.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 335716 }, { "name": "roberta.embeddings.position_embeddings.q_weight", "shape": [ 514, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 197376, "byteOffset": 337252 }, { "name": "roberta.embeddings.position_embeddings.q_scale", "shape": [ 514, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24672, "byteOffset": 534628 }, { "name": "roberta.embeddings.token_type_embeddings.q_weight", "shape": [ 1, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 384, "byteOffset": 559300 }, { "name": "roberta.embeddings.token_type_embeddings.q_scale", "shape": [ 1, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48, "byteOffset": 559684 }, { "name": "roberta.embeddings.word_embeddings.q_weight", "shape": [ 50265, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19301760, "byteOffset": 559732 }, { "name": "roberta.embeddings.word_embeddings.q_scale", "shape": [ 50265, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2412720, "byteOffset": 19861492 }, { "name": "roberta.encoder.layer.0.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22274212 }, { "name": "roberta.encoder.layer.0.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22275748 }, { "name": "roberta.encoder.layer.0.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22277284 }, { "name": "roberta.encoder.layer.0.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22278820 }, { "name": "roberta.encoder.layer.0.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 22573732 }, { "name": "roberta.encoder.layer.0.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22610596 }, { "name": "roberta.encoder.layer.0.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22612132 }, { "name": "roberta.encoder.layer.0.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 22907044 }, { "name": "roberta.encoder.layer.0.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22943908 }, { "name": "roberta.encoder.layer.0.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22945444 }, { "name": "roberta.encoder.layer.0.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 23240356 }, { "name": "roberta.encoder.layer.0.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23277220 }, { "name": "roberta.encoder.layer.0.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 23278756 }, { "name": "roberta.encoder.layer.0.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 23573668 }, { "name": "roberta.encoder.layer.0.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23610532 }, { "name": "roberta.encoder.layer.0.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 23616676 }, { "name": "roberta.encoder.layer.0.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 24796324 }, { "name": "roberta.encoder.layer.0.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24943780 }, { "name": "roberta.encoder.layer.0.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24945316 }, { "name": "roberta.encoder.layer.0.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24946852 }, { "name": "roberta.encoder.layer.0.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24948388 }, { "name": "roberta.encoder.layer.0.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26128036 }, { "name": "roberta.encoder.layer.1.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26275492 }, { "name": "roberta.encoder.layer.1.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26277028 }, { "name": "roberta.encoder.layer.1.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26278564 }, { "name": "roberta.encoder.layer.1.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26280100 }, { "name": "roberta.encoder.layer.1.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 26575012 }, { "name": "roberta.encoder.layer.1.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26611876 }, { "name": "roberta.encoder.layer.1.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26613412 }, { "name": "roberta.encoder.layer.1.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 26908324 }, { "name": "roberta.encoder.layer.1.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26945188 }, { "name": "roberta.encoder.layer.1.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26946724 }, { "name": "roberta.encoder.layer.1.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 27241636 }, { "name": "roberta.encoder.layer.1.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27278500 }, { "name": "roberta.encoder.layer.1.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27280036 }, { "name": "roberta.encoder.layer.1.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 27574948 }, { "name": "roberta.encoder.layer.1.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 27611812 }, { "name": "roberta.encoder.layer.1.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 27617956 }, { "name": "roberta.encoder.layer.1.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 28797604 }, { "name": "roberta.encoder.layer.1.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28945060 }, { "name": "roberta.encoder.layer.1.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28946596 }, { "name": "roberta.encoder.layer.1.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28948132 }, { "name": "roberta.encoder.layer.1.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 28949668 }, { "name": "roberta.encoder.layer.1.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 30129316 }, { "name": "roberta.encoder.layer.10.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30276772 }, { "name": "roberta.encoder.layer.10.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30278308 }, { "name": "roberta.encoder.layer.10.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30279844 }, { "name": "roberta.encoder.layer.10.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30281380 }, { "name": "roberta.encoder.layer.10.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 30576292 }, { "name": "roberta.encoder.layer.10.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30613156 }, { "name": "roberta.encoder.layer.10.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30614692 }, { "name": "roberta.encoder.layer.10.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 30909604 }, { "name": "roberta.encoder.layer.10.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30946468 }, { "name": "roberta.encoder.layer.10.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30948004 }, { "name": "roberta.encoder.layer.10.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 31242916 }, { "name": "roberta.encoder.layer.10.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31279780 }, { "name": "roberta.encoder.layer.10.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 31281316 }, { "name": "roberta.encoder.layer.10.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 31576228 }, { "name": "roberta.encoder.layer.10.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31613092 }, { "name": "roberta.encoder.layer.10.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 31619236 }, { "name": "roberta.encoder.layer.10.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 32798884 }, { "name": "roberta.encoder.layer.10.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32946340 }, { "name": "roberta.encoder.layer.10.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32947876 }, { "name": "roberta.encoder.layer.10.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32949412 } ], "md5sum": "e8e1b5159d63050d7bbc3a3cff38c00e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33341952, "records": [ { "name": "roberta.encoder.layer.10.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "roberta.encoder.layer.10.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 1179648 }, { "name": "roberta.encoder.layer.11.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1327104 }, { "name": "roberta.encoder.layer.11.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1328640 }, { "name": "roberta.encoder.layer.11.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1330176 }, { "name": "roberta.encoder.layer.11.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1331712 }, { "name": "roberta.encoder.layer.11.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 1626624 }, { "name": "roberta.encoder.layer.11.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1663488 }, { "name": "roberta.encoder.layer.11.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1665024 }, { "name": "roberta.encoder.layer.11.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 1959936 }, { "name": "roberta.encoder.layer.11.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1996800 }, { "name": "roberta.encoder.layer.11.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1998336 }, { "name": "roberta.encoder.layer.11.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2293248 }, { "name": "roberta.encoder.layer.11.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2330112 }, { "name": "roberta.encoder.layer.11.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2331648 }, { "name": "roberta.encoder.layer.11.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2626560 }, { "name": "roberta.encoder.layer.11.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 2663424 }, { "name": "roberta.encoder.layer.11.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 2669568 }, { "name": "roberta.encoder.layer.11.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 3849216 }, { "name": "roberta.encoder.layer.11.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3996672 }, { "name": "roberta.encoder.layer.11.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3998208 }, { "name": "roberta.encoder.layer.11.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3999744 }, { "name": "roberta.encoder.layer.11.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 4001280 }, { "name": "roberta.encoder.layer.11.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 5180928 }, { "name": "roberta.encoder.layer.2.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5328384 }, { "name": "roberta.encoder.layer.2.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5329920 }, { "name": "roberta.encoder.layer.2.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5331456 }, { "name": "roberta.encoder.layer.2.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5332992 }, { "name": "roberta.encoder.layer.2.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 5627904 }, { "name": "roberta.encoder.layer.2.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5664768 }, { "name": "roberta.encoder.layer.2.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5666304 }, { "name": "roberta.encoder.layer.2.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 5961216 }, { "name": "roberta.encoder.layer.2.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5998080 }, { "name": "roberta.encoder.layer.2.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5999616 }, { "name": "roberta.encoder.layer.2.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 6294528 }, { "name": "roberta.encoder.layer.2.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6331392 }, { "name": "roberta.encoder.layer.2.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 6332928 }, { "name": "roberta.encoder.layer.2.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 6627840 }, { "name": "roberta.encoder.layer.2.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6664704 }, { "name": "roberta.encoder.layer.2.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 6670848 }, { "name": "roberta.encoder.layer.2.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 7850496 }, { "name": "roberta.encoder.layer.2.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7997952 }, { "name": "roberta.encoder.layer.2.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7999488 }, { "name": "roberta.encoder.layer.2.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 8001024 }, { "name": "roberta.encoder.layer.2.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 8002560 }, { "name": "roberta.encoder.layer.2.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 9182208 }, { "name": "roberta.encoder.layer.3.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9329664 }, { "name": "roberta.encoder.layer.3.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9331200 }, { "name": "roberta.encoder.layer.3.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9332736 }, { "name": "roberta.encoder.layer.3.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9334272 }, { "name": "roberta.encoder.layer.3.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 9629184 }, { "name": "roberta.encoder.layer.3.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9666048 }, { "name": "roberta.encoder.layer.3.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9667584 }, { "name": "roberta.encoder.layer.3.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 9962496 }, { "name": "roberta.encoder.layer.3.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9999360 }, { "name": "roberta.encoder.layer.3.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10000896 }, { "name": "roberta.encoder.layer.3.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 10295808 }, { "name": "roberta.encoder.layer.3.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10332672 }, { "name": "roberta.encoder.layer.3.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10334208 }, { "name": "roberta.encoder.layer.3.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 10629120 }, { "name": "roberta.encoder.layer.3.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 10665984 }, { "name": "roberta.encoder.layer.3.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 10672128 }, { "name": "roberta.encoder.layer.3.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 11851776 }, { "name": "roberta.encoder.layer.3.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11999232 }, { "name": "roberta.encoder.layer.3.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 12000768 }, { "name": "roberta.encoder.layer.3.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 12002304 }, { "name": "roberta.encoder.layer.3.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 12003840 }, { "name": "roberta.encoder.layer.3.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 13183488 }, { "name": "roberta.encoder.layer.4.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 13330944 }, { "name": "roberta.encoder.layer.4.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 13332480 }, { "name": "roberta.encoder.layer.4.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 13334016 }, { "name": "roberta.encoder.layer.4.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 13335552 }, { "name": "roberta.encoder.layer.4.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 13630464 }, { "name": "roberta.encoder.layer.4.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 13667328 }, { "name": "roberta.encoder.layer.4.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 13668864 }, { "name": "roberta.encoder.layer.4.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 13963776 }, { "name": "roberta.encoder.layer.4.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14000640 }, { "name": "roberta.encoder.layer.4.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 14002176 }, { "name": "roberta.encoder.layer.4.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 14297088 }, { "name": "roberta.encoder.layer.4.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14333952 }, { "name": "roberta.encoder.layer.4.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 14335488 }, { "name": "roberta.encoder.layer.4.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 14630400 }, { "name": "roberta.encoder.layer.4.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14667264 }, { "name": "roberta.encoder.layer.4.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 14673408 }, { "name": "roberta.encoder.layer.4.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 15853056 }, { "name": "roberta.encoder.layer.4.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 16000512 }, { "name": "roberta.encoder.layer.4.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 16002048 }, { "name": "roberta.encoder.layer.4.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 16003584 }, { "name": "roberta.encoder.layer.4.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 16005120 }, { "name": "roberta.encoder.layer.4.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 17184768 }, { "name": "roberta.encoder.layer.5.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 17332224 }, { "name": "roberta.encoder.layer.5.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 17333760 }, { "name": "roberta.encoder.layer.5.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 17335296 }, { "name": "roberta.encoder.layer.5.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 17336832 }, { "name": "roberta.encoder.layer.5.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 17631744 }, { "name": "roberta.encoder.layer.5.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 17668608 }, { "name": "roberta.encoder.layer.5.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 17670144 }, { "name": "roberta.encoder.layer.5.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 17965056 }, { "name": "roberta.encoder.layer.5.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18001920 }, { "name": "roberta.encoder.layer.5.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 18003456 }, { "name": "roberta.encoder.layer.5.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 18298368 }, { "name": "roberta.encoder.layer.5.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18335232 }, { "name": "roberta.encoder.layer.5.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 18336768 }, { "name": "roberta.encoder.layer.5.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 18631680 }, { "name": "roberta.encoder.layer.5.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18668544 }, { "name": "roberta.encoder.layer.5.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 18674688 }, { "name": "roberta.encoder.layer.5.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 19854336 }, { "name": "roberta.encoder.layer.5.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 20001792 }, { "name": "roberta.encoder.layer.5.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 20003328 }, { "name": "roberta.encoder.layer.5.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 20004864 }, { "name": "roberta.encoder.layer.5.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 20006400 }, { "name": "roberta.encoder.layer.5.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 21186048 }, { "name": "roberta.encoder.layer.6.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 21333504 }, { "name": "roberta.encoder.layer.6.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 21335040 }, { "name": "roberta.encoder.layer.6.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 21336576 }, { "name": "roberta.encoder.layer.6.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 21338112 }, { "name": "roberta.encoder.layer.6.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 21633024 }, { "name": "roberta.encoder.layer.6.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 21669888 }, { "name": "roberta.encoder.layer.6.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 21671424 }, { "name": "roberta.encoder.layer.6.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 21966336 }, { "name": "roberta.encoder.layer.6.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22003200 }, { "name": "roberta.encoder.layer.6.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22004736 }, { "name": "roberta.encoder.layer.6.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 22299648 }, { "name": "roberta.encoder.layer.6.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22336512 }, { "name": "roberta.encoder.layer.6.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22338048 }, { "name": "roberta.encoder.layer.6.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 22632960 }, { "name": "roberta.encoder.layer.6.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 22669824 }, { "name": "roberta.encoder.layer.6.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 22675968 }, { "name": "roberta.encoder.layer.6.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 23855616 }, { "name": "roberta.encoder.layer.6.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24003072 }, { "name": "roberta.encoder.layer.6.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24004608 }, { "name": "roberta.encoder.layer.6.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24006144 }, { "name": "roberta.encoder.layer.6.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24007680 }, { "name": "roberta.encoder.layer.6.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25187328 }, { "name": "roberta.encoder.layer.7.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25334784 }, { "name": "roberta.encoder.layer.7.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25336320 }, { "name": "roberta.encoder.layer.7.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25337856 }, { "name": "roberta.encoder.layer.7.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 25339392 }, { "name": "roberta.encoder.layer.7.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 25634304 }, { "name": "roberta.encoder.layer.7.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25671168 }, { "name": "roberta.encoder.layer.7.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 25672704 }, { "name": "roberta.encoder.layer.7.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 25967616 }, { "name": "roberta.encoder.layer.7.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26004480 }, { "name": "roberta.encoder.layer.7.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26006016 }, { "name": "roberta.encoder.layer.7.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 26300928 }, { "name": "roberta.encoder.layer.7.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26337792 }, { "name": "roberta.encoder.layer.7.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26339328 }, { "name": "roberta.encoder.layer.7.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 26634240 }, { "name": "roberta.encoder.layer.7.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26671104 }, { "name": "roberta.encoder.layer.7.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 26677248 }, { "name": "roberta.encoder.layer.7.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 27856896 }, { "name": "roberta.encoder.layer.7.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28004352 }, { "name": "roberta.encoder.layer.7.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28005888 }, { "name": "roberta.encoder.layer.7.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28007424 }, { "name": "roberta.encoder.layer.7.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 28008960 }, { "name": "roberta.encoder.layer.7.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 29188608 }, { "name": "roberta.encoder.layer.8.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29336064 }, { "name": "roberta.encoder.layer.8.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29337600 }, { "name": "roberta.encoder.layer.8.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29339136 }, { "name": "roberta.encoder.layer.8.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 29340672 }, { "name": "roberta.encoder.layer.8.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 29635584 }, { "name": "roberta.encoder.layer.8.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29672448 }, { "name": "roberta.encoder.layer.8.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 29673984 }, { "name": "roberta.encoder.layer.8.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 29968896 }, { "name": "roberta.encoder.layer.8.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30005760 }, { "name": "roberta.encoder.layer.8.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30007296 }, { "name": "roberta.encoder.layer.8.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 30302208 }, { "name": "roberta.encoder.layer.8.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30339072 }, { "name": "roberta.encoder.layer.8.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30340608 }, { "name": "roberta.encoder.layer.8.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 30635520 }, { "name": "roberta.encoder.layer.8.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30672384 }, { "name": "roberta.encoder.layer.8.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 30678528 }, { "name": "roberta.encoder.layer.8.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 31858176 }, { "name": "roberta.encoder.layer.8.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32005632 }, { "name": "roberta.encoder.layer.8.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32007168 }, { "name": "roberta.encoder.layer.8.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32008704 }, { "name": "roberta.encoder.layer.8.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 32010240 }, { "name": "roberta.encoder.layer.8.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33189888 }, { "name": "roberta.encoder.layer.9.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33337344 }, { "name": "roberta.encoder.layer.9.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33338880 }, { "name": "roberta.encoder.layer.9.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33340416 } ], "md5sum": "8b0fd20e5a0855fa66c9117ece20708e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 3996672, "records": [ { "name": "roberta.encoder.layer.9.attention.output.dense.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 0 }, { "name": "roberta.encoder.layer.9.attention.output.dense.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 294912 }, { "name": "roberta.encoder.layer.9.attention.self.key.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 331776 }, { "name": "roberta.encoder.layer.9.attention.self.key.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 333312 }, { "name": "roberta.encoder.layer.9.attention.self.key.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 628224 }, { "name": "roberta.encoder.layer.9.attention.self.query.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 665088 }, { "name": "roberta.encoder.layer.9.attention.self.query.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 666624 }, { "name": "roberta.encoder.layer.9.attention.self.query.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 961536 }, { "name": "roberta.encoder.layer.9.attention.self.value.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 998400 }, { "name": "roberta.encoder.layer.9.attention.self.value.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 999936 }, { "name": "roberta.encoder.layer.9.attention.self.value.q_scale", "shape": [ 768, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 1294848 }, { "name": "roberta.encoder.layer.9.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 1331712 }, { "name": "roberta.encoder.layer.9.intermediate.dense.q_weight", "shape": [ 3072, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 1337856 }, { "name": "roberta.encoder.layer.9.intermediate.dense.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 2517504 }, { "name": "roberta.encoder.layer.9.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2664960 }, { "name": "roberta.encoder.layer.9.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2666496 }, { "name": "roberta.encoder.layer.9.output.dense.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2668032 }, { "name": "roberta.encoder.layer.9.output.dense.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 2669568 }, { "name": "roberta.encoder.layer.9.output.dense.q_scale", "shape": [ 768, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 3849216 } ], "md5sum": "1866e62a76c020b89886cd93720b6256" } ] }