| { | |
| "metadata": { | |
| "ParamSize": 201, | |
| "ParamBytes": 498588680.0, | |
| "BitsPerParam": 32.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 77207040, | |
| "records": [ | |
| { | |
| "name": "roberta.embeddings.word_embeddings.weight", | |
| "shape": [ | |
| 50265, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 77207040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4a61cba31613349f9ffc22e1606a39c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32696836, | |
| "records": [ | |
| { | |
| "name": "classifier.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "classifier.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 1536 | |
| }, | |
| { | |
| "name": "classifier.out_proj.bias", | |
| "shape": [ | |
| 2 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4, | |
| "byteOffset": 1181184 | |
| }, | |
| { | |
| "name": "classifier.out_proj.weight", | |
| "shape": [ | |
| 2, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3072, | |
| "byteOffset": 1181188 | |
| }, | |
| { | |
| "name": "roberta.embeddings.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1184260 | |
| }, | |
| { | |
| "name": "roberta.embeddings.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1185796 | |
| }, | |
| { | |
| "name": "roberta.embeddings.position_embeddings.weight", | |
| "shape": [ | |
| 514, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 789504, | |
| "byteOffset": 1187332 | |
| }, | |
| { | |
| "name": "roberta.embeddings.token_type_embeddings.weight", | |
| "shape": [ | |
| 1, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1976836 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1978372 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1979908 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1981444 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 1982980 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 3162628 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 3164164 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4343812 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 4345348 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 5524996 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 5526532 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 6706180 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 6712324 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 11430916 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 11432452 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 11433988 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.0.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 11435524 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 16154116 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 16155652 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 16157188 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 16158724 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 17338372 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 17339908 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18519556 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 18521092 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 19700740 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 19702276 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 20881924 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 20888068 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 25606660 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 25608196 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 25609732 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.1.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 25611268 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 30329860 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 30331396 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 30332932 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 30334468 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 31514116 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 31515652 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 32695300 | |
| } | |
| ], | |
| "md5sum": "bb1b378319baec7f1ca617bf8f86ff32" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30718464, | |
| "records": [ | |
| { | |
| "name": "roberta.encoder.layer.10.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1179648 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 1181184 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 2360832 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 2366976 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 7085568 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 7087104 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 7088640 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.10.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 7090176 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 11808768 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 11810304 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 11811840 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 11813376 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 12993024 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 12994560 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14174208 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 14175744 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 15355392 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 15356928 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 16536576 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 16542720 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 21261312 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 21262848 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 21264384 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.11.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 21265920 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 25984512 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 25986048 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 25987584 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 25989120 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 27168768 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 27170304 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28349952 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 28351488 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 29531136 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 29532672 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 30712320 | |
| } | |
| ], | |
| "md5sum": "894bfb02ebd46757ea2fa28fdc8f9079" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33074688, | |
| "records": [ | |
| { | |
| "name": "roberta.encoder.layer.2.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4720128 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4721664 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.2.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4723200 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9441792 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9443328 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9444864 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 9446400 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 10626048 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 10627584 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 11807232 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 11808768 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 12988416 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 12989952 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 14169600 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 14175744 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18894336 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18895872 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18897408 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.3.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23617536 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23619072 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23620608 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 23622144 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24801792 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 24803328 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 25982976 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 25984512 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 27164160 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 27165696 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 28345344 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28351488 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33070080 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33071616 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.4.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33073152 | |
| } | |
| ], | |
| "md5sum": "0204eb9ffd7d0691cdba514cfc076112" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33074688, | |
| "records": [ | |
| { | |
| "name": "roberta.encoder.layer.4.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4720128 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4721664 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 4723200 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 5902848 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 5904384 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 7084032 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 7085568 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 8265216 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 8266752 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 9446400 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 9452544 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14171136 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14172672 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14174208 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.5.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 14175744 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18894336 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18895872 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 18897408 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 20078592 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 20080128 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 21259776 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 21261312 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 22440960 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 22442496 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23622144 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 23628288 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28346880 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28348416 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28349952 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.6.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28351488 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33070080 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33071616 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 33073152 | |
| } | |
| ], | |
| "md5sum": "f2720ca40469da131d390ea8694443fd" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33080832, | |
| "records": [ | |
| { | |
| "name": "roberta.encoder.layer.7.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 1179648 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 1181184 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 2360832 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 2362368 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 3542016 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 3543552 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 4723200 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4729344 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9447936 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 9451008 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.7.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 9452544 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14171136 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14172672 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 14174208 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 14175744 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 15355392 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 15356928 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 16536576 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 16538112 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 17717760 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 17719296 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 18905088 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23623680 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23625216 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 23626752 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.8.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 23628288 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28346880 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28348416 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 28349952 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 28351488 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.self.key.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 29531136 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.self.key.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 29532672 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.self.query.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 30712320 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.self.query.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 30713856 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.self.value.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 31893504 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.attention.self.value.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 31895040 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.intermediate.dense.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33074688 | |
| } | |
| ], | |
| "md5sum": "96b9d77f860a578f5bc124742a2447aa" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 9441792, | |
| "records": [ | |
| { | |
| "name": "roberta.encoder.layer.9.intermediate.dense.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.output.LayerNorm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.output.LayerNorm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4720128 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.output.dense.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 4721664 | |
| }, | |
| { | |
| "name": "roberta.encoder.layer.9.output.dense.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4723200 | |
| } | |
| ], | |
| "md5sum": "369c5d4f9e3ffd5ee22f0023b2faba04" | |
| } | |
| ] | |
| } |