{ "metadata": { "ParamSize": 77, "ParamBytes": 90261504.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 33304320, "records": [ { "name": "embeddings.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 0 }, { "name": "embeddings.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 768 }, { "name": "embeddings.position_embeddings.weight", "shape": [ 512, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 1536 }, { "name": "embeddings.token_type_embeddings.weight", "shape": [ 2, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 394752 }, { "name": "embeddings.word_embeddings.weight", "shape": [ 30522, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 23440896, "byteOffset": 396288 }, { "name": "encoder.layer.0.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 23837184 }, { "name": "encoder.layer.0.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 23837952 }, { "name": "encoder.layer.0.attention.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 23838720 }, { "name": "encoder.layer.0.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 23839488 }, { "name": "encoder.layer.0.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24134400 }, { "name": "encoder.layer.0.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 24136704 }, { "name": "encoder.layer.0.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25021440 }, { "name": "encoder.layer.0.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 25024512 }, { "name": "encoder.layer.0.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 26204160 }, { "name": "encoder.layer.0.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 26204928 }, { "name": "encoder.layer.0.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 26205696 }, { "name": "encoder.layer.0.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 26206464 }, { "name": "encoder.layer.1.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 27386112 }, { "name": "encoder.layer.1.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 27386880 }, { "name": "encoder.layer.1.attention.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 27387648 }, { "name": "encoder.layer.1.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27388416 }, { "name": "encoder.layer.1.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27683328 }, { "name": "encoder.layer.1.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 27685632 }, { "name": "encoder.layer.1.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 28570368 }, { "name": "encoder.layer.1.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 28573440 }, { "name": "encoder.layer.1.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 29753088 }, { "name": "encoder.layer.1.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 29753856 }, { "name": "encoder.layer.1.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 29754624 }, { "name": "encoder.layer.1.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 29755392 }, { "name": "encoder.layer.2.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 30935040 }, { "name": "encoder.layer.2.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 30935808 }, { "name": "encoder.layer.2.attention.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 30936576 }, { "name": "encoder.layer.2.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30937344 }, { "name": "encoder.layer.2.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31232256 }, { "name": "encoder.layer.2.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 31234560 }, { "name": "encoder.layer.2.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 32119296 }, { "name": "encoder.layer.2.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 32122368 }, { "name": "encoder.layer.2.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 33302016 }, { "name": "encoder.layer.2.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 33302784 }, { "name": "encoder.layer.2.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 33303552 } ], "md5sum": "0edfb5a40fc667e51e9fd0c9bcfe9f80" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 11826432, "records": [ { "name": "encoder.layer.2.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "encoder.layer.3.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 1179648 }, { "name": "encoder.layer.3.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 1180416 }, { "name": "encoder.layer.3.attention.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 1181184 }, { "name": "encoder.layer.3.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1181952 }, { "name": "encoder.layer.3.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1476864 }, { "name": "encoder.layer.3.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 1479168 }, { "name": "encoder.layer.3.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 2363904 }, { "name": "encoder.layer.3.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 2366976 }, { "name": "encoder.layer.3.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 3546624 }, { "name": "encoder.layer.3.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 3547392 }, { "name": "encoder.layer.3.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 3548160 }, { "name": "encoder.layer.3.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 3548928 }, { "name": "encoder.layer.4.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 4728576 }, { "name": "encoder.layer.4.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 4729344 }, { "name": "encoder.layer.4.attention.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 4730112 }, { "name": "encoder.layer.4.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 4730880 }, { "name": "encoder.layer.4.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5025792 }, { "name": "encoder.layer.4.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 5028096 }, { "name": "encoder.layer.4.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 5912832 }, { "name": "encoder.layer.4.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 5915904 }, { "name": "encoder.layer.4.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 7095552 }, { "name": "encoder.layer.4.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 7096320 }, { "name": "encoder.layer.4.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 7097088 }, { "name": "encoder.layer.4.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 7097856 }, { "name": "encoder.layer.5.attention.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 8277504 }, { "name": "encoder.layer.5.attention.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 8278272 }, { "name": "encoder.layer.5.attention.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 8279040 }, { "name": "encoder.layer.5.attention.output.dense.weight", "shape": [ 384, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 8279808 }, { "name": "encoder.layer.5.attention.self.qkv.bias", "shape": [ 1152 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8574720 }, { "name": "encoder.layer.5.attention.self.qkv.weight", "shape": [ 1152, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 8577024 }, { "name": "encoder.layer.5.intermediate.dense.bias", "shape": [ 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 9461760 }, { "name": "encoder.layer.5.intermediate.dense.weight", "shape": [ 1536, 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9464832 }, { "name": "encoder.layer.5.output.LayerNorm.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 10644480 }, { "name": "encoder.layer.5.output.LayerNorm.weight", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 10645248 }, { "name": "encoder.layer.5.output.dense.bias", "shape": [ 384 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 768, "byteOffset": 10646016 }, { "name": "encoder.layer.5.output.dense.weight", "shape": [ 384, 1536 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 10646784 } ], "md5sum": "332c311d4607c5d5df6ea9eb19273e4e" } ] }