{ "metadata": { "total_size": 5869110528 }, "weight_map": { "bert.embeddings.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.embeddings.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.embeddings.token_type_embeddings.weight": "pytorch_model-00001-of-00002.bin", "bert.embeddings.word_embeddings.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.0.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.1.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.10.attention.output.LayerNorm.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.attention.output.LayerNorm.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.attention.output.dense.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.attention.output.dense.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.attention.self.Wqkv.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.attention.self.Wqkv.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.mlp.gated_layers.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.mlp.layernorm.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.mlp.layernorm.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.mlp.wo.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.10.mlp.wo.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.attention.output.LayerNorm.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.attention.output.LayerNorm.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.attention.output.dense.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.attention.output.dense.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.attention.self.Wqkv.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.attention.self.Wqkv.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.mlp.gated_layers.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.mlp.layernorm.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.mlp.layernorm.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.mlp.wo.bias": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.11.mlp.wo.weight": "pytorch_model-00002-of-00002.bin", "bert.encoder.layer.2.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.2.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.3.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.4.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.5.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.6.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.7.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.8.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.attention.self.Wqkv.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.attention.self.Wqkv.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.mlp.gated_layers.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.mlp.layernorm.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.mlp.layernorm.weight": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.mlp.wo.bias": "pytorch_model-00001-of-00002.bin", "bert.encoder.layer.9.mlp.wo.weight": "pytorch_model-00001-of-00002.bin", "cls.predictions.decoder.bias": "pytorch_model-00002-of-00002.bin", "cls.predictions.decoder.weight": "pytorch_model-00001-of-00002.bin", "cls.predictions.transform.LayerNorm.bias": "pytorch_model-00002-of-00002.bin", "cls.predictions.transform.LayerNorm.weight": "pytorch_model-00002-of-00002.bin", "cls.predictions.transform.dense.bias": "pytorch_model-00002-of-00002.bin", "cls.predictions.transform.dense.weight": "pytorch_model-00002-of-00002.bin" } }