XLM-R_L19_H12_FF3072 / config.json
Pruned version of XLM-R: 19 hidden layers (L19), 12 of 16 attention heads kept per layer (H12), and a 3072-dimensional feed-forward layer (FF3072), per the config below.
{
  "_name_or_path": "Unbabel/XLM-R-19L",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 19,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "pruned_heads": {
    "0": [12, 5, 6, 14],
    "1": [13, 12, 5, 15],
    "2": [8, 9, 10, 13],
    "3": [9, 10, 4, 12],
    "4": [8, 9, 3, 13],
    "5": [2, 3, 4, 12],
    "6": [0, 9, 3, 12],
    "7": [0, 9, 2, 15],
    "8": [0, 8, 3, 13],
    "9": [0, 15, 14, 7],
    "10": [0, 1, 11, 12],
    "11": [11, 15, 6, 7],
    "12": [8, 10, 3, 14],
    "13": [8, 2, 4, 5],
    "14": [0, 9, 11, 14],
    "15": [8, 3, 4, 6],
    "16": [8, 10, 2, 3],
    "17": [0, 8, 12, 6],
    "18": [0, 8, 15, 7]
  },
  "torch_dtype": "float32",
  "transformers_version": "4.10.3",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
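
For reference, a minimal sketch of how this geometry plays out when the model is built with Hugging Face transformers. It assumes the config above is saved in a local directory named XLM-R_L19_H12_FF3072 (the path is illustrative) and builds the model from the config alone, so the weights are randomly initialized; real use would load the checkpoint's weights alongside it.

    from transformers import XLMRobertaConfig, XLMRobertaModel

    config = XLMRobertaConfig.from_pretrained("XLM-R_L19_H12_FF3072")

    # The model name encodes the pruned geometry stored in this file:
    assert config.num_hidden_layers == 19      # L19
    assert config.intermediate_size == 3072    # FF3072 (down from 4096 in XLM-R large)
    assert all(len(heads) == 4 for heads in config.pruned_heads.values())

    # transformers applies "pruned_heads" when the model is initialized,
    # so each layer keeps 16 - 4 = 12 active attention heads (H12).
    model = XLMRobertaModel(config)
    print(model.encoder.layer[0].attention.self.num_attention_heads)  # 12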