niobures committed (verified)
Commit a8fd9e4 · Parent(s): 9a85975

multilingual-MiniLMv2-L6-mnli-xnli
.gitattributes CHANGED
@@ -37,3 +37,5 @@ paraphrase-multilingual-MiniLM-L12-v2/unigram.json filter=lfs diff=lfs merge=lfs
 crossencoder-mMiniLMv2-L12-mmarcoFR_ONNX/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 mmarco-mMiniLMv2-L6-H384-v1-onnx-o4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 mMiniLM-1215/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+multilingual-MiniLMv2-L6-mnli-xnli/onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+multilingual-MiniLMv2-L6-mnli-xnli/tokenizer.json filter=lfs diff=lfs merge=lfs -text
multilingual-MiniLMv2-L6-mnli-xnli/.gitattributes ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
multilingual-MiniLMv2-L6-mnli-xnli/README.md ADDED
@@ -0,0 +1,152 @@
---
language:
- multilingual
- en
- ar
- bg
- de
- el
- es
- fr
- hi
- ru
- sw
- th
- tr
- ur
- vi
- zh
license: mit
tags:
- zero-shot-classification
- text-classification
- nli
- pytorch
metrics:
- accuracy
datasets:
- multi_nli
- xnli
pipeline_tag: zero-shot-classification
widget:
- text: "Angela Merkel ist eine Politikerin in Deutschland und Vorsitzende der CDU"
  candidate_labels: "politics, economy, entertainment, environment"
---

# Multilingual MiniLMv2-L6-mnli-xnli

## Model description
This multilingual model can perform natural language inference (NLI) on 100+ languages and is therefore also suitable for multilingual zero-shot classification. The underlying multilingual-MiniLM-L6 model was created by Microsoft and distilled from XLM-RoBERTa-large (see the details [in the original paper](https://arxiv.org/pdf/2002.10957.pdf) and newer information in [this repo](https://github.com/microsoft/unilm/tree/master/minilm)). The model was then fine-tuned on the [XNLI dataset](https://huggingface.co/datasets/xnli), which contains hypothesis-premise pairs from 15 languages, as well as the English [MNLI dataset](https://huggingface.co/datasets/multi_nli).

The main advantage of distilled models is that they are smaller (faster inference, lower memory requirements) than their teacher (XLM-RoBERTa-large). The disadvantage is that they lose some of the larger teacher's performance.

For the highest inference speed, I recommend this 6-layer model; for higher performance, I recommend [mDeBERTa-v3-base-mnli-xnli](https://huggingface.co/MoritzLaurer/mDeBERTa-v3-base-mnli-xnli) (as of 14.02.2023).

### How to use the model
#### Simple zero-shot classification pipeline
```python
from transformers import pipeline

classifier = pipeline("zero-shot-classification", model="MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli")

sequence_to_classify = "Angela Merkel ist eine Politikerin in Deutschland und Vorsitzende der CDU"
candidate_labels = ["politics", "economy", "entertainment", "environment"]
output = classifier(sequence_to_classify, candidate_labels, multi_label=False)
print(output)
```
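The pipeline returns a dictionary with the keys `sequence`, `labels` (the candidate labels sorted by descending score), and `scores`. A small follow-up, reusing the `output` variable from above, to extract the top prediction:
```python
# `labels` and `scores` are sorted in parallel, highest score first,
# so the first entries are the model's top prediction.
top_label, top_score = output["labels"][0], output["scores"][0]
print(f"Top label: {top_label} ({top_score:.3f})")
```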
65
+ #### NLI use-case
66
+ ```python
67
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
68
+ import torch
69
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
70
+
71
+ model_name = "MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli"
72
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
73
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
74
+
75
+ premise = "Angela Merkel ist eine Politikerin in Deutschland und Vorsitzende der CDU"
76
+ hypothesis = "Emmanuel Macron is the President of France"
77
+
78
+ input = tokenizer(premise, hypothesis, truncation=True, return_tensors="pt")
79
+ output = model(input["input_ids"].to(device)) # device = "cuda:0" or "cpu"
80
+ prediction = torch.softmax(output["logits"][0], -1).tolist()
81
+ label_names = ["entailment", "neutral", "contradiction"]
82
+ prediction = {name: round(float(pred) * 100, 1) for pred, name in zip(prediction, label_names)}
83
+ print(prediction)
84
+ ```
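Note that the hard-coded `label_names` above mirror the `id2label` mapping in this model's `config.json` (0: entailment, 1: neutral, 2: contradiction); an optional variant reads the names from the config instead:
```python
# Equivalent to the dict comprehension above, but takes the label names
# from the model config instead of hard-coding them.
probs = torch.softmax(output["logits"][0], -1).tolist()
prediction = {model.config.id2label[i]: round(p * 100, 1) for i, p in enumerate(probs)}
print(prediction)
```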

### Training data
This model was trained on the XNLI development dataset and the MNLI train dataset. The XNLI development set consists of 2,490 texts professionally translated from English into 14 other languages (37,350 texts in total; see [this paper](https://arxiv.org/pdf/1809.05053.pdf)). Note that XNLI also contains a training set of machine-translated versions of the MNLI dataset for 15 languages, but due to quality issues with these machine translations, this model was only trained on the professional translations from the XNLI development set and the original English MNLI training set (392,702 texts). Not using machine-translated texts avoids overfitting the model to the 15 languages, avoids catastrophic forgetting of the other languages the model was pre-trained on, and significantly reduces training costs.

### Training procedure
The model was trained using the Hugging Face trainer with the following hyperparameters. The exact underlying model is [mMiniLMv2-L6-H384-distilled-from-XLMR-Large](https://huggingface.co/nreimers/mMiniLMv2-L6-H384-distilled-from-XLMR-Large).
```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    num_train_epochs=3,              # total number of training epochs
    learning_rate=4e-05,
    per_device_train_batch_size=64,  # batch size per device during training
    per_device_eval_batch_size=120,  # batch size for evaluation
    warmup_ratio=0.06,               # fraction of training steps used to warm up the learning rate
    weight_decay=0.01,               # strength of weight decay
)
```
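For context, a minimal sketch of how these arguments plug into the `Trainer` (the model and dataset variables are hypothetical placeholders; the actual training script is not part of this commit):
```python
from transformers import Trainer

trainer = Trainer(
    model=model,                  # the mMiniLMv2 sequence-classification model
    args=training_args,
    train_dataset=train_dataset,  # hypothetical: tokenized MNLI train + XNLI dev pairs
    eval_dataset=eval_dataset,    # hypothetical: held-out NLI pairs
)
trainer.train()
```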

### Eval results
The model was evaluated on the XNLI test set in 15 languages (5,010 texts per language, 75,150 in total). Note that multilingual NLI models can classify NLI texts without having received NLI training data in the specific language (cross-lingual transfer). This means the model can also do NLI on the other languages it was pre-trained on, but performance is most likely lower than for the languages available in XNLI.

The average XNLI performance of multilingual-MiniLM-L6 reported in the paper is 0.68 ([see table 11](https://arxiv.org/pdf/2002.10957.pdf)). This reimplementation has an average performance of 0.713. The improvement is probably due to the addition of MNLI to the training data and to the fact that this model was distilled from XLM-RoBERTa-large rather than -base (as multilingual-MiniLM-L6-v2 was).

|Datasets|avg_xnli|ar|bg|de|el|en|es|fr|hi|ru|sw|th|tr|ur|vi|zh|
| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
|Accuracy|0.713|0.687|0.742|0.719|0.723|0.789|0.748|0.741|0.691|0.714|0.642|0.699|0.696|0.664|0.723|0.721|
|Speed (texts/sec, A100 GPU, eval_batch=120)|6093.0|6210.0|6003.0|6053.0|5409.0|6531.0|6205.0|5615.0|5734.0|5970.0|6219.0|6289.0|6533.0|5851.0|5970.0|6798.0|

|Datasets|mnli_m|mnli_mm|
| :---: | :---: | :---: |
|Accuracy|0.782|0.8|
|Speed (texts/sec, A100 GPU, eval_batch=120)|4430.0|4395.0|

## Limitations and bias
Please consult the original paper and literature on different NLI datasets for potential biases.

## Citation
If you use this model, please cite: Laurer, Moritz, Wouter van Atteveldt, Andreu Salleras Casas, and Kasper Welbers. 2022. ‘Less Annotating, More Classifying – Addressing the Data Scarcity Issue of Supervised Machine Learning with Deep Transfer Learning and BERT-NLI’. Preprint, June. Open Science Framework. https://osf.io/74b8k.

## Ideas for cooperation or questions?
If you have questions or ideas for cooperation, contact me at m{dot}laurer{at}vu{dot}nl or on [LinkedIn](https://www.linkedin.com/in/moritz-laurer/).
multilingual-MiniLMv2-L6-mnli-xnli/config.json ADDED
@@ -0,0 +1,38 @@
{
  "_name_or_path": "./results/mnli-xnli/mMiniLMv2-L6-H384-distilled-from-XLMR-Large-20220211/best-mMiniLMv2-L6-H384-distilled-from-XLMR-Large-20220211",
  "architectures": [
    "XLMRobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "id2label": {
    "0": "entailment",
    "1": "neutral",
    "2": "contradiction"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "label2id": {
    "contradiction": 2,
    "entailment": 0,
    "neutral": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.26.0",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
multilingual-MiniLMv2-L6-mnli-xnli/model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:91b323ccf247ec1e3b5925d566230bae7c52de8147e6062b42e250089a3fc80b
size 427997022
multilingual-MiniLMv2-L6-mnli-xnli/onnx/config.json ADDED
@@ -0,0 +1,37 @@
{
  "_name_or_path": "MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli",
  "architectures": [
    "XLMRobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "id2label": {
    "0": "entailment",
    "1": "neutral",
    "2": "contradiction"
  },
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "label2id": {
    "contradiction": 2,
    "entailment": 0,
    "neutral": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "transformers_version": "4.37.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
multilingual-MiniLMv2-L6-mnli-xnli/onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79f8cda2b1230585a95ea0514a6f1bd21c5c986ba0529bb3261213a3e195fa6e
size 428127016
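Since this commit ships ONNX weights alongside the PyTorch checkpoint, here is a hedged sketch of running them with Hugging Face Optimum (assuming `optimum[onnxruntime]` is installed and this repository has been cloned locally; the local path below is an assumption):
```python
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer, pipeline

# Load the exported ONNX model and its tokenizer from the local onnx/ subfolder
# (path assumes the repo was cloned into the working directory).
onnx_dir = "multilingual-MiniLMv2-L6-mnli-xnli/onnx"
model = ORTModelForSequenceClassification.from_pretrained(onnx_dir)
tokenizer = AutoTokenizer.from_pretrained(onnx_dir)

classifier = pipeline("zero-shot-classification", model=model, tokenizer=tokenizer)
print(classifier(
    "Angela Merkel ist eine Politikerin in Deutschland und Vorsitzende der CDU",
    ["politics", "economy", "entertainment", "environment"],
))
```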
multilingual-MiniLMv2-L6-mnli-xnli/onnx/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
size 5069051
multilingual-MiniLMv2-L6-mnli-xnli/onnx/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
multilingual-MiniLMv2-L6-mnli-xnli/onnx/tokenizer.json ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b2116c05e7305eea30394284760789681c5b3440dd4cd9a8c77539da68f9e8a6
size 17082854
multilingual-MiniLMv2-L6-mnli-xnli/onnx/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "250001": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "mask_token": "<mask>",
  "max_length": 256,
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "stride": 0,
  "tokenizer_class": "XLMRobertaTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}
multilingual-MiniLMv2-L6-mnli-xnli/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:46298086b62942b2ad929d36e0ab7c11f98f9c109aa9ec3b65fff78104853a6a
size 428017837
multilingual-MiniLMv2-L6-mnli-xnli/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
size 5069051
multilingual-MiniLMv2-L6-mnli-xnli/source.txt ADDED
@@ -0,0 +1 @@
https://huggingface.co/MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli
multilingual-MiniLMv2-L6-mnli-xnli/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
{
  "bos_token": "<s>",
  "cls_token": "<s>",
  "eos_token": "</s>",
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "unk_token": "<unk>"
}
multilingual-MiniLMv2-L6-mnli-xnli/tokenizer.json ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:098c131bb4423163db239755e309facaa6850059f850f9f3d88a78344a4b631c
size 17082758
multilingual-MiniLMv2-L6-mnli-xnli/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
{
  "bos_token": "<s>",
  "cls_token": "<s>",
  "eos_token": "</s>",
  "mask_token": {
    "__type": "AddedToken",
    "content": "<mask>",
    "lstrip": true,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "model_max_length": 512,
  "name_or_path": "./results/mnli-xnli/mMiniLMv2-L6-H384-distilled-from-XLMR-Large-20220211/best-mMiniLMv2-L6-H384-distilled-from-XLMR-Large-20220211",
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "special_tokens_map_file": null,
  "tokenizer_class": "XLMRobertaTokenizer",
  "unk_token": "<unk>"
}