FilipT committed on
Commit
632406b
·
verified ·
1 Parent(s): d16d6da

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench/checkpoints/cda_c-ltg/ltg-bert-babylm_t-race_s-0/checkpoint-10",
3
+ "architectures": [
4
+ "LtgBertForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_ltgbert.LtgBertConfig",
9
+ "AutoModelForMaskedLM": "modeling_ltgbert.LtgBertForMaskedLM"
10
+ },
11
+ "classifier_dropout": 0.2,
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "intermediate_size": 2048,
15
+ "layer_norm_eps": 1e-07,
16
+ "max_position_embeddings": 512,
17
+ "model_type": "ltgbert",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_all_encoded_layers": true,
21
+ "pad_token_id": 4,
22
+ "position_bucket_size": 32,
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.48.0",
25
+ "vocab_size": 16384
26
+ }
configuration_ltgbert.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2023 Language Technology Group from University of Oslo and The HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ """ LTG-BERT configutation """
17
+
18
+
19
+ from transformers.configuration_utils import PretrainedConfig
20
+
21
+
22
# Canonical Hugging Face Hub locations of the published LTG-BERT checkpoints.
# BNC models map to the repository root; NorBERT-3 models map to config.json.
_BNC_CHECKPOINTS = (
    "bnc-bert-span",
    "bnc-bert-span-2x",
    "bnc-bert-span-0.5x",
    "bnc-bert-span-0.25x",
    "bnc-bert-span-order",
    "bnc-bert-span-document",
    "bnc-bert-span-word",
    "bnc-bert-span-subword",
)
_NORBERT3_CHECKPOINTS = (
    "norbert3-xs",
    "norbert3-small",
    "norbert3-base",
    "norbert3-large",
    "norbert3-oversampled-base",
    "norbert3-ncc-base",
    "norbert3-nak-base",
    "norbert3-nb-base",
    "norbert3-wiki-base",
    "norbert3-c4-base",
)

LTG_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    name: f"https://huggingface.co/ltg/{name}" for name in _BNC_CHECKPOINTS
}
LTG_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP.update(
    {name: f"https://huggingface.co/ltg/{name}/config.json" for name in _NORBERT3_CHECKPOINTS}
)
44
+
45
+
46
class LtgBertConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`LtgBertModel`]. It is used to
    instantiate an LTG-BERT model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 16384):
            Vocabulary size of the LTG-BERT model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`LtgBertModel`].
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the attention probabilities.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        intermediate_size (`int`, *optional*, defaults to 2048):
            Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
        max_position_embeddings (`int`, *optional*, defaults to 512):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
        position_bucket_size (`int`, *optional*, defaults to 32):
            Bucket count for relative position encoding (presumably the number of relative-position
            buckets used by the attention layers — confirm against the modeling code).
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        layer_norm_eps (`float`, *optional*, defaults to 1e-7):
            The epsilon used by the layer normalization layers.
        pad_token_id (`int`, *optional*, defaults to 4):
            The id of the padding token; forwarded to [`PretrainedConfig`].
        output_all_encoded_layers (`bool`, *optional*, defaults to `True`):
            Whether to expose the hidden states of every encoder layer rather than only the last one.
        classifier_dropout (`float`, *optional*):
            The dropout ratio for the classification head.
    """

    model_type = "ltgbert"

    def __init__(
        self,
        vocab_size=16384,
        attention_probs_dropout_prob=0.1,
        hidden_dropout_prob=0.1,
        hidden_size=768,
        intermediate_size=2048,
        max_position_embeddings=512,
        position_bucket_size=32,
        num_attention_heads=12,
        num_hidden_layers=12,
        layer_norm_eps=1.0e-7,
        pad_token_id=4,
        output_all_encoded_layers=True,
        classifier_dropout=None,
        **kwargs,
    ):
        # pad_token_id is handled by the base class so generic utilities
        # (padding, generation) see it in the standard place.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.output_all_encoded_layers = output_all_encoded_layers
        self.position_bucket_size = position_bucket_size
        self.layer_norm_eps = layer_norm_eps
        self.classifier_dropout = classifier_dropout
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6092c6e27a93f8cd3c1de3415f87386a589b3615cb4657f20de0165a452dfac
3
+ size 443265688
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "[BOS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "[EOS]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "[PAD]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[PAR]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "[TAB]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "16384": {
60
+ "content": "[BOS]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "16385": {
68
+ "content": "[EOS]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ }
75
+ },
76
+ "bos_token": "[BOS]",
77
+ "clean_up_tokenization_spaces": false,
78
+ "cls_token": "[CLS]",
79
+ "eos_token": "[EOS]",
80
+ "extra_special_tokens": {},
81
+ "mask_token": "[MASK]",
82
+ "model_max_length": 1000000000000000019884624838656,
83
+ "pad_token": "[PAD]",
84
+ "sep_token": "[SEP]",
85
+ "tokenizer_class": "PreTrainedTokenizerFast",
86
+ "unk_token": "[UNK]"
87
+ }