Maghrebi committed on
Commit
6f08fd7
·
1 Parent(s): 92a93c8

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +39 -143
config.json CHANGED
@@ -1,155 +1,51 @@
1
- {
2
- "_name_or_path": "AutoTrain",
3
  "architectures": [
4
- "MT5ForConditionalGeneration"
5
  ],
6
- "d_ff": 2816,
7
  "d_kv": 64,
8
- "d_model": 1024,
9
  "decoder_start_token_id": 0,
10
- "dense_act_fn": "gelu_new",
11
  "dropout_rate": 0.1,
12
  "eos_token_id": 1,
13
- "feed_forward_proj": "gated-gelu",
14
  "initializer_factor": 1.0,
15
  "is_encoder_decoder": true,
16
- "is_gated_act": true,
17
  "layer_norm_epsilon": 1e-06,
18
- "max_length": 256,
19
- "model_type": "mt5",
20
- "num_decoder_layers": 24,
21
- "num_heads": 16,
22
- "num_layers": 24,
23
  "output_past": true,
24
  "pad_token_id": 0,
25
- "padding": "max_length",
26
- "relative_attention_max_distance": 128,
27
  "relative_attention_num_buckets": 32,
28
- "tie_word_embeddings": false,
29
- "tokenizer_class": "T5Tokenizer",
30
- "torch_dtype": "float32",
31
- "transformers_version": "4.25.1",
32
- "use_cache": true,
33
- "vocab_size": 250100
34
- }# coding=utf-8
35
- # Copyright 2020 Mesh TensorFlow authors, T5 Authors and HuggingFace Inc. team.
36
- #
37
- # Licensed under the Apache License, Version 2.0 (the "License");
38
- # you may not use this file except in compliance with the License.
39
- # You may obtain a copy of the License at
40
- #
41
- # http://www.apache.org/licenses/LICENSE-2.0
42
- #
43
- # Unless required by applicable law or agreed to in writing, software
44
- # distributed under the License is distributed on an "AS IS" BASIS,
45
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
46
- # See the License for the specific language governing permissions and
47
- # limitations under the License.
48
- """ PyTorch mT5 model. """
49
-
50
- from ...utils import logging
51
- from ..t5.modeling_t5 import T5EncoderModel, T5ForConditionalGeneration, T5Model
52
- from .configuration_mt5 import MT5Config
53
-
54
-
55
- logger = logging.get_logger(__name__)
56
-
57
- _CONFIG_FOR_DOC = "T5Config"
58
- _TOKENIZER_FOR_DOC = "T5Tokenizer"
59
-
60
-
61
-
62
- [DOCS]
63
- class MT5Model(T5Model):
64
- r"""
65
- This class overrides :class:`~transformers.T5Model`. Please check the superclass for the appropriate documentation
66
- alongside usage examples.
67
-
68
- Examples::
69
-
70
- >>> from transformers import MT5Model, T5Tokenizer
71
- >>> model = MT5Model.from_pretrained("google/mt5-small")
72
- >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
73
- >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
74
- >>> summary = "Weiter Verhandlung in Syrien."
75
- >>> inputs = tokenizer(article, return_tensors="pt")
76
- >>> with tokenizer.as_target_tokenizer():
77
- ... labels = tokenizer(summary, return_tensors="pt")
78
-
79
- >>> outputs = model(input_ids=inputs["input_ids"], decoder_input_ids=labels["input_ids"])
80
- >>> hidden_states = outputs.last_hidden_state
81
- """
82
- model_type = "mt5"
83
- config_class = MT5Config
84
- _keys_to_ignore_on_load_missing = [
85
- r"encoder\.embed_tokens\.weight",
86
- r"decoder\.embed_tokens\.weight",
87
- r"decoder\.block\.0\.layer\.1\.EncDecAttention\.relative_attention_bias\.weight",
88
- ]
89
- _keys_to_ignore_on_save = [
90
- r"encoder\.embed_tokens\.weight",
91
- r"decoder\.embed_tokens\.weight",
92
- ]
93
-
94
-
95
-
96
-
97
- [DOCS]
98
- class MT5ForConditionalGeneration(T5ForConditionalGeneration):
99
- r"""
100
- This class overrides :class:`~transformers.T5ForConditionalGeneration`. Please check the superclass for the
101
- appropriate documentation alongside usage examples.
102
-
103
- Examples::
104
-
105
- >>> from transformers import MT5ForConditionalGeneration, T5Tokenizer
106
- >>> model = MT5ForConditionalGeneration.from_pretrained("google/mt5-small")
107
- >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
108
- >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
109
- >>> summary = "Weiter Verhandlung in Syrien."
110
- >>> inputs = tokenizer(article, return_tensors="pt")
111
- >>> with tokenizer.as_target_tokenizer():
112
- ... labels = tokenizer(summary, return_tensors="pt")
113
-
114
- >>> outputs = model(**inputs,labels=labels["input_ids"])
115
- >>> loss = outputs.loss
116
- """
117
-
118
- model_type = "mt5"
119
- config_class = MT5Config
120
- _keys_to_ignore_on_load_missing = [
121
- r"encoder\.embed_tokens\.weight",
122
- ]
123
- _keys_to_ignore_on_save = [
124
- r"encoder\.embed_tokens\.weight",
125
- ]
126
-
127
-
128
-
129
-
130
- [DOCS]
131
- class MT5EncoderModel(T5EncoderModel):
132
- r"""
133
- This class overrides :class:`~transformers.T5EncoderModel`. Please check the superclass for the appropriate
134
- documentation alongside usage examples.
135
-
136
- Examples::
137
-
138
- >>> from transformers import MT5EncoderModel, T5Tokenizer
139
- >>> model = MT5EncoderModel.from_pretrained("google/mt5-small")
140
- >>> tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
141
- >>> article = "UN Offizier sagt, dass weiter verhandelt werden muss in Syrien."
142
- >>> input_ids = tokenizer(article, return_tensors="pt").input_ids
143
- >>> outputs = model(input_ids)
144
- >>> hidden_state = outputs.last_hidden_state
145
- """
146
-
147
- model_type = "mt5"
148
- config_class = MT5Config
149
- _keys_to_ignore_on_load_missing = [
150
- r"encoder\.embed_tokens\.weight",
151
- ]
152
- _keys_to_ignore_on_save = [
153
- r"encoder\.embed_tokens\.weight",
154
- ]
155
-
 
1
+ {
 
2
  "architectures": [
3
+ "T5ForConditionalGeneration"
4
  ],
5
+ "d_ff": 2048,
6
  "d_kv": 64,
7
+ "d_model": 512,
8
  "decoder_start_token_id": 0,
 
9
  "dropout_rate": 0.1,
10
  "eos_token_id": 1,
 
11
  "initializer_factor": 1.0,
12
  "is_encoder_decoder": true,
 
13
  "layer_norm_epsilon": 1e-06,
14
+ "model_type": "t5",
15
+ "n_positions": 512,
16
+ "num_heads": 8,
17
+ "num_layers": 6,
 
18
  "output_past": true,
19
  "pad_token_id": 0,
 
 
20
  "relative_attention_num_buckets": 32,
21
+ "task_specific_params": {
22
+ "summarization": {
23
+ "early_stopping": true,
24
+ "length_penalty": 2.0,
25
+ "max_length": 200,
26
+ "min_length": 30,
27
+ "no_repeat_ngram_size": 3,
28
+ "num_beams": 4,
29
+ "prefix": "summarize: "
30
+ },
31
+ "translation_ru_to_ab": {
32
+ "early_stopping": true,
33
+ "max_length": 300,
34
+ "num_beams": 4,
35
+ "prefix": "translate Russian to Abkhaz: "
36
+ },
37
+ "translation_ru_to_ab": {
38
+ "early_stopping": true,
39
+ "max_length": 300,
40
+ "num_beams": 4,
41
+ "prefix": "translate Russian to Abkhaz: "
42
+ },
43
+ "translation_ru_to_ab": {
44
+ "early_stopping": true,
45
+ "max_length": 300,
46
+ "num_beams": 4,
47
+ "prefix": "translate Russian to Abkhaz: "
48
+ }
49
+ },
50
+ "vocab_size": 32128
51
+ }