Commit
·
c4b0d7f
1
Parent(s):
ea715ce
Update model configuration and mappings
Browse files- config.json +1 -1
- configuration_aragpt2.py +5 -5
- modeling_aragpt2.py +22 -37
config.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
],
|
| 6 |
"auto_map": {
|
| 7 |
"AutoConfig": "configuration_aragpt2.AraGPT2Config",
|
| 8 |
-
"
|
| 9 |
"AutoModel": "modeling_aragpt2.AraGPT2Model"
|
| 10 |
},
|
| 11 |
"attention_probs_dropout_prob": 0.1,
|
|
|
|
| 5 |
],
|
| 6 |
"auto_map": {
|
| 7 |
"AutoConfig": "configuration_aragpt2.AraGPT2Config",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_aragpt2.AraGPT2LMHeadModel",
|
| 9 |
"AutoModel": "modeling_aragpt2.AraGPT2Model"
|
| 10 |
},
|
| 11 |
"attention_probs_dropout_prob": 0.1,
|
configuration_aragpt2.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
-
"""
|
| 3 |
from collections import OrderedDict
|
| 4 |
from typing import Any, List, Mapping, Optional
|
| 5 |
|
|
@@ -18,7 +18,7 @@ AraGPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
|
| 18 |
|
| 19 |
class AraGPT2Config(PretrainedConfig):
|
| 20 |
"""
|
| 21 |
-
This is the configuration class to store the configuration of a [`
|
| 22 |
instantiate a AraGPT2 model according to the specified arguments, defining the model architecture. Instantiating a
|
| 23 |
configuration with the defaults will yield a similar configuration to that of the AraGPT2
|
| 24 |
[aubmindlab/aragpt2-mega](https://huggingface.co/aubmindlab/aragpt2-mega) architecture.
|
|
@@ -131,7 +131,7 @@ class AraGPT2Config(PretrainedConfig):
|
|
| 131 |
n_layer=12,
|
| 132 |
n_head=12,
|
| 133 |
n_inner=None,
|
| 134 |
-
activation_function="
|
| 135 |
resid_pdrop=0.1,
|
| 136 |
embd_pdrop=0.1,
|
| 137 |
attn_pdrop=0.1,
|
|
@@ -144,8 +144,8 @@ class AraGPT2Config(PretrainedConfig):
|
|
| 144 |
summary_first_dropout=0.1,
|
| 145 |
scale_attn_weights=True,
|
| 146 |
use_cache=True,
|
| 147 |
-
bos_token_id=
|
| 148 |
-
eos_token_id=
|
| 149 |
scale_attn_by_inverse_layer_idx=False,
|
| 150 |
reorder_and_upcast_attn=False,
|
| 151 |
**kwargs,
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
+
""" AraGPT2 configuration"""
|
| 3 |
from collections import OrderedDict
|
| 4 |
from typing import Any, List, Mapping, Optional
|
| 5 |
|
|
|
|
| 18 |
|
| 19 |
class AraGPT2Config(PretrainedConfig):
|
| 20 |
"""
|
| 21 |
+
This is the configuration class to store the configuration of a [`AraGPT2Model`] or a [`TFAraGPT2Model`]. It is used to
|
| 22 |
instantiate a AraGPT2 model according to the specified arguments, defining the model architecture. Instantiating a
|
| 23 |
configuration with the defaults will yield a similar configuration to that of the AraGPT2
|
| 24 |
[aubmindlab/aragpt2-mega](https://huggingface.co/aubmindlab/aragpt2-mega) architecture.
|
|
|
|
| 131 |
n_layer=12,
|
| 132 |
n_head=12,
|
| 133 |
n_inner=None,
|
| 134 |
+
activation_function="gelu",
|
| 135 |
resid_pdrop=0.1,
|
| 136 |
embd_pdrop=0.1,
|
| 137 |
attn_pdrop=0.1,
|
|
|
|
| 144 |
summary_first_dropout=0.1,
|
| 145 |
scale_attn_weights=True,
|
| 146 |
use_cache=True,
|
| 147 |
+
bos_token_id=0,
|
| 148 |
+
eos_token_id=0,
|
| 149 |
scale_attn_by_inverse_layer_idx=False,
|
| 150 |
reorder_and_upcast_attn=False,
|
| 151 |
**kwargs,
|
modeling_aragpt2.py
CHANGED
|
@@ -1,19 +1,5 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
-
|
| 3 |
-
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
| 4 |
-
#
|
| 5 |
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
-
# you may not use this file except in compliance with the License.
|
| 7 |
-
# You may obtain a copy of the License at
|
| 8 |
-
#
|
| 9 |
-
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
-
#
|
| 11 |
-
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
-
# See the License for the specific language governing permissions and
|
| 15 |
-
# limitations under the License.
|
| 16 |
-
"""PyTorch OpenAI GPT-2 model."""
|
| 17 |
|
| 18 |
import math
|
| 19 |
import os
|
|
@@ -59,16 +45,12 @@ _CHECKPOINT_FOR_DOC = "aubmindlab/aragpt2-mega"
|
|
| 59 |
_CONFIG_FOR_DOC = "AraGPT2Config"
|
| 60 |
_TOKENIZER_FOR_DOC = "GPT2Tokenizer"
|
| 61 |
|
| 62 |
-
|
| 63 |
"aubmindlab/aragpt2-mega",
|
| 64 |
-
|
| 65 |
-
"aubmindlab/aragpt2-mega",
|
| 66 |
-
"aubmindlab/aragpt2-mega",
|
| 67 |
-
"distilgpt2",
|
| 68 |
-
# See all GPT-2 models at https://huggingface.co/models?filter=gpt2
|
| 69 |
]
|
| 70 |
|
| 71 |
-
|
| 72 |
"LayerNorm_embed_norm": "emb_norm",
|
| 73 |
"pos_embed": "wpe.weight",
|
| 74 |
"word_embed": "wte.weight",
|
|
@@ -89,19 +71,22 @@ _GPT2_ML_TF_TO_TORCH = {
|
|
| 89 |
"bias": "bias",
|
| 90 |
}
|
| 91 |
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
def convert_gpt2_checkpoint_to_pytorch(
|
| 94 |
-
|
| 95 |
):
|
| 96 |
# Construct model
|
| 97 |
-
if
|
| 98 |
config = AraGPT2Config()
|
| 99 |
else:
|
| 100 |
-
config = AraGPT2Config.from_json_file(
|
| 101 |
model = AraGPT2Model(config)
|
| 102 |
|
| 103 |
# Load weights from numpy
|
| 104 |
-
|
| 105 |
|
| 106 |
# Save pytorch-model
|
| 107 |
pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
|
|
@@ -115,7 +100,7 @@ def convert_gpt2_checkpoint_to_pytorch(
|
|
| 115 |
|
| 116 |
# XXX: MUST do like: convert_gpt2_checkpoint_to_pytorch('./model.ckpt-100000', './mega.json', './')
|
| 117 |
# https://github.com/tensorflow/models/issues/2675#issuecomment-516595597
|
| 118 |
-
def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
|
| 119 |
"""Load tf checkpoints in a pytorch model"""
|
| 120 |
try:
|
| 121 |
import re
|
|
@@ -126,7 +111,7 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
|
|
| 126 |
"https://www.tensorflow.org/install/ for installation instructions."
|
| 127 |
)
|
| 128 |
raise
|
| 129 |
-
tf_path = os.path.abspath(gpt2_checkpoint_path)
|
| 130 |
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
|
| 131 |
# Load weights from TF model
|
| 132 |
init_vars = tf.train.list_variables(tf_path)
|
|
@@ -157,13 +142,13 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
|
|
| 157 |
|
| 158 |
if sname == "" or sname == "embeddings":
|
| 159 |
continue
|
| 160 |
-
elif sname not in _GPT2_ML_TF_TO_TORCH:
|
| 161 |
print("=========================================================")
|
| 162 |
logger.info("Skip var name {}".format(scope_names))
|
| 163 |
pointer = None
|
| 164 |
break
|
| 165 |
else:
|
| 166 |
-
tname = _GPT2_ML_TF_TO_TORCH[sname]
|
| 167 |
if "." in tname:
|
| 168 |
parent, child = tname.split(".")
|
| 169 |
pointer = getattr(pointer, parent)
|
|
@@ -602,7 +587,7 @@ class AraGPT2PreTrainedModel(PreTrainedModel):
|
|
| 602 |
"""
|
| 603 |
|
| 604 |
config_class = AraGPT2Config
|
| 605 |
-
load_tf_weights = load_tf_weights_in_gpt2
|
| 606 |
base_model_prefix = "transformer"
|
| 607 |
is_parallelizable = True
|
| 608 |
supports_gradient_checkpointing = True
|
|
@@ -828,7 +813,7 @@ class AraGPT2Model(AraGPT2PreTrainedModel):
|
|
| 828 |
_keys_to_ignore_on_load_unexpected = ["attn.masked_bias"]
|
| 829 |
_keys_to_ignore_on_load_missing = ["attn.masked_bias"]
|
| 830 |
|
| 831 |
-
def __init__(self, config):
|
| 832 |
super().__init__(config)
|
| 833 |
|
| 834 |
self.embed_dim = config.hidden_size
|
|
@@ -1177,7 +1162,7 @@ class AraGPT2LMHeadModel(AraGPT2PreTrainedModel):
|
|
| 1177 |
]
|
| 1178 |
_tied_weights_keys = ["lm_head.weight"]
|
| 1179 |
|
| 1180 |
-
def __init__(self, config):
|
| 1181 |
super().__init__(config)
|
| 1182 |
self.transformer = AraGPT2Model(config)
|
| 1183 |
self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
|
|
@@ -1399,7 +1384,7 @@ class AraGPT2DoubleHeadsModel(AraGPT2PreTrainedModel):
|
|
| 1399 |
]
|
| 1400 |
_tied_weights_keys = ["lm_head.weight"]
|
| 1401 |
|
| 1402 |
-
def __init__(self, config):
|
| 1403 |
super().__init__(config)
|
| 1404 |
config.num_labels = 1
|
| 1405 |
self.transformer = AraGPT2Model(config)
|
|
@@ -1653,7 +1638,7 @@ class AraGPT2ForSequenceClassification(AraGPT2PreTrainedModel):
|
|
| 1653 |
]
|
| 1654 |
_keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]
|
| 1655 |
|
| 1656 |
-
def __init__(self, config):
|
| 1657 |
super().__init__(config)
|
| 1658 |
self.num_labels = config.num_labels
|
| 1659 |
self.transformer = AraGPT2Model(config)
|
|
@@ -1789,7 +1774,7 @@ class AraGPT2ForSequenceClassification(AraGPT2PreTrainedModel):
|
|
| 1789 |
AraGPT2_START_DOCSTRING,
|
| 1790 |
)
|
| 1791 |
class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
|
| 1792 |
-
def __init__(self, config):
|
| 1793 |
super().__init__(config)
|
| 1794 |
self.num_labels = config.num_labels
|
| 1795 |
|
|
@@ -1890,7 +1875,7 @@ class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
|
|
| 1890 |
AraGPT2_START_DOCSTRING,
|
| 1891 |
)
|
| 1892 |
class AraGPT2ForQuestionAnswering(AraGPT2PreTrainedModel):
|
| 1893 |
-
def __init__(self, config):
|
| 1894 |
super().__init__(config)
|
| 1895 |
self.num_labels = config.num_labels
|
| 1896 |
self.transformer = AraGPT2Model(config)
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
+
"""PyTorch AraGPT2 model."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
import math
|
| 5 |
import os
|
|
|
|
| 45 |
_CONFIG_FOR_DOC = "AraGPT2Config"
|
| 46 |
_TOKENIZER_FOR_DOC = "GPT2Tokenizer"
|
| 47 |
|
| 48 |
+
ARAGPT2_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
| 49 |
"aubmindlab/aragpt2-mega",
|
| 50 |
+
# See all AraGPT2 models at https://huggingface.co/models?filter=aragpt2
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
]
|
| 52 |
|
| 53 |
+
_ARAGPT2_ML_TF_TO_TORCH = {
|
| 54 |
"LayerNorm_embed_norm": "emb_norm",
|
| 55 |
"pos_embed": "wpe.weight",
|
| 56 |
"word_embed": "wte.weight",
|
|
|
|
| 71 |
"bias": "bias",
|
| 72 |
}
|
| 73 |
|
| 74 |
+
WEIGHTS_NAME = "pytorch_model.bin"
|
| 75 |
+
CONFIG_NAME = "config.json"
|
| 76 |
+
|
| 77 |
|
| 78 |
def convert_gpt2_checkpoint_to_pytorch(
|
| 79 |
+
aragpt2_checkpoint_path, aragpt2_config_file, pytorch_dump_folder_path
|
| 80 |
):
|
| 81 |
# Construct model
|
| 82 |
+
if aragpt2_config_file == "":
|
| 83 |
config = AraGPT2Config()
|
| 84 |
else:
|
| 85 |
+
config = AraGPT2Config.from_json_file(aragpt2_config_file)
|
| 86 |
model = AraGPT2Model(config)
|
| 87 |
|
| 88 |
# Load weights from numpy
|
| 89 |
+
load_tf_weights_in_aragpt2(model, config, aragpt2_checkpoint_path)
|
| 90 |
|
| 91 |
# Save pytorch-model
|
| 92 |
pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME
|
|
|
|
| 100 |
|
| 101 |
# XXX: MUST do like: convert_gpt2_checkpoint_to_pytorch('./model.ckpt-100000', './mega.json', './')
|
| 102 |
# https://github.com/tensorflow/models/issues/2675#issuecomment-516595597
|
| 103 |
+
def load_tf_weights_in_aragpt2(model, config, aragpt2_checkpoint_path):
|
| 104 |
"""Load tf checkpoints in a pytorch model"""
|
| 105 |
try:
|
| 106 |
import re
|
|
|
|
| 111 |
"https://www.tensorflow.org/install/ for installation instructions."
|
| 112 |
)
|
| 113 |
raise
|
| 114 |
+
tf_path = os.path.abspath(aragpt2_checkpoint_path)
|
| 115 |
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
|
| 116 |
# Load weights from TF model
|
| 117 |
init_vars = tf.train.list_variables(tf_path)
|
|
|
|
| 142 |
|
| 143 |
if sname == "" or sname == "embeddings":
|
| 144 |
continue
|
| 145 |
+
elif sname not in _ARAGPT2_ML_TF_TO_TORCH:
|
| 146 |
print("=========================================================")
|
| 147 |
logger.info("Skip var name {}".format(scope_names))
|
| 148 |
pointer = None
|
| 149 |
break
|
| 150 |
else:
|
| 151 |
+
tname = _ARAGPT2_ML_TF_TO_TORCH[sname]
|
| 152 |
if "." in tname:
|
| 153 |
parent, child = tname.split(".")
|
| 154 |
pointer = getattr(pointer, parent)
|
|
|
|
| 587 |
"""
|
| 588 |
|
| 589 |
config_class = AraGPT2Config
|
| 590 |
+
load_tf_weights = load_tf_weights_in_aragpt2
|
| 591 |
base_model_prefix = "transformer"
|
| 592 |
is_parallelizable = True
|
| 593 |
supports_gradient_checkpointing = True
|
|
|
|
| 813 |
_keys_to_ignore_on_load_unexpected = ["attn.masked_bias"]
|
| 814 |
_keys_to_ignore_on_load_missing = ["attn.masked_bias"]
|
| 815 |
|
| 816 |
+
def __init__(self, config: AraGPT2Config):
|
| 817 |
super().__init__(config)
|
| 818 |
|
| 819 |
self.embed_dim = config.hidden_size
|
|
|
|
| 1162 |
]
|
| 1163 |
_tied_weights_keys = ["lm_head.weight"]
|
| 1164 |
|
| 1165 |
+
def __init__(self, config: AraGPT2Config):
|
| 1166 |
super().__init__(config)
|
| 1167 |
self.transformer = AraGPT2Model(config)
|
| 1168 |
self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
|
|
|
|
| 1384 |
]
|
| 1385 |
_tied_weights_keys = ["lm_head.weight"]
|
| 1386 |
|
| 1387 |
+
def __init__(self, config: AraGPT2Config):
|
| 1388 |
super().__init__(config)
|
| 1389 |
config.num_labels = 1
|
| 1390 |
self.transformer = AraGPT2Model(config)
|
|
|
|
| 1638 |
]
|
| 1639 |
_keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]
|
| 1640 |
|
| 1641 |
+
def __init__(self, config: AraGPT2Config):
|
| 1642 |
super().__init__(config)
|
| 1643 |
self.num_labels = config.num_labels
|
| 1644 |
self.transformer = AraGPT2Model(config)
|
|
|
|
| 1774 |
AraGPT2_START_DOCSTRING,
|
| 1775 |
)
|
| 1776 |
class AraGPT2ForTokenClassification(AraGPT2PreTrainedModel):
|
| 1777 |
+
def __init__(self, config: AraGPT2Config):
|
| 1778 |
super().__init__(config)
|
| 1779 |
self.num_labels = config.num_labels
|
| 1780 |
|
|
|
|
| 1875 |
AraGPT2_START_DOCSTRING,
|
| 1876 |
)
|
| 1877 |
class AraGPT2ForQuestionAnswering(AraGPT2PreTrainedModel):
|
| 1878 |
+
def __init__(self, config: AraGPT2Config):
|
| 1879 |
super().__init__(config)
|
| 1880 |
self.num_labels = config.num_labels
|
| 1881 |
self.transformer = AraGPT2Model(config)
|