Upload model
Browse files
- config.json +0 -1
- configuration_multiheadcrf.py +0 -2
- model.safetensors +2 -2
- modeling_multiheadcrf.py +19 -11
config.json
CHANGED
|
@@ -48,7 +48,6 @@
|
|
| 48 |
"num_attention_heads": 12,
|
| 49 |
"num_hidden_layers": 12,
|
| 50 |
"number_of_layer_per_head": 1,
|
| 51 |
-
"p_augmentation": 0.5,
|
| 52 |
"pad_token_id": 1,
|
| 53 |
"percentage_tags": 0.25,
|
| 54 |
"position_embedding_type": "absolute",
|
|
|
|
| 48 |
"num_attention_heads": 12,
|
| 49 |
"num_hidden_layers": 12,
|
| 50 |
"number_of_layer_per_head": 1,
|
|
|
|
| 51 |
"pad_token_id": 1,
|
| 52 |
"percentage_tags": 0.25,
|
| 53 |
"position_embedding_type": "absolute",
|
configuration_multiheadcrf.py
CHANGED
|
@@ -13,7 +13,6 @@ class MultiHeadCRFConfig(PretrainedConfig):
|
|
| 13 |
augmentation = "random",
|
| 14 |
context_size = 64,
|
| 15 |
percentage_tags = 0.2,
|
| 16 |
-
p_augmentation = 0.5,
|
| 17 |
aug_prob = 0.5,
|
| 18 |
crf_reduction = "mean",
|
| 19 |
freeze = False,
|
|
@@ -26,7 +25,6 @@ class MultiHeadCRFConfig(PretrainedConfig):
|
|
| 26 |
self.augmentation = augmentation
|
| 27 |
self.context_size = context_size
|
| 28 |
self.percentage_tags = percentage_tags
|
| 29 |
-
self.p_augmentation = p_augmentation
|
| 30 |
self.aug_prob = aug_prob,
|
| 31 |
self.crf_reduction = crf_reduction
|
| 32 |
self.freeze=freeze
|
|
|
|
| 13 |
augmentation = "random",
|
| 14 |
context_size = 64,
|
| 15 |
percentage_tags = 0.2,
|
|
|
|
| 16 |
aug_prob = 0.5,
|
| 17 |
crf_reduction = "mean",
|
| 18 |
freeze = False,
|
|
|
|
| 25 |
self.augmentation = augmentation
|
| 26 |
self.context_size = context_size
|
| 27 |
self.percentage_tags = percentage_tags
|
|
|
|
| 28 |
self.aug_prob = aug_prob,
|
| 29 |
self.crf_reduction = crf_reduction
|
| 30 |
self.freeze=freeze
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37dd4fda285a360ba9dae043072640394db34629a40097e48bc182a061002f3d
|
| 3 |
+
size 508096224
|
modeling_multiheadcrf.py
CHANGED
|
@@ -12,9 +12,10 @@ NUM_PER_LAYER = 16
|
|
| 12 |
|
| 13 |
class RobertaMultiHeadCRFModel(PreTrainedModel):
|
| 14 |
config_class = MultiHeadCRFConfig
|
| 15 |
-
|
|
|
|
| 16 |
_keys_to_ignore_on_load_unexpected = [r"pooler"]
|
| 17 |
-
|
| 18 |
def __init__(self, config):
|
| 19 |
super().__init__(config)
|
| 20 |
self.num_labels = config.num_labels
|
|
@@ -24,7 +25,10 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
|
|
| 24 |
self.heads = config.classes #expected an array of classes we are predicting
|
| 25 |
|
| 26 |
# this can be BERT ROBERTA and other BERT-variants
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
| 28 |
#AutoModel(config, add_pooling_layer=False)
|
| 29 |
#AutoModel.from_pretrained(config._name_or_path, config=config, add_pooling_layer=False)
|
| 30 |
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
|
@@ -43,20 +47,23 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
|
|
| 43 |
self.manage_freezing()
|
| 44 |
|
| 45 |
def training_mode(self):
|
|
|
|
| 46 |
# for some reason these layers are not being correctly init
|
| 47 |
# probably related with the lifecycle of the hf .from_pretrained method
|
| 48 |
-
self.
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
| 52 |
|
| 53 |
def manage_freezing(self):
|
| 54 |
-
for _, param in self.
|
| 55 |
param.requires_grad = False
|
| 56 |
|
| 57 |
num_encoders_to_freeze = self.config.num_frozen_encoder
|
| 58 |
if num_encoders_to_freeze > 0:
|
| 59 |
-
for _, param in islice(self.
|
| 60 |
param.requires_grad = False
|
| 61 |
|
| 62 |
|
|
@@ -75,7 +82,7 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
|
|
| 75 |
# Default `model.config.use_return_dict´ is `True´
|
| 76 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 77 |
|
| 78 |
-
outputs = self.
|
| 79 |
attention_mask=attention_mask,
|
| 80 |
token_type_ids=token_type_ids,
|
| 81 |
position_ids=position_ids,
|
|
@@ -119,7 +126,8 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
|
|
| 119 |
|
| 120 |
class BertMultiHeadCRFModel(RobertaMultiHeadCRFModel):
|
| 121 |
config_class = MultiHeadCRFConfig
|
| 122 |
-
|
|
|
|
| 123 |
_keys_to_ignore_on_load_unexpected = [r"pooler"]
|
| 124 |
|
| 125 |
# Taken from https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py and fixed got uint8 warning
|
|
|
|
| 12 |
|
| 13 |
class RobertaMultiHeadCRFModel(PreTrainedModel):
|
| 14 |
config_class = MultiHeadCRFConfig
|
| 15 |
+
transformers_backbone_name = "roberta"
|
| 16 |
+
transformers_backbone_class = RobertaModel
|
| 17 |
_keys_to_ignore_on_load_unexpected = [r"pooler"]
|
| 18 |
+
|
| 19 |
def __init__(self, config):
|
| 20 |
super().__init__(config)
|
| 21 |
self.num_labels = config.num_labels
|
|
|
|
| 25 |
self.heads = config.classes #expected an array of classes we are predicting
|
| 26 |
|
| 27 |
# this can be BERT ROBERTA and other BERT-variants
|
| 28 |
+
# This is because HF needs the backbone attribute to be named "roberta" for RoBERTa models and "bert" for BERT models so that the model can be loaded
|
| 29 |
+
# check https://github.com/huggingface/transformers/blob/b487096b02307cd6e0f132b676cdcc7255fe8e74/src/transformers/models/roberta/modeling_roberta.py#L1170C16-L1170C20
|
| 30 |
+
setattr(self, self.transformers_backbone_name, self.transformers_backbone_class(config, add_pooling_layer=False))
|
| 31 |
+
#self.roberta = self.transformer_backbone_class(config, add_pooling_layer=False)
|
| 32 |
#AutoModel(config, add_pooling_layer=False)
|
| 33 |
#AutoModel.from_pretrained(config._name_or_path, config=config, add_pooling_layer=False)
|
| 34 |
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
|
|
|
| 47 |
self.manage_freezing()
|
| 48 |
|
| 49 |
def training_mode(self):
|
| 50 |
+
|
| 51 |
# for some reason these layers are not being correctly init
|
| 52 |
# probably related with the lifecycle of the hf .from_pretrained method
|
| 53 |
+
for ent in self.heads:
|
| 54 |
+
for i in range(self.number_of_layer_per_head):
|
| 55 |
+
getattr(self, f"{ent}_dense_{i}").reset_parameters()
|
| 56 |
+
getattr(self, f"{ent}_classifier").reset_parameters()
|
| 57 |
+
getattr(self, f"{ent}_crf").reset_parameters()
|
| 58 |
+
getattr(self, f"{ent}_crf").mask_impossible_transitions()
|
| 59 |
|
| 60 |
def manage_freezing(self):
|
| 61 |
+
for _, param in getattr(self, self.transformers_backbone_name).embeddings.named_parameters():
|
| 62 |
param.requires_grad = False
|
| 63 |
|
| 64 |
num_encoders_to_freeze = self.config.num_frozen_encoder
|
| 65 |
if num_encoders_to_freeze > 0:
|
| 66 |
+
for _, param in islice(getattr(self, self.transformers_backbone_name).encoder.named_parameters(), num_encoders_to_freeze*NUM_PER_LAYER):
|
| 67 |
param.requires_grad = False
|
| 68 |
|
| 69 |
|
|
|
|
| 82 |
# Default `model.config.use_return_dict´ is `True´
|
| 83 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 84 |
|
| 85 |
+
outputs = getattr(self, self.transformers_backbone_name)(input_ids,
|
| 86 |
attention_mask=attention_mask,
|
| 87 |
token_type_ids=token_type_ids,
|
| 88 |
position_ids=position_ids,
|
|
|
|
| 126 |
|
| 127 |
class BertMultiHeadCRFModel(RobertaMultiHeadCRFModel):
|
| 128 |
config_class = MultiHeadCRFConfig
|
| 129 |
+
transformers_backbone_name = "bert"
|
| 130 |
+
transformers_backbone_class = BertModel
|
| 131 |
_keys_to_ignore_on_load_unexpected = [r"pooler"]
|
| 132 |
|
| 133 |
# Taken from https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py and fixed got uint8 warning
|