Upload 10 files
Browse files- added_tokens.json +6 -0
- config.json +49 -0
- evaluation_report.txt +21 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +13 -0
- temporal_relation_classification.py +174 -0
- temporal_relation_classification_config.py +15 -0
- tokenizer.json +0 -0
- tokenizer_config.json +17 -0
- training_args.bin +3 -0
added_tokens.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[/א1]": 52001,
|
| 3 |
+
"[/א2]": 52003,
|
| 4 |
+
"[א1]": 52000,
|
| 5 |
+
"[א2]": 52002
|
| 6 |
+
}
|
config.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"EMS1": 52000,
|
| 3 |
+
"EMS2": 52002,
|
| 4 |
+
"ES_ID": 0,
|
| 5 |
+
"_name_or_path": "onlplab/alephbert-base",
|
| 6 |
+
"architecture": "EMP",
|
| 7 |
+
"architectures": [
|
| 8 |
+
"TemporalRelationClassification"
|
| 9 |
+
],
|
| 10 |
+
"attention_probs_dropout_prob": 0.1,
|
| 11 |
+
"auto_map": {
|
| 12 |
+
"AutoConfig": "temporal_relation_classification_config.TemporalRelationClassificationConfig",
|
| 13 |
+
"AutoModelForSequenceClassification": "temporal_relation_classification.TemporalRelationClassification"
|
| 14 |
+
},
|
| 15 |
+
"base_lm": "onlplab/alephbert-base",
|
| 16 |
+
"classifier_dropout": null,
|
| 17 |
+
"hidden_act": "gelu",
|
| 18 |
+
"hidden_dropout_prob": 0.1,
|
| 19 |
+
"hidden_size": 768,
|
| 20 |
+
"id2label": {
|
| 21 |
+
"0": "BEFORE",
|
| 22 |
+
"1": "AFTER",
|
| 23 |
+
"2": "EQUAL",
|
| 24 |
+
"3": "VAGUE"
|
| 25 |
+
},
|
| 26 |
+
"initializer_range": 0.02,
|
| 27 |
+
"intermediate_size": 3072,
|
| 28 |
+
"label2id": {
|
| 29 |
+
"AFTER": 1,
|
| 30 |
+
"BEFORE": 0,
|
| 31 |
+
"EQUAL": 2,
|
| 32 |
+
"VAGUE": 3
|
| 33 |
+
},
|
| 34 |
+
"layer_norm_eps": 1e-12,
|
| 35 |
+
"max_position_embeddings": 512,
|
| 36 |
+
"model_type": "TemporalRelationClassification",
|
| 37 |
+
"num_attention_heads": 12,
|
| 38 |
+
"num_hidden_layers": 12,
|
| 39 |
+
"pad_token_id": 0,
|
| 40 |
+
"pool_tokens": true,
|
| 41 |
+
"position_embedding_type": "absolute",
|
| 42 |
+
"special_markers": true,
|
| 43 |
+
"tokenizer_class": "BertTokenizerFast",
|
| 44 |
+
"torch_dtype": "float32",
|
| 45 |
+
"transformers_version": "4.26.1",
|
| 46 |
+
"type_vocab_size": 2,
|
| 47 |
+
"use_cache": true,
|
| 48 |
+
"vocab_size": 52004
|
| 49 |
+
}
|
evaluation_report.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
precision recall f1-score support
|
| 2 |
+
|
| 3 |
+
BEFORE 0.72 0.73 0.73 674
|
| 4 |
+
AFTER 0.61 0.77 0.68 437
|
| 5 |
+
EQUAL 0.37 0.37 0.37 106
|
| 6 |
+
VAGUE 0.42 0.23 0.29 268
|
| 7 |
+
|
| 8 |
+
accuracy 0.62 1485
|
| 9 |
+
macro avg 0.53 0.52 0.52 1485
|
| 10 |
+
weighted avg 0.61 0.62 0.61 1485
|
| 11 |
+
|
| 12 |
+
precision recall f1-score support
|
| 13 |
+
|
| 14 |
+
BEFORE 0.87 0.76 0.81 772
|
| 15 |
+
AFTER 0.79 0.81 0.80 536
|
| 16 |
+
EQUAL 0.46 0.42 0.44 116
|
| 17 |
+
VAGUE 0.42 1.00 0.59 61
|
| 18 |
+
|
| 19 |
+
accuracy 0.76 1485
|
| 20 |
+
macro avg 0.63 0.75 0.66 1485
|
| 21 |
+
weighted avg 0.79 0.76 0.77 1485
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdd49e993a971a07961c0768a7a722d1798a23fa2eb95051b6f7ba60253ba261
|
| 3 |
+
size 527633677
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"[א1]",
|
| 4 |
+
"[/א1]",
|
| 5 |
+
"[א2]",
|
| 6 |
+
"[/א2]"
|
| 7 |
+
],
|
| 8 |
+
"cls_token": "[CLS]",
|
| 9 |
+
"mask_token": "[MASK]",
|
| 10 |
+
"pad_token": "[PAD]",
|
| 11 |
+
"sep_token": "[SEP]",
|
| 12 |
+
"unk_token": "[UNK]"
|
| 13 |
+
}
|
temporal_relation_classification.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Optional, Tuple, Union
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch import nn
|
| 5 |
+
from torch.nn import CrossEntropyLoss, BCEWithLogitsLoss, MSELoss
|
| 6 |
+
from transformers import BertPreTrainedModel, BertModel, BertForSequenceClassification
|
| 7 |
+
from transformers.modeling_outputs import SequenceClassifierOutput
|
| 8 |
+
|
| 9 |
+
from trc_model.temporal_relation_classification_config import TemporalRelationClassificationConfig
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TokenPooler(nn.Module):
    """Project a single token's hidden state through a dense layer + tanh.

    Mirrors BERT's [CLS] pooler, but is applied to an arbitrary token
    vector (entity markers / event tokens) rather than the first token.
    """

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, token_tensor: torch.Tensor) -> torch.Tensor:
        # Dense projection followed by tanh squashing, in one expression.
        return self.activation(self.dense(token_tensor))
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class TemporalRelationClassification(BertForSequenceClassification):
    """Classify the temporal relation between two event mentions in a text.

    The checkpoint's label set (per its config) is BEFORE / AFTER / EQUAL /
    VAGUE. Which classification head is built is chosen by
    ``config.architecture``:

    * ``'SEQ_CLS'`` -- classify from the pooled [CLS] representation.
    * ``'ESS'``     -- concatenate the two entity-start-marker token states.
    * ``'EF'``      -- concatenate the two event-token states.
    * ``'EMP'``     -- fuse each marker state with its event state through a
                       linear layer, then concatenate the two fused vectors.
    """

    config_class = TemporalRelationClassificationConfig

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.special_markers = config.special_markers
        self.pool_tokens = config.pool_tokens
        self.ES_ID = config.ES_ID  # event-delimiter token id used when no special markers exist
        self.EMS1 = config.EMS1    # token id of the first entity start marker
        self.EMS2 = config.EMS2    # token id of the second entity start marker
        self.architecture = config.architecture
        self.config = config

        # Replace the encoder built by the parent class with the configured
        # base LM, and grow its embedding matrix if marker tokens were added
        # to the vocabulary.
        self.bert = BertModel.from_pretrained(config.base_lm)
        if self.bert.config.vocab_size != config.vocab_size:
            self.bert.resize_token_embeddings(config.vocab_size)

        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        if config.pool_tokens:
            # Optional learned pooling (dense + tanh) for each extracted token.
            self.ems_1_pooler = TokenPooler(config)
            self.ems_2_pooler = TokenPooler(config)
            self.e_1_pooler = TokenPooler(config)
            self.e_2_pooler = TokenPooler(config)

        self.dropout = nn.Dropout(classifier_dropout)

        # NOTE: if config.architecture is not one of the known variants,
        # this stays None and forward() will fail when computing logits.
        self.classification_layers = None
        if self.architecture == 'SEQ_CLS':
            self.classification_layers = nn.Sequential(
                nn.Linear(config.hidden_size, config.num_labels)
            )
        if self.architecture == 'EMP':
            self.e_1_linear = nn.Linear(config.hidden_size * 2, config.hidden_size)
            self.e_2_linear = nn.Linear(config.hidden_size * 2, config.hidden_size)

        if self.architecture in ['ESS', 'EF', 'EMP']:
            self.classification_layers = nn.Sequential(
                nn.Linear(config.hidden_size * 2, config.hidden_size),
                nn.Linear(config.hidden_size, config.num_labels)
            )

        # Initialize weights and apply final processing
        # self.post_init()

    def _get_entities_and_start_markers_indices(self, input_ids):
        """Locate, per batch row, the two marker tokens and the event tokens
        that immediately follow them.

        Returns a tuple ``(marker_1_idx, event_1_idx, marker_2_idx,
        event_2_idx)`` of 1-D index tensors (one entry per batch row).
        """
        if not self.special_markers:
            # Without special markers, events are delimited by ES_ID tokens;
            # each row is expected to contain exactly two occurrences.
            # BUGFIX: build the index tensor on the model's device (it was
            # implicitly CPU-only, unlike the marker branch below, which
            # breaks indexing when the model runs on GPU).
            event_1_start, event_2_start = torch.tensor(
                [(ids == self.ES_ID).nonzero().squeeze().tolist() for ids in input_ids],
                device=self.device).T
            return event_1_start, event_1_start + 1, event_2_start, event_2_start + 1

        # .item() assumes exactly one occurrence of each marker per row.
        em1_s = torch.tensor([(ids == self.EMS1).nonzero().item() for ids in input_ids], device=self.device)
        entity_1 = em1_s + 1

        em2_s = torch.tensor([(ids == self.EMS2).nonzero().item() for ids in input_ids], device=self.device)
        entity_2 = em2_s + 1
        return em1_s, entity_1, em2_s, entity_2

    def forward(
            self,
            input_ids: Optional[torch.Tensor] = None,
            attention_mask: Optional[torch.Tensor] = None,
            token_type_ids: Optional[torch.Tensor] = None,
            position_ids: Optional[torch.Tensor] = None,
            head_mask: Optional[torch.Tensor] = None,
            inputs_embeds: Optional[torch.Tensor] = None,
            labels: Optional[torch.Tensor] = None,
            output_attentions: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        logits = None
        if self.architecture == 'SEQ_CLS':
            # Standard sequence classification from the pooled [CLS] output.
            pooled_output = outputs[1]

            pooled_output = self.dropout(pooled_output)
            logits = self.classification_layers(pooled_output)
        else:
            # Token-based variants work on the full sequence output.
            sequence_output = outputs[0]

            sequence_output = self.dropout(sequence_output)

            entity_mark_1_s, entity_1, entity_mark_2_s, entity_2 = self._get_entities_and_start_markers_indices(
                input_ids)

            # Gather, per batch row, the hidden state at each index of interest.
            e1_start_mark_tensors = sequence_output[torch.arange(sequence_output.size(0)), entity_mark_1_s]
            e2_start_mark_tensors = sequence_output[torch.arange(sequence_output.size(0)), entity_mark_2_s]

            e1_tensor = sequence_output[torch.arange(sequence_output.size(0)), entity_1]
            e2_tensor = sequence_output[torch.arange(sequence_output.size(0)), entity_2]

            if self.pool_tokens:
                e1_start_mark_tensors = self.ems_1_pooler(e1_start_mark_tensors)
                e2_start_mark_tensors = self.ems_2_pooler(e2_start_mark_tensors)

                e1_tensor = self.e_1_pooler(e1_tensor)
                e2_tensor = self.e_2_pooler(e2_tensor)

            if self.architecture == 'ESS':
                # Entity start markers only.
                e_start_markers_cat = torch.cat((e1_start_mark_tensors, e2_start_mark_tensors), 1)
                logits = self.classification_layers(e_start_markers_cat)

            if self.architecture == 'EF':
                # Event tokens only.
                events_cat = torch.cat((e1_tensor, e2_tensor), 1)
                logits = self.classification_layers(events_cat)

            if self.architecture == 'EMP':
                # Marker + event fused per entity, then both entities concatenated.
                e1_and_start_mark = self.e_1_linear(torch.cat((e1_start_mark_tensors, e1_tensor), 1))
                e2_and_start_mark = self.e_2_linear(torch.cat((e2_start_mark_tensors, e2_tensor), 1))
                both_e_cat = torch.cat((e1_and_start_mark, e2_and_start_mark), 1)
                logits = self.classification_layers(both_e_cat)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
|
temporal_relation_classification_config.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import PretrainedConfig, BertConfig
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class TemporalRelationClassificationConfig(BertConfig):
    """Configuration for ``TemporalRelationClassification`` models.

    Extends ``BertConfig`` with the extra knobs the classifier needs: the
    head-architecture selector, whether entity marker tokens are present,
    whether marker/event states are pooled, and the relevant token ids.
    """

    model_type = "TemporalRelationClassification"

    def __init__(self, special_markers=False, pool_tokens=False, ES_ID=0, EMS1=0, EMS2=0, architecture=0, **kwargs):
        super().__init__(**kwargs)
        # ``name_or_path`` is populated by the parent constructor; keep it
        # around as the backbone checkpoint to load in the model's __init__.
        self.base_lm = self.name_or_path
        self.special_markers = special_markers
        self.pool_tokens = pool_tokens
        self.ES_ID = ES_ID
        self.EMS1 = EMS1
        self.EMS2 = EMS2
        self.architecture = architecture
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"do_basic_tokenize": true,
|
| 4 |
+
"do_lower_case": true,
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"max_len": 512,
|
| 7 |
+
"model_max_length": 512,
|
| 8 |
+
"name_or_path": "onlplab/alephbert-base",
|
| 9 |
+
"never_split": null,
|
| 10 |
+
"pad_token": "[PAD]",
|
| 11 |
+
"sep_token": "[SEP]",
|
| 12 |
+
"special_tokens_map_file": null,
|
| 13 |
+
"strip_accents": null,
|
| 14 |
+
"tokenize_chinese_chars": true,
|
| 15 |
+
"tokenizer_class": "BertTokenizer",
|
| 16 |
+
"unk_token": "[UNK]"
|
| 17 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21ac59d2913711397a63f441cc16d3162d1b0218c47185750d13da6a5dde2800
|
| 3 |
+
size 3567
|