guyyanko committed on
Commit
74a45d1
·
1 Parent(s): 365a4cd

Upload 10 files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "[/א1]": 52001,
3
+ "[/א2]": 52003,
4
+ "[א1]": 52000,
5
+ "[א2]": 52002
6
+ }
config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "EMS1": 52000,
3
+ "EMS2": 52002,
4
+ "ES_ID": 0,
5
+ "_name_or_path": "onlplab/alephbert-base",
6
+ "architecture": "EMP",
7
+ "architectures": [
8
+ "TemporalRelationClassification"
9
+ ],
10
+ "attention_probs_dropout_prob": 0.1,
11
+ "auto_map": {
12
+ "AutoConfig": "temporal_relation_classification_config.TemporalRelationClassificationConfig",
13
+ "AutoModelForSequenceClassification": "temporal_relation_classification.TemporalRelationClassification"
14
+ },
15
+ "base_lm": "onlplab/alephbert-base",
16
+ "classifier_dropout": null,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 768,
20
+ "id2label": {
21
+ "0": "BEFORE",
22
+ "1": "AFTER",
23
+ "2": "EQUAL",
24
+ "3": "VAGUE"
25
+ },
26
+ "initializer_range": 0.02,
27
+ "intermediate_size": 3072,
28
+ "label2id": {
29
+ "AFTER": 1,
30
+ "BEFORE": 0,
31
+ "EQUAL": 2,
32
+ "VAGUE": 3
33
+ },
34
+ "layer_norm_eps": 1e-12,
35
+ "max_position_embeddings": 512,
36
+ "model_type": "TemporalRelationClassification",
37
+ "num_attention_heads": 12,
38
+ "num_hidden_layers": 12,
39
+ "pad_token_id": 0,
40
+ "pool_tokens": true,
41
+ "position_embedding_type": "absolute",
42
+ "special_markers": true,
43
+ "tokenizer_class": "BertTokenizerFast",
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.26.1",
46
+ "type_vocab_size": 2,
47
+ "use_cache": true,
48
+ "vocab_size": 52004
49
+ }
evaluation_report.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ BEFORE 0.72 0.73 0.73 674
4
+ AFTER 0.61 0.77 0.68 437
5
+ EQUAL 0.37 0.37 0.37 106
6
+ VAGUE 0.42 0.23 0.29 268
7
+
8
+ accuracy 0.62 1485
9
+ macro avg 0.53 0.52 0.52 1485
10
+ weighted avg 0.61 0.62 0.61 1485
11
+
12
+ precision recall f1-score support
13
+
14
+ BEFORE 0.87 0.76 0.81 772
15
+ AFTER 0.79 0.81 0.80 536
16
+ EQUAL 0.46 0.42 0.44 116
17
+ VAGUE 0.42 1.00 0.59 61
18
+
19
+ accuracy 0.76 1485
20
+ macro avg 0.63 0.75 0.66 1485
21
+ weighted avg 0.79 0.76 0.77 1485
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd49e993a971a07961c0768a7a722d1798a23fa2eb95051b6f7ba60253ba261
3
+ size 527633677
special_tokens_map.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[א1]",
4
+ "[/א1]",
5
+ "[א2]",
6
+ "[/א2]"
7
+ ],
8
+ "cls_token": "[CLS]",
9
+ "mask_token": "[MASK]",
10
+ "pad_token": "[PAD]",
11
+ "sep_token": "[SEP]",
12
+ "unk_token": "[UNK]"
13
+ }
temporal_relation_classification.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Tuple, Union
2
+
3
+ import torch
4
+ from torch import nn
5
+ from torch.nn import CrossEntropyLoss, BCEWithLogitsLoss, MSELoss
6
+ from transformers import BertPreTrainedModel, BertModel, BertForSequenceClassification
7
+ from transformers.modeling_outputs import SequenceClassifierOutput
8
+
9
+ from trc_model.temporal_relation_classification_config import TemporalRelationClassificationConfig
10
+
11
+
12
class TokenPooler(nn.Module):
    """Project a single token's hidden state through a dense layer + tanh.

    Mirrors the shape of HF's ``BertPooler`` (Linear(hidden, hidden) followed
    by Tanh) but, unlike ``BertPooler``, it is applied to an arbitrary token
    representation handed in by the caller — NOT to the first ([CLS]) token.
    Input and output shapes are identical: (..., hidden_size).
    """

    def __init__(self, config):
        super().__init__()
        # Keep attribute names (dense / activation) stable: they are part of
        # the state-dict keys of saved checkpoints.
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, token_tensor: torch.Tensor) -> torch.Tensor:
        """Return tanh(W @ token_tensor + b), same shape as the input."""
        return self.activation(self.dense(token_tensor))
24
+
25
+
26
class TemporalRelationClassification(BertForSequenceClassification):
    """BERT-based classifier for the temporal relation between two events
    marked inside an input sequence (labels e.g. BEFORE/AFTER/EQUAL/VAGUE).

    Supported ``config.architecture`` values:
      - 'SEQ_CLS': classify from the pooled [CLS] representation.
      - 'ESS':     classify from the concatenated hidden states of the two
                   event start-marker tokens.
      - 'EF':      classify from the concatenated hidden states of the two
                   event tokens themselves.
      - 'EMP':     fuse each event token with its start marker via a linear
                   layer, concatenate the two fused vectors, then classify.
    """

    config_class = TemporalRelationClassificationConfig

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.special_markers = config.special_markers
        self.pool_tokens = config.pool_tokens
        # Token ids used to locate the two events in input_ids:
        # ES_ID flags the events when no special marker tokens are used;
        # EMS1 / EMS2 are the ids of the two event start-marker tokens.
        self.ES_ID = config.ES_ID
        self.EMS1 = config.EMS1
        self.EMS2 = config.EMS2
        self.architecture = config.architecture
        # Fail fast on a bad config instead of silently producing
        # logits=None deep inside forward().
        if self.architecture not in ('SEQ_CLS', 'ESS', 'EF', 'EMP'):
            raise ValueError(
                f"Unsupported architecture {self.architecture!r}; "
                "expected one of 'SEQ_CLS', 'ESS', 'EF', 'EMP'")
        self.config = config

        # Load the backbone encoder; grow its embedding matrix if the
        # tokenizer added marker tokens beyond the base vocabulary.
        self.bert = BertModel.from_pretrained(config.base_lm)
        if self.bert.config.vocab_size != config.vocab_size:
            self.bert.resize_token_embeddings(config.vocab_size)

        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        if config.pool_tokens:
            # One dense+tanh pooler per extracted token position.
            self.ems_1_pooler = TokenPooler(config)
            self.ems_2_pooler = TokenPooler(config)
            self.e_1_pooler = TokenPooler(config)
            self.e_2_pooler = TokenPooler(config)

        self.dropout = nn.Dropout(classifier_dropout)

        self.classification_layers = None
        if self.architecture == 'SEQ_CLS':
            self.classification_layers = nn.Sequential(
                nn.Linear(config.hidden_size, config.num_labels)
            )
        if self.architecture == 'EMP':
            # Fuse (marker, event) pairs back down to hidden_size each.
            self.e_1_linear = nn.Linear(config.hidden_size * 2, config.hidden_size)
            self.e_2_linear = nn.Linear(config.hidden_size * 2, config.hidden_size)

        if self.architecture in ['ESS', 'EF', 'EMP']:
            self.classification_layers = nn.Sequential(
                nn.Linear(config.hidden_size * 2, config.hidden_size),
                nn.Linear(config.hidden_size, config.num_labels)
            )

        # Initialize weights and apply final processing
        # NOTE(review): post_init() is deliberately skipped — presumably to
        # avoid re-initializing the freshly loaded pretrained encoder;
        # confirm the new classifier layers are initialized as intended.
        # self.post_init()

    def _get_entities_and_start_markers_indices(self, input_ids):
        """Locate, per batch row, the positions of the two event start
        markers and of the event tokens immediately following them.

        Returns a 4-tuple of 1-D index tensors
        ``(marker_1, event_1, marker_2, event_2)``, one entry per row.
        Assumes each marker id occurs exactly once per sequence (``.item()``
        / ``.squeeze()`` would fail otherwise).
        """
        if not self.special_markers:
            # Without dedicated markers both events carry the same id
            # (ES_ID); nonzero() then yields two positions per row.
            # device=self.device keeps these index tensors on the same
            # device as the marker branch below.
            event_1_start, event_2_start = torch.tensor(
                [(ids == self.ES_ID).nonzero().squeeze().tolist() for ids in input_ids],
                device=self.device).T
            return event_1_start, event_1_start + 1, event_2_start, event_2_start + 1

        em1_s = torch.tensor([(ids == self.EMS1).nonzero().item() for ids in input_ids], device=self.device)
        entity_1 = em1_s + 1

        em2_s = torch.tensor([(ids == self.EMS2).nonzero().item() for ids in input_ids], device=self.device)
        entity_2 = em2_s + 1
        return em1_s, entity_1, em2_s, entity_2

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the classification loss. Indices should be
            in `[0, ..., config.num_labels - 1]`. A cross-entropy loss is
            always used here (no regression path, unlike the parent class).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        logits = None
        if self.architecture == 'SEQ_CLS':
            # outputs[1] is the pooled [CLS] representation.
            pooled_output = outputs[1]

            pooled_output = self.dropout(pooled_output)
            logits = self.classification_layers(pooled_output)
        else:
            # outputs[0] is the full sequence of hidden states.
            sequence_output = outputs[0]

            sequence_output = self.dropout(sequence_output)

            entity_mark_1_s, entity_1, entity_mark_2_s, entity_2 = self._get_entities_and_start_markers_indices(
                input_ids)

            # Gather one hidden state per row at each located position.
            e1_start_mark_tensors = sequence_output[torch.arange(sequence_output.size(0)), entity_mark_1_s]
            e2_start_mark_tensors = sequence_output[torch.arange(sequence_output.size(0)), entity_mark_2_s]

            e1_tensor = sequence_output[torch.arange(sequence_output.size(0)), entity_1]
            e2_tensor = sequence_output[torch.arange(sequence_output.size(0)), entity_2]

            if self.pool_tokens:
                e1_start_mark_tensors = self.ems_1_pooler(e1_start_mark_tensors)
                e2_start_mark_tensors = self.ems_2_pooler(e2_start_mark_tensors)

                e1_tensor = self.e_1_pooler(e1_tensor)
                e2_tensor = self.e_2_pooler(e2_tensor)

            if self.architecture == 'ESS':
                e_start_markers_cat = torch.cat((e1_start_mark_tensors, e2_start_mark_tensors), 1)
                logits = self.classification_layers(e_start_markers_cat)

            if self.architecture == 'EF':
                events_cat = torch.cat((e1_tensor, e2_tensor), 1)
                logits = self.classification_layers(events_cat)

            if self.architecture == 'EMP':
                e1_and_start_mark = self.e_1_linear(torch.cat((e1_start_mark_tensors, e1_tensor), 1))
                e2_and_start_mark = self.e_2_linear(torch.cat((e2_start_mark_tensors, e2_tensor), 1))
                both_e_cat = torch.cat((e1_and_start_mark, e2_and_start_mark), 1)
                logits = self.classification_layers(both_e_cat)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
temporal_relation_classification_config.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig, BertConfig
2
+
3
+
4
class TemporalRelationClassificationConfig(BertConfig):
    """``BertConfig`` extended with the fields the temporal relation
    classification head needs: marker token ids, pooling switch, and the
    head architecture selector.
    """

    model_type = "TemporalRelationClassification"

    def __init__(self, special_markers=False, pool_tokens=False, ES_ID=0, EMS1=0, EMS2=0, architecture=0, **kwargs):
        super().__init__(**kwargs)
        # Record the backbone checkpoint name so the model can reload the
        # same encoder later (mirrors name_or_path at construction time).
        self.base_lm = self.name_or_path
        extra_fields = (
            ("pool_tokens", pool_tokens),
            ("special_markers", special_markers),
            ("architecture", architecture),
            ("EMS1", EMS1),
            ("EMS2", EMS2),
            ("ES_ID", ES_ID),
        )
        for attr_name, attr_value in extra_fields:
            setattr(self, attr_name, attr_value)
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "max_len": 512,
7
+ "model_max_length": 512,
8
+ "name_or_path": "onlplab/alephbert-base",
9
+ "never_split": null,
10
+ "pad_token": "[PAD]",
11
+ "sep_token": "[SEP]",
12
+ "special_tokens_map_file": null,
13
+ "strip_accents": null,
14
+ "tokenize_chinese_chars": true,
15
+ "tokenizer_class": "BertTokenizer",
16
+ "unk_token": "[UNK]"
17
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21ac59d2913711397a63f441cc16d3162d1b0218c47185750d13da6a5dde2800
3
+ size 3567