katrjohn committed on
Commit
327aa65
·
verified ·
1 Parent(s): 2f86a47

Upload 4 files

Browse files
configuration_distil_greek_news_bert.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import DistilBertConfig
2
+
3
class DistilGreekNewsBertConfig(DistilBertConfig):
    """Configuration for the two-headed DistilBERT model (classification + NER).

    Extends ``DistilBertConfig`` with three extra fields; every other keyword
    argument is forwarded unchanged to the parent constructor.

    Args:
        num_labels_class: Number of output labels for the classification head.
        num_labels_ner: Number of output labels for the NER head.
        ner_loss_weight: Multiplier applied to the NER loss term when the two
            losses are combined.
        **kwargs: Passed through to ``DistilBertConfig.__init__``.
    """

    model_type = "distil_greek_news_bert"

    def __init__(
        self,
        num_labels_class: int = 19,
        num_labels_ner: int = 32,
        ner_loss_weight: float = 3.0,
        **kwargs,
    ) -> None:
        # Let the parent consume the standard DistilBERT options first.
        super().__init__(**kwargs)

        # Task-specific settings stored on the config so they are serialized
        # together with the rest of the configuration.
        self.num_labels_class = num_labels_class
        self.num_labels_ner = num_labels_ner
        self.ner_loss_weight = ner_loss_weight


# Tells AutoConfig where to import this class when trust_remote_code=True.
DistilGreekNewsBertConfig.register_for_auto_class()
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf439c45367c9505c05681c979f9c7bcf2372227f54bffa69531f82d0c3e50e7
3
+ size 281739892
modeling_distil_greek_news_bert.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ from transformers import DistilBertModel, DistilBertPreTrainedModel
3
+ from .configuration_distil_greek_news_bert import DistilGreekNewsBertConfig # ⬅️ relative
4
+
5
class DistilGreekNewsBert(DistilBertPreTrainedModel):
    """DistilBERT encoder with a sequence-classification head and a NER head.

    Both heads read the same ``DistilBertModel`` output: the classification
    head uses the hidden state at position 0 of each sequence, the NER head
    uses every position. When both label tensors are supplied, ``forward``
    also returns a single combined loss.
    """

    config_class = DistilGreekNewsBertConfig  # critical link
    # Stored as the auto-class *name*: the identifier ``AutoModel`` is not
    # imported in this module, so referencing it bare raises NameError.
    _auto_class = "AutoModel"

    def __init__(self, config):
        """Build the encoder and both task heads from ``config``.

        Args:
            config: Configuration providing ``dim``, ``num_labels_class`` and
                ``num_labels_ner``; ``ner_loss_weight`` is optional and
                defaults to 3.0 when absent.
        """
        super().__init__(config)

        self.distilbert = DistilBertModel(config)

        n_cls = config.num_labels_class
        n_ner = config.num_labels_ner
        # The attribute name must be a string; fall back to 3.0 for configs
        # that do not define the field.
        self.ner_loss_weight = getattr(config, "ner_loss_weight", 3.0)

        # Classification head: dropout -> linear -> ReLU -> linear.
        self.class_dropout = nn.Dropout(0.3)
        self.class_fc = nn.Linear(config.dim, 768)
        self.class_relu = nn.ReLU()
        self.classifier = nn.Linear(768, n_cls)

        # NER head: one linear projection applied at every position.
        self.ner_classifier = nn.Linear(config.dim, n_ner)

        # Reference losses captured on the first training step that has labels.
        # NOTE: these are plain Python attributes, not parameters or buffers,
        # so they are not part of the module's state_dict.
        self.initial_cls_loss = None
        self.initial_ner_loss = None

        self.post_init()

    def forward(
        self,
        input_ids,
        attention_mask=None,
        labels_class=None,
        labels_ner=None,
    ):
        """Run the encoder and both heads, optionally computing the joint loss.

        Args:
            input_ids: Token ids passed to the DistilBERT encoder.
            attention_mask: Optional attention mask forwarded to the encoder.
            labels_class: Optional classification targets.
            labels_ner: Optional NER targets; entries equal to -100 are
                ignored by the NER loss.

        Returns:
            ``(logits_class, logits_ner)`` when either label tensor is missing,
            otherwise ``(loss, logits_class, logits_ner)``.
        """
        outputs = self.distilbert(
            input_ids,
            attention_mask=attention_mask,
            return_dict=True,
        )

        sequence_output = outputs.last_hidden_state
        # Hidden state at position 0 of every sequence feeds the classifier.
        cls_output = sequence_output[:, 0, :]

        # ── Classification branch ─────────────
        cls_output = self.class_dropout(cls_output)
        cls_features = self.class_fc(cls_output)
        cls_features = self.class_relu(cls_features)
        logits_class = self.classifier(cls_features)

        # ── NER branch ────────────────────────
        logits_ner = self.ner_classifier(sequence_output)

        # Both label sets are required for the joint loss; otherwise return
        # logits only.
        if labels_class is None or labels_ner is None:
            return logits_class, logits_ner

        # — Classification loss
        loss_cls = nn.CrossEntropyLoss()(logits_class, labels_class)

        # — NER loss: summed, then averaged over positions whose label is not -100
        ner_loss_sum = nn.CrossEntropyLoss(ignore_index=-100, reduction='sum')(
            logits_ner.view(-1, logits_ner.size(-1)),
            labels_ner.view(-1)
        )
        mask = (labels_ner != -100).view(-1).float()
        # The epsilon avoids division by zero when every label is -100.
        loss_ner = ner_loss_sum / (mask.sum() + 1e-9)

        # — Dynamic normalization: store initial values (training mode only)
        if self.initial_cls_loss is None and self.training:
            self.initial_cls_loss = loss_cls.item()
        if self.initial_ner_loss is None and self.training:
            self.initial_ner_loss = loss_ner.item()

        # — Normalize losses by their stored initial values when available
        if (self.initial_cls_loss is not None) and (self.initial_ner_loss is not None):
            norm_cls_loss = loss_cls / (self.initial_cls_loss + 1e-8)
            norm_ner_loss = loss_ner / (self.initial_ner_loss + 1e-8)
        else:
            norm_cls_loss = loss_cls
            norm_ner_loss = loss_ner

        # — Combine with weighting
        loss = norm_cls_loss + self.ner_loss_weight * norm_ner_loss
        return loss, logits_class, logits_ner
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a3087de3f6bc9d9199d91ea74b310bed629b1fe2a75e9646e43cacdb99d48f8
3
+ size 5304