File size: 2,478 Bytes
07b65ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import torch
import torch.nn as nn
from transformers import BertForTokenClassification


class FiveOClassifier(nn.Module):
    """Three-layer MLP classification head.

    Maps ``clf_hidden_size`` features down through ``hidden // 2`` and
    ``hidden // 4`` (ReLU + dropout after each hidden layer) to
    ``clf_num_labels`` raw logits. Attribute names (``dense1``, ``dense2``,
    ``output_layer``) are part of the public surface — sibling code reads
    them directly — so they must not be renamed.
    """

    def __init__(self, clf_hidden_size, clf_num_labels):
        super().__init__()
        mid = clf_hidden_size // 2
        narrow = clf_hidden_size // 4
        self.dense1 = nn.Linear(clf_hidden_size, mid)
        self.activation1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.1)
        self.dense2 = nn.Linear(mid, narrow)
        self.activation2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.1)
        self.output_layer = nn.Linear(narrow, clf_num_labels)

    def forward(self, clf_input):
        """Run the head on ``clf_input`` and return unnormalized logits."""
        hidden = self.dropout1(self.activation1(self.dense1(clf_input)))
        hidden = self.dropout2(self.activation2(self.dense2(hidden)))
        return self.output_layer(hidden)


class BertForTokenClassificationWithFiveO(BertForTokenClassification):
    """BertForTokenClassification whose single linear head is replaced by
    the deeper FiveOClassifier MLP.

    After ``from_pretrained`` the model runs a heuristic check that warns
    when the head's parameters look freshly initialized (i.e. were not
    restored from the checkpoint).
    """

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels  # kept for parity with the parent class
        # Swap the parent's nn.Linear classifier for the MLP head.
        self.classifier = FiveOClassifier(config.hidden_size, config.num_labels)
        self.init_weights()

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        """Load pretrained weights, then sanity-check the classifier head."""
        model = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        model.check_classifier_initialization()
        return model

    def check_classifier_initialization(self):
        """Warn if the classifier head appears randomly initialized.

        Bug fix: the original built the heuristic and the parameter list but
        never applied one to the other, so the check silently did nothing.
        """
        def is_randomly_initialized(tensor):
            # Heuristic: near-zero mean combined with a small-but-nonzero std
            # is typical of default random init rather than trained weights.
            return bool(torch.abs(tensor.mean()) < 1e-3 < tensor.std() < 1e-1)

        classifier_weights = [
            self.classifier.dense1.weight,
            self.classifier.dense1.bias,
            self.classifier.dense2.weight,
            self.classifier.dense2.bias,
            self.classifier.output_layer.weight,
            self.classifier.output_layer.bias
        ]

        if any(is_randomly_initialized(w) for w in classifier_weights):
            # print (not raise) keeps from_pretrained usable when the head is
            # intentionally fresh, matching the file's print-based reporting.
            print("Warning: classifier head looks randomly initialized "
                  "(weights may not have been loaded from the checkpoint).")

    def freeze_bert(self):
        """Freezes the BERT layers to prevent their parameters from being updated during training."""
        for param in self.bert.parameters():
            param.requires_grad = False
        print("BERT layers frozen.")

    def unfreeze_bert(self):
        """Unfreezes the BERT layers to allow their parameters to be updated during training."""
        for param in self.bert.parameters():
            param.requires_grad = True
        print("BERT layers unfrozen.")