import torch
import torch.nn as nn

from transformers import BertForTokenClassification


class FiveOClassifier(nn.Module):
    """Three-layer MLP head: two blocks of linear -> ReLU -> dropout, followed
    by a linear projection onto the label space."""

    def __init__(self, clf_hidden_size, clf_num_labels):
        super().__init__()
        # Progressively narrow the representation: hidden -> hidden/2 -> hidden/4.
        self.dense1 = nn.Linear(clf_hidden_size, clf_hidden_size // 2)
        self.activation1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.1)
        self.dense2 = nn.Linear(clf_hidden_size // 2, clf_hidden_size // 4)
        self.activation2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.1)
        self.output_layer = nn.Linear(clf_hidden_size // 4, clf_num_labels)

    def forward(self, clf_input):
        x = self.dense1(clf_input)
        x = self.activation1(x)
        x = self.dropout1(x)
        x = self.dense2(x)
        x = self.activation2(x)
        x = self.dropout2(x)
        # Per-token logits over the label set.
        x = self.output_layer(x)
        return x


class BertForTokenClassificationWithFiveO(BertForTokenClassification):
    """BertForTokenClassification with the stock linear classifier replaced by
    the deeper FiveOClassifier head."""

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.classifier = FiveOClassifier(config.hidden_size, config.num_labels)
        self.init_weights()

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        model = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        # The custom head has no pretrained weights, so sanity-check that it
        # was freshly initialized rather than loaded from the checkpoint.
        model.check_classifier_initialization()
        return model

    def check_classifier_initialization(self):
        """Heuristic check that the classifier head is randomly initialized."""

        def is_randomly_initialized(tensor):
            # Freshly initialized weights should have a mean near zero and a
            # small but non-zero standard deviation.
            return tensor.mean().abs().item() < 1e-3 and 1e-3 < tensor.std().item() < 1e-1

        classifier_weights = [
            self.classifier.dense1.weight,
            self.classifier.dense1.bias,
            self.classifier.dense2.weight,
            self.classifier.dense2.bias,
            self.classifier.output_layer.weight,
            self.classifier.output_layer.bias,
        ]

        if all(is_randomly_initialized(tensor) for tensor in classifier_weights):
            print("Classifier head appears to be randomly initialized.")
        else:
            # Note: zero-initialized biases have zero std and will also fail
            # this heuristic, so treat the warning as a prompt to inspect.
            print("Warning: some classifier parameters do not look randomly initialized.")

    def freeze_bert(self):
        """Freezes the BERT layers to prevent their parameters from being updated during training."""
        for param in self.bert.parameters():
            param.requires_grad = False
        print("BERT layers frozen.")

    def unfreeze_bert(self):
        """Unfreezes the BERT layers to allow their parameters to be updated during training."""
        for param in self.bert.parameters():
            param.requires_grad = True
        print("BERT layers unfrozen.")
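

# Usage sketch (illustrative only): assumes the "bert-base-cased" checkpoint
# and num_labels=9, which are placeholder choices rather than values
# prescribed by this module.
if __name__ == "__main__":
    # Shape check for the standalone head:
    # (batch, seq_len, hidden_size) -> (batch, seq_len, num_labels).
    head = FiveOClassifier(clf_hidden_size=768, clf_num_labels=9)
    print(head(torch.randn(2, 16, 768)).shape)  # torch.Size([2, 16, 9])

    # Load pretrained weights for the encoder; the FiveO head stays randomly
    # initialized, which from_pretrained verifies via the heuristic above.
    model = BertForTokenClassificationWithFiveO.from_pretrained(
        "bert-base-cased", num_labels=9
    )

    # Typical two-stage fine-tuning: first train only the head...
    model.freeze_bert()
    n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Trainable parameters with BERT frozen: {n_trainable}")

    # ...then unfreeze the encoder for full fine-tuning.
    model.unfreeze_bert()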