# Relu-Norm / modeling_custom.py
import torch
import torch.nn as nn
from transformers import BertForTokenClassification


class FourOClassifier(nn.Module):
    """Classification head: Linear -> ReLU -> Dropout -> BatchNorm1d -> Linear."""

    def __init__(self, clf_hidden_size, num_labels):
        super().__init__()
        self.dense = nn.Linear(clf_hidden_size, clf_hidden_size)
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batch_norm = nn.BatchNorm1d(clf_hidden_size)
        self.output_layer = nn.Linear(clf_hidden_size, num_labels)

    def forward(self, clf_input):
        x = self.dense(clf_input)
        x = self.activation(x)
        x = self.dropout(x)
        # BatchNorm1d normalizes over dim 1, so move the hidden features there:
        # (batch, seq_len, hidden) -> (batch, hidden, seq_len), then back.
        x = self.batch_norm(x.permute(0, 2, 1)).permute(0, 2, 1)
        x = self.output_layer(x)
        return x
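
# Shape sketch (hypothetical sizes, not taken from any checkpoint config):
# with clf_hidden_size=768 and num_labels=5, the head maps token embeddings of
# shape (batch, seq_len, 768) to per-token logits of shape (batch, seq_len, 5):
#
#     head = FourOClassifier(clf_hidden_size=768, num_labels=5)
#     logits = head(torch.randn(2, 16, 768))  # -> torch.Size([2, 16, 5])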


class BertForTokenClassificationWithFourO(BertForTokenClassification):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        # Replace the stock linear classifier with the FourO head.
        self.classifier = FourOClassifier(config.hidden_size, config.num_labels)
        self.init_weights()

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        model = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
        model.check_classifier_initialization()
        return model

    def check_classifier_initialization(self):
        """Warn if the classifier head weights look randomly initialized,
        i.e. were probably not restored from the checkpoint."""
        def is_randomly_initialized(tensor):
            # Heuristic: near-zero mean with a small but non-degenerate std.
            return bool(torch.abs(tensor.mean()) < 1e-3 < tensor.std() < 1e-1)

        classifier_weights = [
            self.classifier.dense.weight,
            self.classifier.dense.bias,
            self.classifier.output_layer.weight,
            self.classifier.output_layer.bias,
        ]
        if any(is_randomly_initialized(w) for w in classifier_weights):
            print("Warning: classifier weights appear to be randomly initialized; "
                  "the FourO head may not have been loaded from the checkpoint.")

    def freeze_bert(self):
        """Freezes the BERT layers to prevent their parameters from being updated during training."""
        for param in self.bert.parameters():
            param.requires_grad = False
        print("BERT layers frozen.")

    def unfreeze_bert(self):
        """Unfreezes the BERT layers to allow their parameters to be updated during training."""
        for param in self.bert.parameters():
            param.requires_grad = True
        print("BERT layers unfrozen.")