Fill-Mask
Transformers
PyTorch
Norwegian
Norwegian Bokmål
Norwegian Nynorsk
BERT
NorBERT
Norwegian
encoder
custom_code
Instructions to use ltg/norbert3-base with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ltg/norbert3-base with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="ltg/norbert3-base", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModelForMaskedLM

model = AutoModelForMaskedLM.from_pretrained("ltg/norbert3-base", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
Fix compatibility with transformers v5
Browse files
- modeling_norbert.py +9 -2
modeling_norbert.py
CHANGED
|
@@ -222,7 +222,7 @@ class NorbertPreTrainedModel(PreTrainedModel):
|
|
| 222 |
config_class = NorbertConfig
|
| 223 |
base_model_prefix = "norbert3"
|
| 224 |
supports_gradient_checkpointing = True
|
| 225 |
-
_tied_weights_keys =
|
| 226 |
|
| 227 |
def _set_gradient_checkpointing(self, module, value=False):
|
| 228 |
if isinstance(module, Encoder):
|
|
@@ -252,6 +252,8 @@ class NorbertModel(NorbertPreTrainedModel):
|
|
| 252 |
self.transformer = Encoder(config, activation_checkpointing=gradient_checkpointing)
|
| 253 |
self.classifier = MaskClassifier(config, self.embedding.word_embedding.weight) if add_mlm_layer else None
|
| 254 |
|
|
|
|
|
|
|
| 255 |
def get_input_embeddings(self):
|
| 256 |
return self.embedding.word_embedding
|
| 257 |
|
|
@@ -317,10 +319,11 @@ class NorbertModel(NorbertPreTrainedModel):
|
|
| 317 |
|
| 318 |
class NorbertForMaskedLM(NorbertModel):
|
| 319 |
_keys_to_ignore_on_load_unexpected = ["head"]
|
| 320 |
-
_tied_weights_keys =
|
| 321 |
|
| 322 |
def __init__(self, config, **kwargs):
|
| 323 |
super().__init__(config, add_mlm_layer=True, **kwargs)
|
|
|
|
| 324 |
|
| 325 |
def get_output_embeddings(self):
|
| 326 |
return self.classifier.nonlinearity[-1].weight
|
|
@@ -395,6 +398,7 @@ class NorbertForSequenceClassification(NorbertModel):
|
|
| 395 |
|
| 396 |
self.num_labels = config.num_labels
|
| 397 |
self.head = Classifier(config, self.num_labels)
|
|
|
|
| 398 |
|
| 399 |
def forward(
|
| 400 |
self,
|
|
@@ -460,6 +464,7 @@ class NorbertForTokenClassification(NorbertModel):
|
|
| 460 |
|
| 461 |
self.num_labels = config.num_labels
|
| 462 |
self.head = Classifier(config, self.num_labels)
|
|
|
|
| 463 |
|
| 464 |
def forward(
|
| 465 |
self,
|
|
@@ -507,6 +512,7 @@ class NorbertForQuestionAnswering(NorbertModel):
|
|
| 507 |
|
| 508 |
self.num_labels = config.num_labels
|
| 509 |
self.head = Classifier(config, self.num_labels)
|
|
|
|
| 510 |
|
| 511 |
def forward(
|
| 512 |
self,
|
|
@@ -574,6 +580,7 @@ class NorbertForMultipleChoice(NorbertModel):
|
|
| 574 |
|
| 575 |
self.num_labels = getattr(config, "num_labels", 2)
|
| 576 |
self.head = Classifier(config, self.num_labels)
|
|
|
|
| 577 |
|
| 578 |
def forward(
|
| 579 |
self,
|
|
|
|
| 222 |
config_class = NorbertConfig
|
| 223 |
base_model_prefix = "norbert3"
|
| 224 |
supports_gradient_checkpointing = True
|
| 225 |
+
_tied_weights_keys = {}
|
| 226 |
|
| 227 |
def _set_gradient_checkpointing(self, module, value=False):
|
| 228 |
if isinstance(module, Encoder):
|
|
|
|
| 252 |
self.transformer = Encoder(config, activation_checkpointing=gradient_checkpointing)
|
| 253 |
self.classifier = MaskClassifier(config, self.embedding.word_embedding.weight) if add_mlm_layer else None
|
| 254 |
|
| 255 |
+
self.post_init()
|
| 256 |
+
|
| 257 |
def get_input_embeddings(self):
|
| 258 |
return self.embedding.word_embedding
|
| 259 |
|
|
|
|
| 319 |
|
| 320 |
class NorbertForMaskedLM(NorbertModel):
|
| 321 |
_keys_to_ignore_on_load_unexpected = ["head"]
|
| 322 |
+
_tied_weights_keys = {"classifier.nonlinearity.5.weight": "embedding.word_embedding.weight"}
|
| 323 |
|
| 324 |
def __init__(self, config, **kwargs):
|
| 325 |
super().__init__(config, add_mlm_layer=True, **kwargs)
|
| 326 |
+
self.post_init()
|
| 327 |
|
| 328 |
def get_output_embeddings(self):
|
| 329 |
return self.classifier.nonlinearity[-1].weight
|
|
|
|
| 398 |
|
| 399 |
self.num_labels = config.num_labels
|
| 400 |
self.head = Classifier(config, self.num_labels)
|
| 401 |
+
self.post_init()
|
| 402 |
|
| 403 |
def forward(
|
| 404 |
self,
|
|
|
|
| 464 |
|
| 465 |
self.num_labels = config.num_labels
|
| 466 |
self.head = Classifier(config, self.num_labels)
|
| 467 |
+
self.post_init()
|
| 468 |
|
| 469 |
def forward(
|
| 470 |
self,
|
|
|
|
| 512 |
|
| 513 |
self.num_labels = config.num_labels
|
| 514 |
self.head = Classifier(config, self.num_labels)
|
| 515 |
+
self.post_init()
|
| 516 |
|
| 517 |
def forward(
|
| 518 |
self,
|
|
|
|
| 580 |
|
| 581 |
self.num_labels = getattr(config, "num_labels", 2)
|
| 582 |
self.head = Classifier(config, self.num_labels)
|
| 583 |
+
self.post_init()
|
| 584 |
|
| 585 |
def forward(
|
| 586 |
self,
|