Fill-Mask
Transformers
PyTorch
Norwegian
Norwegian Bokmål
Norwegian Nynorsk
BERT
NorBERT
Norwegian
encoder
custom_code
Instructions to use ltg/norbert3-base with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ltg/norbert3-base with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="ltg/norbert3-base", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForMaskedLM

model = AutoModelForMaskedLM.from_pretrained("ltg/norbert3-base", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Update modeling_norbert.py
Browse files
- modeling_norbert.py +4 -7
modeling_norbert.py
CHANGED
|
@@ -231,15 +231,12 @@ class NorbertPreTrainedModel(PreTrainedModel):
|
|
| 231 |
def _init_weights(self, module):
|
| 232 |
std = math.sqrt(2.0 / (5.0 * self.hidden_size))
|
| 233 |
|
| 234 |
-
if isinstance(module, nn.Linear):
|
| 235 |
nn.init.trunc_normal_(module.weight.data, mean=0.0, std=std, a=-2*std, b=2*std)
|
| 236 |
-
|
| 237 |
-
module.bias.data.zero_()
|
| 238 |
-
elif isinstance(module, nn.Embedding):
|
| 239 |
-
nn.init.trunc_normal_(module.weight.data, mean=0.0, std=std, a=-2*std, b=2*std)
|
| 240 |
-
elif isinstance(module, nn.LayerNorm):
|
| 241 |
-
module.bias.data.zero_()
|
| 242 |
module.weight.data.fill_(1.0)
|
|
|
|
|
|
|
| 243 |
|
| 244 |
|
| 245 |
class NorbertModel(NorbertPreTrainedModel):
|
|
|
|
| 231 |
def _init_weights(self, module):
|
| 232 |
std = math.sqrt(2.0 / (5.0 * self.hidden_size))
|
| 233 |
|
| 234 |
+
if isinstance(module, nn.Linear) or isinstance(module, nn.Embedding):
|
| 235 |
nn.init.trunc_normal_(module.weight.data, mean=0.0, std=std, a=-2*std, b=2*std)
|
| 236 |
+
elif isinstance(module, nn.LayerNorm) and module.weight is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
module.weight.data.fill_(1.0)
|
| 238 |
+
if module.bias is not None:
|
| 239 |
+
module.bias.data.zero_()
|
| 240 |
|
| 241 |
|
| 242 |
class NorbertModel(NorbertPreTrainedModel):
|