Upload folder using huggingface_hub
__init__.py
ADDED
File without changes

__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (171 Bytes)

__pycache__/modeling_helmbert.cpython-312.pyc
CHANGED
Binary files a/__pycache__/modeling_helmbert.cpython-312.pyc and b/__pycache__/modeling_helmbert.cpython-312.pyc differ

modeling_helmbert.py
CHANGED
@@ -7,17 +7,14 @@ This module implements the HELM-BERT model with:
 """
 
 import math
-from dataclasses import dataclass
 from typing import Any, Dict, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from packaging import version
 from torch import _softmax_backward_data
 from transformers import PreTrainedModel
 from transformers.modeling_outputs import (
-    BaseModelOutput,
     BaseModelOutputWithPooling,
     MaskedLMOutput,
     SequenceClassifierOutput,
@@ -44,7 +41,7 @@ def masked_layer_norm(
     Returns:
         Normalized tensor with padding positions zeroed out
     """
-    output = layer_norm(x).to(x)
+    output = layer_norm(x).to(x.dtype)
     if mask is None:
         return output
     if mask.dim() != x.dim():
@@ -777,13 +774,15 @@ class HELMBertModel(HELMBertPreTrainedModel):
 
 
 class HELMBertLMHead(nn.Module):
-    """MLM head with weight tying."""
+    """MLM head with weight tying (HuggingFace standard)."""
 
     def __init__(self, config: HELMBertConfig):
         super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.layer_norm = nn.LayerNorm(config.hidden_size)
         self.activation = nn.GELU()
+
+        # Decoder with weight tying (weight tied to embedding, bias is separate)
         self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=True)
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -814,7 +813,7 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
     >>> predictions = outputs.logits.argmax(dim=-1)
     """
 
-    _tied_weights_keys = ["lm_head.decoder.weight"
+    _tied_weights_keys = ["lm_head.decoder.weight"]
 
     def __init__(self, config: HELMBertConfig):
         super().__init__(config)
@@ -829,6 +828,9 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
     def set_output_embeddings(self, new_embeddings: nn.Linear) -> None:
         self.lm_head.decoder = new_embeddings
 
+    def get_input_embeddings(self) -> nn.Embedding:
+        return self.helmbert.embeddings.word_embeddings
+
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -878,7 +880,7 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
         hidden_states = encoder_outputs[2]
         attentions = encoder_outputs[3]
 
-        # MLM head
+        # MLM head (weight tying handled by HuggingFace)
         prediction_scores = self.lm_head(sequence_output)
 
         # Calculate loss if labels provided
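
Review note on the weight-tying fixes above: once _tied_weights_keys is a well-formed list and get_input_embeddings is defined, the standard transformers machinery (PreTrainedModel.tie_weights, invoked during from_pretrained) can point lm_head.decoder.weight at the input embedding matrix when config.tie_word_embeddings is enabled, and _tied_weights_keys tells the serialization code which state-dict keys are duplicates of a tied weight. Below is a minimal sketch of how to check the tie on a loaded model; the attribute names (helmbert.embeddings.word_embeddings, lm_head.decoder) come from the diff, everything else is generic transformers/PyTorch behavior, and the helper name is made up for illustration.

def check_weight_tying(model) -> bool:
    # PreTrainedModel.tie_weights() is normally already called by from_pretrained();
    # it only ties the weights when config.tie_word_embeddings is True.
    model.tie_weights()
    input_emb = model.get_input_embeddings().weight   # helmbert.embeddings.word_embeddings
    decoder_w = model.lm_head.decoder.weight          # MLM output projection
    same_storage = input_emb.data_ptr() == decoder_w.data_ptr()
    right_shape = tuple(decoder_w.shape) == (model.config.vocab_size, model.config.hidden_size)
    return same_storage and right_shape

On the masked_layer_norm hunk: the new line casts the LayerNorm output back to the input dtype before masking. A rough sketch of the surrounding logic, reconstructed only from the context lines shown above (the real signature and mask handling may differ):

import torch
import torch.nn as nn

def masked_layer_norm_sketch(layer_norm: nn.LayerNorm, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
    # LayerNorm can upcast under mixed precision; cast back to the input dtype.
    output = layer_norm(x).to(x.dtype)
    if mask is None:
        return output
    if mask.dim() != x.dim():
        # Broadcast a (batch, seq) padding mask over the hidden dimension.
        mask = mask.unsqueeze(-1)
    # Zero out padding positions, as described in the docstring.
    return output * mask.to(output.dtype)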