Upload folder using huggingface_hub
__init__.py
ADDED
File without changes

__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (171 Bytes)

__pycache__/modeling_helmbert.cpython-312.pyc
CHANGED
Binary files a/__pycache__/modeling_helmbert.cpython-312.pyc and b/__pycache__/modeling_helmbert.cpython-312.pyc differ

modeling_helmbert.py
CHANGED
@@ -7,17 +7,14 @@ This module implements the HELM-BERT model with:
 """
 
 import math
-from dataclasses import dataclass
 from typing import Any, Dict, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from packaging import version
 from torch import _softmax_backward_data
 from transformers import PreTrainedModel
 from transformers.modeling_outputs import (
-    BaseModelOutput,
     BaseModelOutputWithPooling,
     MaskedLMOutput,
     SequenceClassifierOutput,
@@ -44,7 +41,7 @@ def masked_layer_norm(
     Returns:
         Normalized tensor with padding positions zeroed out
     """
-    output = layer_norm(x).to(x)
+    output = layer_norm(x).to(x.dtype)
     if mask is None:
         return output
     if mask.dim() != x.dim():
@@ -777,13 +774,15 @@ class HELMBertModel(HELMBertPreTrainedModel):
 
 
 class HELMBertLMHead(nn.Module):
-    """MLM head with weight tying."""
+    """MLM head with weight tying (HuggingFace standard)."""
 
     def __init__(self, config: HELMBertConfig):
         super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.layer_norm = nn.LayerNorm(config.hidden_size)
         self.activation = nn.GELU()
+
+        # Decoder with weight tying (weight tied to embedding, bias is separate)
         self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=True)
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -814,7 +813,7 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
     >>> predictions = outputs.logits.argmax(dim=-1)
     """
 
-    _tied_weights_keys = ["lm_head.decoder.weight"
+    _tied_weights_keys = ["lm_head.decoder.weight"]
 
     def __init__(self, config: HELMBertConfig):
         super().__init__(config)
@@ -829,6 +828,9 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
     def set_output_embeddings(self, new_embeddings: nn.Linear) -> None:
         self.lm_head.decoder = new_embeddings
 
+    def get_input_embeddings(self) -> nn.Embedding:
+        return self.helmbert.embeddings.word_embeddings
+
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -878,7 +880,7 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
         hidden_states = encoder_outputs[2]
         attentions = encoder_outputs[3]
 
-        # MLM head
+        # MLM head (weight tying handled by HuggingFace)
         prediction_scores = self.lm_head(sequence_output)
 
         # Calculate loss if labels provided
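
Review note on the weight-tying fixes above: once _tied_weights_keys is a well-formed list and get_input_embeddings is defined, the standard transformers machinery (PreTrainedModel.tie_weights, invoked during from_pretrained) can point lm_head.decoder.weight at the input embedding matrix when config.tie_word_embeddings is enabled, and _tied_weights_keys tells the serialization code which state-dict keys are duplicates of a tied weight. Below is a minimal sketch of how to check the tie on a loaded model; the attribute names (helmbert.embeddings.word_embeddings, lm_head.decoder) come from the diff, everything else is generic transformers/PyTorch behavior, and the helper name is made up for illustration.

def check_weight_tying(model) -> bool:
    # PreTrainedModel.tie_weights() is normally already called by from_pretrained();
    # it only ties the weights when config.tie_word_embeddings is True.
    model.tie_weights()
    input_emb = model.get_input_embeddings().weight   # helmbert.embeddings.word_embeddings
    decoder_w = model.lm_head.decoder.weight          # MLM output projection
    same_storage = input_emb.data_ptr() == decoder_w.data_ptr()
    right_shape = tuple(decoder_w.shape) == (model.config.vocab_size, model.config.hidden_size)
    return same_storage and right_shape

On the masked_layer_norm hunk: the new line casts the LayerNorm output back to the input dtype before masking. A rough sketch of the surrounding logic, reconstructed only from the context lines shown above (the real signature and mask handling may differ):

import torch
import torch.nn as nn

def masked_layer_norm_sketch(layer_norm: nn.LayerNorm, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
    # LayerNorm can upcast under mixed precision; cast back to the input dtype.
    output = layer_norm(x).to(x.dtype)
    if mask is None:
        return output
    if mask.dim() != x.dim():
        # Broadcast a (batch, seq) padding mask over the hidden dimension.
        mask = mask.unsqueeze(-1)
    # Zero out padding positions, as described in the docstring.
    return output * mask.to(output.dtype)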