Flansma committed (verified)
Commit c1a5361 · 1 Parent(s): 0aaf6b3

Upload folder using huggingface_hub

__init__.py ADDED
File without changes
__pycache__/__init__.cpython-312.pyc ADDED
Binary file (171 Bytes).
 
__pycache__/modeling_helmbert.cpython-312.pyc CHANGED
Binary files a/__pycache__/modeling_helmbert.cpython-312.pyc and b/__pycache__/modeling_helmbert.cpython-312.pyc differ
 
modeling_helmbert.py CHANGED
@@ -7,17 +7,14 @@ This module implements the HELM-BERT model with:
 """
 
 import math
-from dataclasses import dataclass
 from typing import Any, Dict, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from packaging import version
 from torch import _softmax_backward_data
 from transformers import PreTrainedModel
 from transformers.modeling_outputs import (
-    BaseModelOutput,
     BaseModelOutputWithPooling,
     MaskedLMOutput,
     SequenceClassifierOutput,
@@ -44,7 +41,7 @@ def masked_layer_norm(
     Returns:
         Normalized tensor with padding positions zeroed out
     """
-    output = layer_norm(x).to(x)
+    output = layer_norm(x).to(x.dtype)
     if mask is None:
         return output
     if mask.dim() != x.dim():
@@ -777,13 +774,15 @@ class HELMBertModel(HELMBertPreTrainedModel):
 
 
 class HELMBertLMHead(nn.Module):
-    """MLM head with weight tying."""
+    """MLM head with weight tying (HuggingFace standard)."""
 
     def __init__(self, config: HELMBertConfig):
         super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
         self.layer_norm = nn.LayerNorm(config.hidden_size)
         self.activation = nn.GELU()
+
+        # Decoder with weight tying (weight tied to embedding, bias is separate)
         self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=True)
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -814,7 +813,7 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
     >>> predictions = outputs.logits.argmax(dim=-1)
     """
 
-    _tied_weights_keys = ["lm_head.decoder.weight", "lm_head.decoder.bias"]
+    _tied_weights_keys = ["lm_head.decoder.weight"]
 
     def __init__(self, config: HELMBertConfig):
         super().__init__(config)
@@ -829,6 +828,9 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
     def set_output_embeddings(self, new_embeddings: nn.Linear) -> None:
         self.lm_head.decoder = new_embeddings
 
+    def get_input_embeddings(self) -> nn.Embedding:
+        return self.helmbert.embeddings.word_embeddings
+
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -878,7 +880,7 @@ class HELMBertForMaskedLM(HELMBertPreTrainedModel):
         hidden_states = encoder_outputs[2]
         attentions = encoder_outputs[3]
 
-        # MLM head
+        # MLM head (weight tying handled by HuggingFace)
         prediction_scores = self.lm_head(sequence_output)
 
         # Calculate loss if labels provided
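
Note on the masked_layer_norm change above: `.to(x)` matches both the dtype and device of `x`, while the new `.to(x.dtype)` casts dtype only, which is what matters when the normalization runs in a higher precision (for example under torch.autocast, where layer norm is computed in float32). The snippet below is a minimal sketch of that behavior; the mask-broadcast step is an assumption for illustration, not the repo's exact code.

from typing import Optional

import torch
import torch.nn as nn


def masked_layer_norm_sketch(
    layer_norm: nn.LayerNorm,
    x: torch.Tensor,
    mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    # Cast the normalized output back to the caller's dtype (e.g. float16
    # under autocast, where layer norm itself runs in float32).
    output = layer_norm(x).to(x.dtype)
    if mask is None:
        return output
    if mask.dim() != x.dim():
        # Assumed: broadcast the padding mask over the hidden dimension.
        mask = mask.unsqueeze(-1)
    # Zero out padding positions.
    return output * mask.to(output.dtype)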
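
The weight-tying edits (dropping "lm_head.decoder.bias" from _tied_weights_keys and adding get_input_embeddings) rely on the standard transformers mechanism: PreTrainedModel.tie_weights() points the output decoder's weight at the input embedding matrix, while the decoder bias stays an independent parameter. Below is a minimal, self-contained sketch of that mechanism using placeholder classes (TinyLMHead / TinyMaskedLM are illustrative stand-ins, not this repo's modules).

import torch.nn as nn


class TinyLMHead(nn.Module):
    """Stand-in for HELMBertLMHead: only the tied decoder matters here."""

    def __init__(self, hidden_size: int, vocab_size: int):
        super().__init__()
        self.decoder = nn.Linear(hidden_size, vocab_size, bias=True)


class TinyMaskedLM(nn.Module):
    """Stand-in for HELMBertForMaskedLM showing the tying mechanism."""

    def __init__(self, hidden_size: int = 16, vocab_size: int = 32):
        super().__init__()
        self.word_embeddings = nn.Embedding(vocab_size, hidden_size)
        self.lm_head = TinyLMHead(hidden_size, vocab_size)
        self.tie_weights()

    def get_input_embeddings(self) -> nn.Embedding:
        return self.word_embeddings

    def get_output_embeddings(self) -> nn.Linear:
        return self.lm_head.decoder

    def tie_weights(self) -> None:
        # Share the embedding matrix with the decoder weight; the decoder bias
        # stays separate, which is why only "lm_head.decoder.weight" remains in
        # _tied_weights_keys after this commit.
        self.get_output_embeddings().weight = self.get_input_embeddings().weight


model = TinyMaskedLM()
assert model.lm_head.decoder.weight is model.word_embeddings.weight
assert model.lm_head.decoder.bias is not None  # bias remains an independent parameter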