yezdata
/

EmCoder

@@ -1,12 +1,12 @@
 import torch
 import torch.nn as nn
-from transformers import PreTrainedModel
 from .configuration_emcoder import EmCoderConfig
-class EmCoderCore(nn.Module):
-    """The core encoder architecture of EmCoder, without the classification head."""
     def __init__(self, config: EmCoderConfig):
         super().__init__()
@@ -55,7 +55,7 @@ class EmCoder(PreTrainedModel):
     def __init__(self, config: EmCoderConfig):
         super().__init__(config)
-        self.encoder = EmCoderCore(config)
         self.classifier = nn.Sequential(
             nn.Linear(config.d_model, config.d_model),
             nn.GELU(),
@@ -65,6 +65,21 @@ class EmCoder(PreTrainedModel):
         self.post_init()
     def _set_mc_dropout(self, active: bool = True):
         for m in self.modules():
@@ -84,10 +99,12 @@ class EmCoder(PreTrainedModel):
     def mc_forward(
         self,
-        x: torch.Tensor,
-        mask: torch.Tensor,
-        n_samples: int,
         max_batch_size: int | None = None,
     ) -> torch.Tensor:
         """
         Performs Monte Carlo Dropout inference to quantify epistemic uncertainty.
@@ -101,9 +118,16 @@ class EmCoder(PreTrainedModel):
         Returns:
             Logits of shape (n_samples, B, num_labels).
         """
         if max_batch_size is None:
             max_batch_size = n_samples
         B, S = x.shape
         num_labels = self.classifier[-1].out_features
@@ -134,9 +158,29 @@ class EmCoder(PreTrainedModel):
-    def forward(self, x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
         """Standard forward pass without MC Dropout."""
         features = self.encoder(x, mask)
         pooled = self._masked_mean_pooling(features, mask)
         return self.classifier(pooled)

 import torch
 import torch.nn as nn
+from transformers import PreTrainedModel, AutoConfig, AutoModel
 from .configuration_emcoder import EmCoderConfig
+class EmCoderEncoder(nn.Module):
+    """The core encoder architecture of EmCoder Transformer."""
     def __init__(self, config: EmCoderConfig):
         super().__init__()
     def __init__(self, config: EmCoderConfig):
         super().__init__(config)
+        self.encoder = EmCoderEncoder(config)
         self.classifier = nn.Sequential(
             nn.Linear(config.d_model, config.d_model),
             nn.GELU(),
         self.post_init()
+    def _init_weights(self, module: nn.Module) -> None:
+        if isinstance(module, nn.Linear):
+            nn.init.trunc_normal_(module.weight, std=0.02)
+            if module.bias is not None:
+                nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            nn.init.trunc_normal_(module.weight, std=0.02)
+            if hasattr(module, "padding_idx") and module.padding_idx is not None:
+                module.weight.data[module.padding_idx].zero_()
+        elif isinstance(module, nn.LayerNorm):
+            nn.init.ones_(module.weight)
+            nn.init.zeros_(module.bias)
     def _set_mc_dropout(self, active: bool = True):
         for m in self.modules():
     def mc_forward(
         self,
+        input_ids: torch.Tensor | None = None,
+        attention_mask: torch.Tensor | None = None,
+        n_samples: int = 10,
         max_batch_size: int | None = None,
+        return_dict: bool | None = None,
+        **kwargs,
     ) -> torch.Tensor:
         """
         Performs Monte Carlo Dropout inference to quantify epistemic uncertainty.
         Returns:
             Logits of shape (n_samples, B, num_labels).
         """
+        x = input_ids if input_ids is not None else kwargs.get("x")
+        mask = attention_mask if attention_mask is not None else kwargs.get("mask")
+        if x is None or mask is None:
+            raise ValueError("input_ids (x) and attention_mask (mask) must be provided")
         if max_batch_size is None:
             max_batch_size = n_samples
         B, S = x.shape
         num_labels = self.classifier[-1].out_features
+    def forward(
+        self,
+        input_ids: torch.Tensor | None = None,
+        attention_mask: torch.Tensor | None = None,
+        return_dict: bool | None = None,
+        **kwargs,
+    ) -> torch.Tensor:
         """Standard forward pass without MC Dropout."""
+        x = input_ids if input_ids is not None else kwargs.get("x")
+        mask = attention_mask if attention_mask is not None else kwargs.get("mask")
+        if x is None or mask is None:
+            raise ValueError("input_ids (x) and attention_mask (mask) must be provided")
         features = self.encoder(x, mask)
         pooled = self._masked_mean_pooling(features, mask)
         return self.classifier(pooled)
 # Import-time side effect: register EmCoderConfig under the "emcoder" key
 # with AutoConfig, then register EmCoder as the AutoModel class for
 # EmCoderConfig. Both calls share one try block, so a ValueError raised by
 # the first registration also skips the second.
+try:
+    AutoConfig.register("emcoder", EmCoderConfig)
+    AutoModel.register(EmCoderConfig, EmCoder)
+except ValueError:
     # NOTE(review): presumably ValueError means the entry is already
     # registered (e.g. the module was imported twice), so it is ignored on
     # purpose -- confirm against the transformers Auto* register() docs.
+    pass