ADD EmCoder V1.5

Browse files

Files changed (9) hide show

config.json +77 -0
configuration_emcoder.py +34 -0
model.safetensors +3 -0
modeling_emcoder.py +142 -0
thresholds.json +114 -0
tokenizer.json +0 -0
tokenizer_config.json +0 -0
train_config.json +11 -0
train_state.json +4 -0

config.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "model_type": "emcoder",
+  "auto_map": {
+    "AutoConfig": "configuration_emcoder.EmCoderConfig",
+    "AutoModel": "modeling_emcoder.EmCoder"
+  },
+  "architectures": ["EmCoder"],
+  "vocab_size": 50265,
+  "max_seq_len": 512,
+  "d_model": 768,
+  "n_head": 12,
+  "n_layers": 6,
+  "d_ffn": 3072,
+  "dropout": 0.1,
+  "num_labels": 28,
+  "id2label": {
+    "0": "admiration",
+    "1": "amusement",
+    "2": "anger",
+    "3": "annoyance",
+    "4": "approval",
+    "5": "caring",
+    "6": "confusion",
+    "7": "curiosity",
+    "8": "desire",
+    "9": "disappointment",
+    "10": "disapproval",
+    "11": "disgust",
+    "12": "embarrassment",
+    "13": "excitement",
+    "14": "fear",
+    "15": "gratitude",
+    "16": "grief",
+    "17": "joy",
+    "18": "love",
+    "19": "nervousness",
+    "20": "optimism",
+    "21": "pride",
+    "22": "realization",
+    "23": "relief",
+    "24": "remorse",
+    "25": "sadness",
+    "26": "surprise",
+    "27": "neutral"
+  },
+  "label2id": {
+    "admiration": 0,
+    "amusement": 1,
+    "anger": 2,
+    "annoyance": 3,
+    "approval": 4,
+    "caring": 5,
+    "confusion": 6,
+    "curiosity": 7,
+    "desire": 8,
+    "disappointment": 9,
+    "disapproval": 10,
+    "disgust": 11,
+    "embarrassment": 12,
+    "excitement": 13,
+    "fear": 14,
+    "gratitude": 15,
+    "grief": 16,
+    "joy": 17,
+    "love": 18,
+    "nervousness": 19,
+    "optimism": 20,
+    "pride": 21,
+    "realization": 22,
+    "relief": 23,
+    "remorse": 24,
+    "sadness": 25,
+    "surprise": 26,
+    "neutral": 27
+  },
+  "base_encoder_path": "models/v1/pretrain/checkpoints/epoch_1/step_120000"
+}

configuration_emcoder.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from transformers import PretrainedConfig
+class EmCoderConfig(PretrainedConfig):
+    model_type = "emcoder"
+    def __init__(
+        self,
+        vocab_size=50265,
+        max_seq_len=512,
+        d_model=768,
+        n_head=12,
+        n_layers=6,
+        d_ffn=3072,
+        dropout=0.1,
+        num_labels=28,
+        base_encoder_path="",
+        id2label=None,
+        label2id=None,
+        **kwargs,
+    ):
+        if id2label is not None:
+            id2label = {int(k): v for k, v in id2label.items()}
+        super().__init__(id2label=id2label, label2id=label2id, **kwargs)
+        self.vocab_size = vocab_size
+        self.max_seq_len = max_seq_len
+        self.d_model = d_model
+        self.n_head = n_head
+        self.n_layers = n_layers
+        self.d_ffn = d_ffn
+        self.dropout = dropout
+        self.num_labels = num_labels
+        self.base_encoder_path = base_encoder_path

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdaf493f59fad028e70cf14d448aa3215ec08d8c6af5840e28fc3c1307648f42
+size 328565600

modeling_emcoder.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import torch
+import torch.nn as nn
+from transformers import PreTrainedModel
+from .configuration_emcoder import EmCoderConfig
+class EmCoderCore(nn.Module):
+    """The core encoder architecture of EmCoder, without the classification head."""
+    def __init__(self, config: EmCoderConfig):
+        super().__init__()
+        self.token_embedding = nn.Embedding(config.vocab_size, config.d_model)
+        self.pos_embedding = nn.Embedding(config.max_seq_len, config.d_model)
+        self.embed_norm = nn.LayerNorm(config.d_model)
+        encoder_layer = nn.TransformerEncoderLayer(
+            d_model=config.d_model,
+            nhead=config.n_head,
+            dim_feedforward=config.d_ffn,
+            dropout=config.dropout,
+            activation="gelu",
+            norm_first=True,
+            batch_first=True,
+        )
+        self.encoder = nn.TransformerEncoder(
+            encoder_layer=encoder_layer, num_layers=config.n_layers
+        )
+        self.final_norm = nn.LayerNorm(config.d_model)
+        self.dropout = nn.Dropout(config.dropout)
+    def forward(self, x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
+        """Standard forward pass through the encoder."""
+        seq_len = x.size(1)
+        pos_ids = torch.arange(seq_len, device=x.device).unsqueeze(0)
+        x = self.token_embedding(x) + self.pos_embedding(pos_ids)
+        x = self.embed_norm(x)
+        x = self.dropout(x)
+        padding_mask = mask == 0
+        encoded = self.encoder(x, src_key_padding_mask=padding_mask)
+        return self.final_norm(encoded)
+class EmCoder(PreTrainedModel):
+    """The full EmCoder model, including the classification head."""
+    config_class = EmCoderConfig
+    def __init__(self, config: EmCoderConfig):
+        super().__init__(config)
+        self.encoder = EmCoderCore(config)
+        self.classifier = nn.Sequential(
+            nn.Linear(config.d_model, config.d_model),
+            nn.GELU(),
+            nn.Dropout(config.dropout),
+            nn.Linear(config.d_model, config.num_labels),
+        )
+        self.post_init()
+    def _set_mc_dropout(self, active: bool = True):
+        for m in self.modules():
+            if isinstance(m, nn.Dropout) or isinstance(m, nn.MultiheadAttention):
+                m.train(active)
+    @staticmethod
+    def _masked_mean_pooling(
+        features: torch.Tensor, mask: torch.Tensor
+    ) -> torch.Tensor:
+        mask = mask.unsqueeze(-1)  # (B, S, 1)
+        masked_features = features * mask  # (B, S, D)
+        sum_masked_features = masked_features.sum(dim=1)  # (B, D)
+        count_tokens = torch.clamp(mask.sum(dim=1), min=1e-9)  # (B, 1)
+        return sum_masked_features / count_tokens  # (B, D)
+    def mc_forward(
+        self,
+        x: torch.Tensor,
+        mask: torch.Tensor,
+        n_samples: int,
+        max_batch_size: int | None = None,
+    ) -> torch.Tensor:
+        """
+        Performs Monte Carlo Dropout inference to quantify epistemic uncertainty.
+        Args:
+            x: Input token IDs of shape (B, S).
+            mask: Attention mask of shape (B, S).
+            n_samples: Total number of Monte Carlo samples.
+            max_batch_size: Maximum number of samples in one forward pass.
+        Returns:
+            Logits of shape (n_samples, B, num_labels).
+        """
+        if max_batch_size is None:
+            max_batch_size = n_samples
+        B, S = x.shape
+        num_labels = self.classifier[-1].out_features
+        all_logits = torch.empty((n_samples, B, num_labels), device=x.device)
+        is_training = self.training
+        self._set_mc_dropout(active=True)
+        try:
+            for i in range(0, n_samples, max_batch_size):
+                batch_samples = min(max_batch_size, n_samples - i)
+                x_stacked = x.repeat(batch_samples, 1)  # (batch_samples * B, S)
+                mask_stacked = mask.repeat(batch_samples, 1)  # (batch_samples * B, S)
+                features = self.encoder(
+                    x_stacked, mask_stacked
+                )  # (batch_samples * B, S, D)
+                pooled = self._masked_mean_pooling(features, mask_stacked)
+                logits = self.classifier(pooled)  # (n_samples * B, num_labels)
+                all_logits[i : i + batch_samples] = logits.view(batch_samples, B, -1)
+        finally:
+            self._set_mc_dropout(active=is_training)
+        return all_logits
+    def forward(self, x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
+        """Standard forward pass without MC Dropout."""
+        features = self.encoder(x, mask)
+        pooled = self._masked_mean_pooling(features, mask)
+        return self.classifier(pooled)

thresholds.json ADDED Viewed

	@@ -0,0 +1,114 @@

+{
+    "admiration": {
+        "p": 0.6714285714285715,
+        "f1": 0.6646403242147924
+    },
+    "amusement": {
+        "p": 0.6714285714285715,
+        "f1": 0.7877862595419848
+    },
+    "anger": {
+        "p": 0.5571428571428572,
+        "f1": 0.43231441048034935
+    },
+    "annoyance": {
+        "p": 0.3857142857142858,
+        "f1": 0.32748538011695905
+    },
+    "approval": {
+        "p": 0.3285714285714286,
+        "f1": 0.30103480714957664
+    },
+    "caring": {
+        "p": 0.6714285714285715,
+        "f1": 0.33440514469453375
+    },
+    "confusion": {
+        "p": 0.6714285714285715,
+        "f1": 0.3940520446096654
+    },
+    "curiosity": {
+        "p": 0.5571428571428572,
+        "f1": 0.5225225225225225
+    },
+    "desire": {
+        "p": 0.7285714285714286,
+        "f1": 0.5228758169934641
+    },
+    "disappointment": {
+        "p": 0.5571428571428572,
+        "f1": 0.2638888888888889
+    },
+    "disapproval": {
+        "p": 0.3857142857142858,
+        "f1": 0.3365617433414044
+    },
+    "disgust": {
+        "p": 0.6714285714285715,
+        "f1": 0.44680851063829785
+    },
+    "embarrassment": {
+        "p": 0.8428571428571429,
+        "f1": 0.5454545454545454
+    },
+    "excitement": {
+        "p": 0.6714285714285715,
+        "f1": 0.29411764705882354
+    },
+    "fear": {
+        "p": 0.7857142857142857,
+        "f1": 0.5365853658536586
+    },
+    "gratitude": {
+        "p": 0.8428571428571429,
+        "f1": 0.9135446685878963
+    },
+    "grief": {
+        "p": 0.5571428571428572,
+        "f1": 0.4166666666666667
+    },
+    "joy": {
+        "p": 0.7857142857142857,
+        "f1": 0.5679012345679012
+    },
+    "love": {
+        "p": 0.7857142857142857,
+        "f1": 0.7805755395683454
+    },
+    "nervousness": {
+        "p": 0.6714285714285715,
+        "f1": 0.4
+    },
+    "optimism": {
+        "p": 0.6714285714285715,
+        "f1": 0.5983827493261455
+    },
+    "pride": {
+        "p": 0.6714285714285715,
+        "f1": 0.6666666666666666
+    },
+    "realization": {
+        "p": 0.5571428571428572,
+        "f1": 0.24390243902439024
+    },
+    "relief": {
+        "p": 0.7285714285714286,
+        "f1": 0.24
+    },
+    "remorse": {
+        "p": 0.7857142857142857,
+        "f1": 0.7682119205298014
+    },
+    "sadness": {
+        "p": 0.6142857142857143,
+        "f1": 0.4875
+    },
+    "surprise": {
+        "p": 0.6714285714285715,
+        "f1": 0.5092250922509225
+    },
+    "neutral": {
+        "p": 0.3285714285714286,
+        "f1": 0.6542099192618224
+    }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

train_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "n_samples": 30,
+    "tokenized_ds_dir": "data/goemotions_v1_seq512",
+    "encoder_lr": 0.00001,
+    "head_lr": 0.0005,
+    "lr_warmup": 0.05,
+    "weight_decay": 0.01,
+    "batch_size": 8,
+    "gradient_accumulation_steps": 8,
+    "num_epochs": 10
+}

train_state.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "train_loss": 0.1895649628543834,
+    "eval_loss": 0.2377220498005666
+}