yezdata committed on
Commit
326e148
·
verified ·
1 Parent(s): 0f90b49

ADD EmCoder V1.5

Browse files
config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "emcoder",
3
+ "auto_map": {
4
+ "AutoConfig": "configuration_emcoder.EmCoderConfig",
5
+ "AutoModel": "modeling_emcoder.EmCoder"
6
+ },
7
+ "architectures": ["EmCoder"],
8
+ "vocab_size": 50265,
9
+ "max_seq_len": 512,
10
+ "d_model": 768,
11
+ "n_head": 12,
12
+ "n_layers": 6,
13
+ "d_ffn": 3072,
14
+ "dropout": 0.1,
15
+ "num_labels": 28,
16
+ "id2label": {
17
+ "0": "admiration",
18
+ "1": "amusement",
19
+ "2": "anger",
20
+ "3": "annoyance",
21
+ "4": "approval",
22
+ "5": "caring",
23
+ "6": "confusion",
24
+ "7": "curiosity",
25
+ "8": "desire",
26
+ "9": "disappointment",
27
+ "10": "disapproval",
28
+ "11": "disgust",
29
+ "12": "embarrassment",
30
+ "13": "excitement",
31
+ "14": "fear",
32
+ "15": "gratitude",
33
+ "16": "grief",
34
+ "17": "joy",
35
+ "18": "love",
36
+ "19": "nervousness",
37
+ "20": "optimism",
38
+ "21": "pride",
39
+ "22": "realization",
40
+ "23": "relief",
41
+ "24": "remorse",
42
+ "25": "sadness",
43
+ "26": "surprise",
44
+ "27": "neutral"
45
+ },
46
+ "label2id": {
47
+ "admiration": 0,
48
+ "amusement": 1,
49
+ "anger": 2,
50
+ "annoyance": 3,
51
+ "approval": 4,
52
+ "caring": 5,
53
+ "confusion": 6,
54
+ "curiosity": 7,
55
+ "desire": 8,
56
+ "disappointment": 9,
57
+ "disapproval": 10,
58
+ "disgust": 11,
59
+ "embarrassment": 12,
60
+ "excitement": 13,
61
+ "fear": 14,
62
+ "gratitude": 15,
63
+ "grief": 16,
64
+ "joy": 17,
65
+ "love": 18,
66
+ "nervousness": 19,
67
+ "optimism": 20,
68
+ "pride": 21,
69
+ "realization": 22,
70
+ "relief": 23,
71
+ "remorse": 24,
72
+ "sadness": 25,
73
+ "surprise": 26,
74
+ "neutral": 27
75
+ },
76
+ "base_encoder_path": "models/v1/pretrain/checkpoints/epoch_1/step_120000"
77
+ }
configuration_emcoder.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+
3
+
4
class EmCoderConfig(PretrainedConfig):
    """Configuration for the EmCoder encoder and its classification head.

    Args:
        vocab_size: Number of rows in the token embedding table.
        max_seq_len: Number of rows in the learned position embedding table.
        d_model: Hidden width of the embeddings and encoder layers.
        n_head: Number of attention heads per encoder layer.
        n_layers: Number of stacked encoder layers.
        d_ffn: Width of the feed-forward sub-layer inside each encoder layer.
        dropout: Dropout probability used by the encoder and classifier.
        num_labels: Number of logits produced by the classification head.
        base_encoder_path: Stored on the config as-is; presumably the
            checkpoint the encoder was initialised from (not read by the
            config itself).
        id2label: Optional mapping from class id to label name. Keys may be
            strings (as they are after a JSON round-trip) and are converted
            to ``int``.
        label2id: Optional mapping from label name to class id.
        **kwargs: Forwarded unchanged to ``PretrainedConfig``.
    """

    model_type = "emcoder"

    def __init__(
        self,
        vocab_size=50265,
        max_seq_len=512,
        d_model=768,
        n_head=12,
        n_layers=6,
        d_ffn=3072,
        dropout=0.1,
        num_labels=28,
        base_encoder_path="",
        id2label=None,
        label2id=None,
        **kwargs,
    ):
        # JSON object keys are always strings, so restore integer class ids
        # before handing the mapping to the base class.
        int_keyed_labels = (
            None
            if id2label is None
            else {int(class_id): name for class_id, name in id2label.items()}
        )

        super().__init__(id2label=int_keyed_labels, label2id=label2id, **kwargs)

        # Architecture hyperparameters.
        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.d_model = d_model
        self.n_head = n_head
        self.n_layers = n_layers
        self.d_ffn = d_ffn
        self.dropout = dropout

        # Classification head size and provenance of the encoder weights.
        self.num_labels = num_labels
        self.base_encoder_path = base_encoder_path
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdaf493f59fad028e70cf14d448aa3215ec08d8c6af5840e28fc3c1307648f42
3
+ size 328565600
modeling_emcoder.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from transformers import PreTrainedModel
4
+
5
+ from .configuration_emcoder import EmCoderConfig
6
+
7
+
8
class EmCoderCore(nn.Module):
    """The core encoder architecture of EmCoder, without the classification head.

    Token embeddings and learned absolute position embeddings are summed,
    layer-normalised and passed through dropout, then fed to a stack of
    pre-norm ``nn.TransformerEncoderLayer`` blocks followed by a final
    ``LayerNorm``.
    """

    def __init__(self, config: EmCoderConfig):
        """Build the embedding tables and the encoder stack.

        Args:
            config: Supplies ``vocab_size``, ``max_seq_len``, ``d_model``,
                ``n_head``, ``n_layers``, ``d_ffn`` and ``dropout``.
        """
        super().__init__()

        self.token_embedding = nn.Embedding(config.vocab_size, config.d_model)
        self.pos_embedding = nn.Embedding(config.max_seq_len, config.d_model)
        self.embed_norm = nn.LayerNorm(config.d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.d_model,
            nhead=config.n_head,
            dim_feedforward=config.d_ffn,
            dropout=config.dropout,
            activation="gelu",
            norm_first=True,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer, num_layers=config.n_layers
        )

        self.final_norm = nn.LayerNorm(config.d_model)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        """Encode a batch of token ids into per-token features.

        Args:
            x: Input token IDs of shape (B, S).
            mask: Attention mask of shape (B, S); positions equal to 0 are
                treated as padding and excluded from attention.

        Returns:
            Encoded features of shape (B, S, D).

        Raises:
            IndexError: If S exceeds the size of the position embedding table.
        """
        seq_len = x.size(1)
        max_positions = self.pos_embedding.num_embeddings
        # Fail early with a readable message; otherwise the position lookup
        # below fails with an opaque out-of-range index error.
        if seq_len > max_positions:
            raise IndexError(
                f"Input sequence length {seq_len} exceeds "
                f"max_seq_len={max_positions}; truncate the input first."
            )

        pos_ids = torch.arange(seq_len, device=x.device).unsqueeze(0)  # (1, S)
        hidden = self.token_embedding(x) + self.pos_embedding(pos_ids)
        hidden = self.dropout(self.embed_norm(hidden))

        # src_key_padding_mask expects True at positions to ignore.
        padding_mask = mask == 0

        encoded = self.encoder(hidden, src_key_padding_mask=padding_mask)
        return self.final_norm(encoded)
48
+
49
+
50
class EmCoder(PreTrainedModel):
    """The full EmCoder model, including the classification head.

    An ``EmCoderCore`` encoder produces per-token features, which are
    mean-pooled over the non-padding positions and passed through a two-layer
    MLP that emits one logit per label.
    """

    config_class = EmCoderConfig

    def __init__(self, config: EmCoderConfig):
        """Build the encoder and the classification head from ``config``."""
        super().__init__(config)

        self.encoder = EmCoderCore(config)
        self.classifier = nn.Sequential(
            nn.Linear(config.d_model, config.d_model),
            nn.GELU(),
            nn.Dropout(config.dropout),
            nn.Linear(config.d_model, config.num_labels),
        )

        self.post_init()

    def _set_mc_dropout(self, active: bool = True) -> None:
        """Switch every dropout-bearing module into (or out of) train mode.

        ``nn.TransformerEncoderLayer`` is toggled together with ``nn.Dropout``
        and ``nn.MultiheadAttention``: a layer left in eval mode may take
        PyTorch's fused inference fast path when autograd is disabled, and
        that path does not run the layer's dropout modules, which would
        silently remove most of the stochasticity from MC sampling.
        """
        stochastic_types = (
            nn.Dropout,
            nn.MultiheadAttention,
            nn.TransformerEncoderLayer,
        )
        for module in self.modules():
            if isinstance(module, stochastic_types):
                module.train(active)

    @staticmethod
    def _masked_mean_pooling(
        features: torch.Tensor, mask: torch.Tensor
    ) -> torch.Tensor:
        """Average ``features`` over the positions where ``mask`` is non-zero.

        Args:
            features: Encoder output of shape (B, S, D).
            mask: Attention mask of shape (B, S).

        Returns:
            Pooled features of shape (B, D), in the dtype of ``features``.
        """
        # Cast so integer/bool masks do not promote the result to another
        # float dtype than the features (matters for half-precision models).
        weights = mask.unsqueeze(-1).to(features.dtype)  # (B, S, 1)
        summed = (features * weights).sum(dim=1)  # (B, D)
        # Guard against division by zero for rows without any valid token.
        token_counts = torch.clamp(weights.sum(dim=1), min=1e-9)  # (B, 1)
        return summed / token_counts  # (B, D)

    def mc_forward(
        self,
        x: torch.Tensor,
        mask: torch.Tensor,
        n_samples: int,
        max_batch_size: int | None = None,
    ) -> torch.Tensor:
        """
        Performs Monte Carlo Dropout inference to quantify epistemic uncertainty.

        Dropout is forced on for the duration of the call and the previous
        train/eval state is restored afterwards, even if a forward pass fails.

        Args:
            x: Input token IDs of shape (B, S).
            mask: Attention mask of shape (B, S).
            n_samples: Total number of Monte Carlo samples.
            max_batch_size: Maximum number of samples in one forward pass.
                Defaults to ``n_samples`` (all samples in a single pass).

        Returns:
            Logits of shape (n_samples, B, num_labels).

        Raises:
            ValueError: If the effective chunk size is not positive.
        """
        chunk_size = n_samples if max_batch_size is None else max_batch_size
        # A non-positive step would make the loop below a no-op and hand back
        # the uninitialised buffer, so reject it explicitly.
        if chunk_size < 1:
            raise ValueError(
                "max_batch_size (or n_samples when max_batch_size is None) "
                f"must be a positive integer, got {chunk_size}"
            )

        B, S = x.shape
        num_labels = self.classifier[-1].out_features

        all_logits = torch.empty((n_samples, B, num_labels), device=x.device)

        is_training = self.training
        self._set_mc_dropout(active=True)
        try:
            for start in range(0, n_samples, chunk_size):
                batch_samples = min(chunk_size, n_samples - start)

                # Tile the batch so each copy draws its own dropout mask.
                x_stacked = x.repeat(batch_samples, 1)  # (batch_samples * B, S)
                mask_stacked = mask.repeat(batch_samples, 1)  # (batch_samples * B, S)

                features = self.encoder(
                    x_stacked, mask_stacked
                )  # (batch_samples * B, S, D)

                pooled = self._masked_mean_pooling(features, mask_stacked)
                logits = self.classifier(pooled)  # (batch_samples * B, num_labels)

                all_logits[start : start + batch_samples] = logits.view(
                    batch_samples, B, -1
                )
        finally:
            self._set_mc_dropout(active=is_training)

        return all_logits

    def forward(self, x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        """Standard forward pass without MC Dropout.

        Args:
            x: Input token IDs of shape (B, S).
            mask: Attention mask of shape (B, S).

        Returns:
            Logits of shape (B, num_labels).
        """
        features = self.encoder(x, mask)

        pooled = self._masked_mean_pooling(features, mask)
        return self.classifier(pooled)
thresholds.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "admiration": {
3
+ "p": 0.6714285714285715,
4
+ "f1": 0.6646403242147924
5
+ },
6
+ "amusement": {
7
+ "p": 0.6714285714285715,
8
+ "f1": 0.7877862595419848
9
+ },
10
+ "anger": {
11
+ "p": 0.5571428571428572,
12
+ "f1": 0.43231441048034935
13
+ },
14
+ "annoyance": {
15
+ "p": 0.3857142857142858,
16
+ "f1": 0.32748538011695905
17
+ },
18
+ "approval": {
19
+ "p": 0.3285714285714286,
20
+ "f1": 0.30103480714957664
21
+ },
22
+ "caring": {
23
+ "p": 0.6714285714285715,
24
+ "f1": 0.33440514469453375
25
+ },
26
+ "confusion": {
27
+ "p": 0.6714285714285715,
28
+ "f1": 0.3940520446096654
29
+ },
30
+ "curiosity": {
31
+ "p": 0.5571428571428572,
32
+ "f1": 0.5225225225225225
33
+ },
34
+ "desire": {
35
+ "p": 0.7285714285714286,
36
+ "f1": 0.5228758169934641
37
+ },
38
+ "disappointment": {
39
+ "p": 0.5571428571428572,
40
+ "f1": 0.2638888888888889
41
+ },
42
+ "disapproval": {
43
+ "p": 0.3857142857142858,
44
+ "f1": 0.3365617433414044
45
+ },
46
+ "disgust": {
47
+ "p": 0.6714285714285715,
48
+ "f1": 0.44680851063829785
49
+ },
50
+ "embarrassment": {
51
+ "p": 0.8428571428571429,
52
+ "f1": 0.5454545454545454
53
+ },
54
+ "excitement": {
55
+ "p": 0.6714285714285715,
56
+ "f1": 0.29411764705882354
57
+ },
58
+ "fear": {
59
+ "p": 0.7857142857142857,
60
+ "f1": 0.5365853658536586
61
+ },
62
+ "gratitude": {
63
+ "p": 0.8428571428571429,
64
+ "f1": 0.9135446685878963
65
+ },
66
+ "grief": {
67
+ "p": 0.5571428571428572,
68
+ "f1": 0.4166666666666667
69
+ },
70
+ "joy": {
71
+ "p": 0.7857142857142857,
72
+ "f1": 0.5679012345679012
73
+ },
74
+ "love": {
75
+ "p": 0.7857142857142857,
76
+ "f1": 0.7805755395683454
77
+ },
78
+ "nervousness": {
79
+ "p": 0.6714285714285715,
80
+ "f1": 0.4
81
+ },
82
+ "optimism": {
83
+ "p": 0.6714285714285715,
84
+ "f1": 0.5983827493261455
85
+ },
86
+ "pride": {
87
+ "p": 0.6714285714285715,
88
+ "f1": 0.6666666666666666
89
+ },
90
+ "realization": {
91
+ "p": 0.5571428571428572,
92
+ "f1": 0.24390243902439024
93
+ },
94
+ "relief": {
95
+ "p": 0.7285714285714286,
96
+ "f1": 0.24
97
+ },
98
+ "remorse": {
99
+ "p": 0.7857142857142857,
100
+ "f1": 0.7682119205298014
101
+ },
102
+ "sadness": {
103
+ "p": 0.6142857142857143,
104
+ "f1": 0.4875
105
+ },
106
+ "surprise": {
107
+ "p": 0.6714285714285715,
108
+ "f1": 0.5092250922509225
109
+ },
110
+ "neutral": {
111
+ "p": 0.3285714285714286,
112
+ "f1": 0.6542099192618224
113
+ }
114
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
train_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_samples": 30,
3
+ "tokenized_ds_dir": "data/goemotions_v1_seq512",
4
+ "encoder_lr": 0.00001,
5
+ "head_lr": 0.0005,
6
+ "lr_warmup": 0.05,
7
+ "weight_decay": 0.01,
8
+ "batch_size": 8,
9
+ "gradient_accumulation_steps": 8,
10
+ "num_epochs": 10
11
+ }
train_state.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "train_loss": 0.1895649628543834,
3
+ "eval_loss": 0.2377220498005666
4
+ }