AnnyNguyen commited on
Commit
7493dd3
·
verified ·
1 Parent(s): 1fcc0d2

Upload models.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. models.py +238 -0
models.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model architectures for emotion recognition.
3
+ """
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+ from transformers import AutoModel, AutoConfig, AutoModelForSequenceClassification
9
+
10
+
11
class BaseEmotionModel(nn.Module):
    """
    Common backbone shared by the transformer-based emotion classifiers.

    Holds a pretrained encoder, a dropout layer, and a linear head that
    maps the encoder's hidden size to ``num_labels`` logits.
    """

    def __init__(self, model_name: str, num_labels: int):
        super().__init__()
        cfg = AutoConfig.from_pretrained(model_name, ignore_mismatched_sizes=True)
        self.config = cfg
        self.encoder = AutoModel.from_pretrained(
            model_name, config=cfg, ignore_mismatched_sizes=True
        )
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(cfg.hidden_size, num_labels)
23
class TransformerForEmotion(BaseEmotionModel):
    """
    Standard transformer classifier using pooler/CLS-token pooling.
    """

    def forward(self, input_ids, attention_mask, labels=None):
        """Encode, pool, classify; compute cross-entropy loss when labels are given."""
        enc_out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        # Prefer the model's own pooler output; otherwise fall back to the
        # first (CLS) token of the last hidden state.
        pooled = getattr(enc_out, "pooler_output", None)
        if pooled is None:
            pooled = enc_out.last_hidden_state[:, 0]

        logits = self.classifier(self.dropout(pooled))

        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
48
class SPhoBERTModel(BaseEmotionModel):
    """
    SPhoBERT - specialized PhoBERT variant for emotion recognition.

    Differs from the standard classifier by mean-pooling the sequence
    output when no pooler output is available, instead of the CLS token.
    """

    def forward(self, input_ids, attention_mask, labels=None):
        """Encode, mean-pool (fallback), classify; CE loss when labels are given."""
        enc_out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        pooled = getattr(enc_out, "pooler_output", None)
        if pooled is None:
            # Average over the sequence dimension rather than taking CLS.
            pooled = enc_out.last_hidden_state.mean(dim=1)

        logits = self.classifier(self.dropout(pooled))

        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
74
class RoBERTaGRUModel(nn.Module):
    """
    Hybrid classifier: RoBERTa encoder followed by a 2-layer bidirectional GRU.

    GRU outputs are mean-pooled over time and projected to class logits.
    """

    def __init__(self, model_name: str, num_labels: int, hidden_size: int = 256):
        super().__init__()
        self.config = AutoConfig.from_pretrained(model_name, ignore_mismatched_sizes=True)
        self.encoder = AutoModel.from_pretrained(
            model_name, config=self.config, ignore_mismatched_sizes=True
        )
        self.gru = nn.GRU(
            input_size=self.config.hidden_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=0.1,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(0.1)
        # Bidirectional GRU concatenates both directions, doubling the width.
        self.classifier = nn.Linear(hidden_size * 2, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Encode -> GRU -> mean-pool -> classify; CE loss when labels are given."""
        token_states = self.encoder(
            input_ids=input_ids, attention_mask=attention_mask
        ).last_hidden_state                       # [batch, seq_len, enc_hidden]

        seq_features, _ = self.gru(token_states)  # [batch, seq_len, 2*hidden]
        pooled = self.dropout(seq_features.mean(dim=1))
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
112
class TextCNNModel(nn.Module):
    """
    TextCNN model for emotion recognition.

    An embedding layer feeds parallel Conv2d filters of several widths;
    each feature map is max-pooled over time, the pooled features are
    concatenated and classified by a single linear layer.
    """

    def __init__(self, vocab_size: int, embedding_dim: int = 128, num_labels: int = 7,
                 num_filters: int = 100, filter_sizes=None, dropout: float = 0.5):
        """
        Args:
            vocab_size: Size of the token vocabulary.
            embedding_dim: Dimension of the learned token embeddings.
            num_labels: Number of emotion classes.
            num_filters: Feature maps per filter width.
            filter_sizes: Iterable of convolution widths; defaults to (3, 4, 5).
                BUGFIX: the previous mutable-list default ([3, 4, 5]) is the
                shared-mutable-default pitfall; a None sentinel is used instead.
            dropout: Dropout probability applied before the classifier.
        """
        super().__init__()
        if filter_sizes is None:
            filter_sizes = (3, 4, 5)
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, (filter_size, embedding_dim))
            for filter_size in filter_sizes
        ])
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(num_filters * len(filter_sizes), num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """
        Args:
            input_ids: [batch, seq_len] token ids; seq_len must be >= the
                largest filter size.
            attention_mask: Accepted for interface parity; not used by the CNN.
            labels: Optional [batch] class ids; when given, CE loss is returned.

        Returns:
            Dict with "logits" ([batch, num_labels]) and "loss" (tensor or None).
        """
        embedded = self.embedding(input_ids)   # [batch, seq_len, embedding_dim]
        embedded = embedded.unsqueeze(1)       # [batch, 1, seq_len, embedding_dim]

        pooled_features = []
        for conv in self.convs:
            conv_out = F.relu(conv(embedded)).squeeze(3)  # [batch, num_filters, seq']
            # Max over the remaining time dimension.
            pooled = F.max_pool1d(conv_out, conv_out.size(2)).squeeze(2)
            pooled_features.append(pooled)     # [batch, num_filters]

        concatenated = torch.cat(pooled_features, dim=1)
        logits = self.classifier(self.dropout(concatenated))

        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
157
class BiLSTMModel(nn.Module):
    """
    Bidirectional LSTM classifier over learned token embeddings.

    LSTM outputs are averaged over the sequence dimension and projected
    to class logits.
    """

    def __init__(self, vocab_size: int, embedding_dim: int = 128, hidden_size: int = 256,
                 num_labels: int = 7, num_layers: int = 2, dropout: float = 0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Inter-layer dropout is only meaningful with stacked layers.
        lstm_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=lstm_dropout,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(dropout)
        # Forward and backward directions are concatenated, hence * 2.
        self.classifier = nn.Linear(hidden_size * 2, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Embed -> BiLSTM -> mean-pool -> classify; CE loss when labels are given."""
        token_vecs = self.embedding(input_ids)           # [batch, seq, emb]
        sequence_out, _ = self.lstm(token_vecs)          # [batch, seq, 2*hidden]
        pooled = self.dropout(sequence_out.mean(dim=1))  # [batch, 2*hidden]
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
196
def get_model(model_name: str, num_labels: int, use_custom: bool = False,
              model_type: str = "standard", **kwargs):
    """
    Factory function to get a model instance.

    Args:
        model_name: HuggingFace model identifier.
        num_labels: Number of classification labels.
        use_custom: Whether to use the custom TransformerForEmotion wrapper.
        model_type: Type of model - "standard", "sphobert", "roberta-gru",
            "textcnn", "bilstm".
        **kwargs: Additional model arguments; only keys recognized by the
            selected architecture are consumed (hidden_size, vocab_size,
            embedding_dim, ignore_mismatched_sizes).

    Returns:
        An ``nn.Module`` ready for training or inference.
    """
    if model_type == "sphobert":
        return SPhoBERTModel(model_name, num_labels)
    elif model_type == "roberta-gru":
        return RoBERTaGRUModel(model_name, num_labels, kwargs.get('hidden_size', 256))
    elif model_type == "textcnn":
        return TextCNNModel(
            kwargs.get('vocab_size', 32000),
            kwargs.get('embedding_dim', 128),
            num_labels,
        )
    elif model_type == "bilstm":
        return BiLSTMModel(
            kwargs.get('vocab_size', 32000),
            kwargs.get('embedding_dim', 128),
            kwargs.get('hidden_size', 256),
            num_labels,
        )
    elif use_custom:
        # BUGFIX: TransformerForEmotion.__init__ accepts only
        # (model_name, num_labels); forwarding arbitrary **kwargs raised
        # TypeError whenever any extra keyword was supplied.
        return TransformerForEmotion(model_name, num_labels)
    else:
        # Default path: plain HuggingFace sequence-classification head.
        try:
            config = AutoConfig.from_pretrained(model_name)
            config.num_labels = num_labels

            model = AutoModelForSequenceClassification.from_pretrained(
                model_name,
                config=config,
                **{k: v for k, v in kwargs.items() if k in ['ignore_mismatched_sizes']}
            )
            return model
        except Exception as e:
            # Best-effort fallback (e.g. hub/network failure or a head the
            # auto class cannot build); same kwargs fix applies here.
            print(f"Warning: Failed to use AutoModelForSequenceClassification: {e}")
            return TransformerForEmotion(model_name, num_labels)