AnnyNguyen commited on
Commit
ec1a06b
·
verified ·
1 Parent(s): a098d3f

Delete models.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. models.py +0 -238
models.py DELETED
@@ -1,238 +0,0 @@
1
- """
2
- Model architectures for emotion recognition.
3
- """
4
-
5
- import torch
6
- import torch.nn as nn
7
- import torch.nn.functional as F
8
- from transformers import AutoModel, AutoConfig, AutoModelForSequenceClassification
9
-
10
-
11
class BaseEmotionModel(nn.Module):
    """
    Shared backbone for transformer-based emotion classifiers.

    Loads a pretrained encoder together with its configuration, then
    attaches a dropout layer and a linear classification head sized to
    ``num_labels``. Subclasses define how the encoder output is pooled.
    """

    def __init__(self, model_name: str, num_labels: int):
        super().__init__()
        config = AutoConfig.from_pretrained(model_name, ignore_mismatched_sizes=True)
        encoder = AutoModel.from_pretrained(model_name, config=config, ignore_mismatched_sizes=True)
        self.config = config
        self.encoder = encoder
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(config.hidden_size, num_labels)
21
-
22
-
23
class TransformerForEmotion(BaseEmotionModel):
    """
    Standard transformer model for emotion classification.

    Uses the encoder's ``pooler_output`` when available, otherwise falls
    back to the hidden state of the first (CLS) token.
    """

    def forward(self, input_ids, attention_mask, labels=None):
        """Encode, pool, classify; return {"loss", "logits"} (loss is None without labels)."""
        encoded = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        pooled = getattr(encoded, 'pooler_output', None)
        if pooled is None:
            pooled = encoded.last_hidden_state[:, 0]  # CLS token fallback

        logits = self.classifier(self.dropout(pooled))

        loss = None if labels is None else nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
46
-
47
-
48
class SPhoBERTModel(BaseEmotionModel):
    """
    SPhoBERT - Specialized PhoBERT variant for emotion recognition.

    Prefers the encoder's ``pooler_output``; otherwise pools by averaging
    the hidden states over the sequence dimension instead of taking the
    CLS token.
    """

    def forward(self, input_ids, attention_mask, labels=None):
        """Encode, pool (pooler output or mean over tokens), classify."""
        encoded = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        pooled = getattr(encoded, 'pooler_output', None)
        if pooled is None:
            # Average across the sequence-length axis.
            pooled = encoded.last_hidden_state.mean(dim=1)

        logits = self.classifier(self.dropout(pooled))

        loss = None if labels is None else nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
72
-
73
-
74
class RoBERTaGRUModel(nn.Module):
    """
    RoBERTa + GRU Hybrid model for emotion recognition.

    The transformer's token-level hidden states are fed to a stacked
    bidirectional GRU; the GRU outputs are mean-pooled over time and
    classified with a linear head.
    """

    def __init__(self, model_name: str, num_labels: int, hidden_size: int = 256):
        super().__init__()
        self.config = AutoConfig.from_pretrained(model_name, ignore_mismatched_sizes=True)
        self.encoder = AutoModel.from_pretrained(model_name, config=self.config, ignore_mismatched_sizes=True)
        self.gru = nn.GRU(
            input_size=self.config.hidden_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=0.1,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(0.1)
        # Bidirectional GRU concatenates both directions -> doubled width.
        self.classifier = nn.Linear(2 * hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Encode, run the GRU over token states, mean-pool, classify."""
        token_states = self.encoder(
            input_ids=input_ids, attention_mask=attention_mask
        ).last_hidden_state                                # [B, T, encoder_hidden]

        gru_states, _ = self.gru(token_states)             # [B, T, 2*hidden_size]
        pooled = self.dropout(gru_states.mean(dim=1))      # global average pooling
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
110
-
111
-
112
class TextCNNModel(nn.Module):
    """
    TextCNN model for emotion recognition.

    Embeds token ids, applies parallel 2-D convolutions with several
    window sizes, max-pools each feature map over time, concatenates the
    pooled features, and classifies with a linear layer.

    NOTE(review): ``attention_mask`` is accepted for interface parity with
    the transformer models but is not used — padded positions take part
    in the convolutions.
    """

    def __init__(self, vocab_size: int, embedding_dim: int = 128, num_labels: int = 7,
                 num_filters: int = 100, filter_sizes: list = None, dropout: float = 0.5):
        super().__init__()
        # Fix: avoid a mutable default argument ([3, 4, 5] was a shared
        # list object). None sentinel keeps the same effective default.
        if filter_sizes is None:
            filter_sizes = [3, 4, 5]
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, (filter_size, embedding_dim))
            for filter_size in filter_sizes
        ])
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(num_filters * len(filter_sizes), num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return {"loss", "logits"}; loss is None when labels is None."""
        embedded = self.embedding(input_ids)   # [batch_size, seq_len, embedding_dim]
        embedded = embedded.unsqueeze(1)       # add channel dim for Conv2d: [B, 1, T, E]

        conv_outputs = []
        for conv in self.convs:
            conv_out = F.relu(conv(embedded)).squeeze(3)          # [B, num_filters, T']
            # Max over the remaining time dimension.
            pooled = F.max_pool1d(conv_out, conv_out.size(2)).squeeze(2)  # [B, num_filters]
            conv_outputs.append(pooled)

        # One feature vector per filter size, concatenated.
        concatenated = torch.cat(conv_outputs, dim=1)  # [B, num_filters * len(filter_sizes)]
        logits = self.classifier(self.dropout(concatenated))

        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return {"loss": loss, "logits": logits}
155
-
156
-
157
class BiLSTMModel(nn.Module):
    """
    BiLSTM model for emotion recognition.

    Embeds token ids, runs a stacked bidirectional LSTM, mean-pools the
    outputs over time, and applies dropout plus a linear head.
    """

    def __init__(self, vocab_size: int, embedding_dim: int = 128, hidden_size: int = 256,
                 num_labels: int = 7, num_layers: int = 2, dropout: float = 0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # nn.LSTM applies dropout only between stacked layers.
        inter_layer_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(dropout)
        # Forward and backward directions are concatenated -> 2 * hidden_size.
        self.classifier = nn.Linear(2 * hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return {"loss", "logits"}; loss is None when labels is None.

        ``attention_mask`` is accepted for interface parity but unused.
        """
        token_vectors = self.embedding(input_ids)   # [batch_size, seq_len, embedding_dim]
        lstm_states, _ = self.lstm(token_vectors)   # [batch_size, seq_len, 2*hidden_size]
        pooled = lstm_states.mean(dim=1)            # global average over time
        logits = self.classifier(self.dropout(pooled))

        if labels is None:
            return {"loss": None, "logits": logits}
        return {"loss": nn.CrossEntropyLoss()(logits, labels), "logits": logits}
194
-
195
-
196
def get_model(model_name: str, num_labels: int, use_custom: bool = False,
              model_type: str = "standard", **kwargs):
    """
    Factory function to get a model instance.

    Args:
        model_name: HuggingFace model identifier (unused by the
            vocabulary-based "textcnn" and "bilstm" model types).
        num_labels: Number of classification labels.
        use_custom: Whether to use the custom TransformerForEmotion head
            instead of AutoModelForSequenceClassification.
        model_type: Type of model - "standard", "sphobert", "roberta-gru",
            "textcnn", "bilstm".
        **kwargs: Additional model arguments; only the keys each model
            type understands are forwarded.

    Returns:
        An ``nn.Module`` ready for training.
    """
    if model_type == "sphobert":
        return SPhoBERTModel(model_name, num_labels)

    if model_type == "roberta-gru":
        return RoBERTaGRUModel(model_name, num_labels, kwargs.get('hidden_size', 256))

    if model_type == "textcnn":
        # Forward the TextCNN-specific knobs instead of dropping them.
        return TextCNNModel(
            vocab_size=kwargs.get('vocab_size', 32000),
            embedding_dim=kwargs.get('embedding_dim', 128),
            num_labels=num_labels,
            num_filters=kwargs.get('num_filters', 100),
            dropout=kwargs.get('dropout', 0.5),
        )

    if model_type == "bilstm":
        # Forward the BiLSTM-specific knobs instead of dropping them.
        return BiLSTMModel(
            vocab_size=kwargs.get('vocab_size', 32000),
            embedding_dim=kwargs.get('embedding_dim', 128),
            hidden_size=kwargs.get('hidden_size', 256),
            num_labels=num_labels,
            num_layers=kwargs.get('num_layers', 2),
            dropout=kwargs.get('dropout', 0.5),
        )

    if use_custom:
        # Fix: BaseEmotionModel.__init__ accepts only (model_name, num_labels);
        # forwarding arbitrary **kwargs here raised TypeError for any extra key.
        return TransformerForEmotion(model_name, num_labels)

    # Default: HuggingFace AutoModelForSequenceClassification.
    try:
        config = AutoConfig.from_pretrained(model_name)
        config.num_labels = num_labels

        model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            config=config,
            **{k: v for k, v in kwargs.items() if k in ['ignore_mismatched_sizes']}
        )
        return model
    except Exception as e:
        # Best-effort fallback to the custom head (same fix as above: no **kwargs).
        print(f"Warning: Failed to use AutoModelForSequenceClassification: {e}")
        return TransformerForEmotion(model_name, num_labels)