anpmts commited on
Commit
26f3ae9
·
verified ·
1 Parent(s): 8691179

Upload sentiment classifier trained on Amazon Reviews

Browse files
Files changed (3) hide show
  1. README.md +46 -1
  2. configuration_sentiment.py +45 -0
  3. sentiment_classifier.py +208 -0
README.md CHANGED
@@ -63,14 +63,22 @@ This is a sentiment classification model fine-tuned on Amazon Reviews dataset.
63
 
64
  ## Usage
65
 
 
 
 
 
66
  ```python
 
67
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
68
  import torch
69
 
70
  # Load model and tokenizer
71
  model_name = "anpmts/sentiment-classifier"
72
  tokenizer = AutoTokenizer.from_pretrained(model_name)
73
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
 
74
 
75
  # Prepare input
76
  text = "This product is amazing! Highly recommend."
@@ -88,6 +96,43 @@ print(f"Sentiment: {labels[sentiment.item()]}")
88
  print(f"Confidence: {predictions[0][sentiment].item():.2%}")
89
  ```
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  ## Training Metrics Over Epochs
92
 
93
  | Epoch | Train Loss | Val Loss | Val Acc |
 
63
 
64
  ## Usage
65
 
66
+ ### Option 1: Using AutoModelForSequenceClassification (Recommended)
67
+
68
+ The model uses custom code hosted in this repository; it is downloaded automatically at load time when you pass `trust_remote_code=True`:
69
+
70
  ```python
71
+ # When loading from the HuggingFace Hub, pass trust_remote_code=True
72
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
73
  import torch
74
 
75
  # Load model and tokenizer
76
  model_name = "anpmts/sentiment-classifier"
77
  tokenizer = AutoTokenizer.from_pretrained(model_name)
78
+ model = AutoModelForSequenceClassification.from_pretrained(
79
+ model_name,
80
+ trust_remote_code=True # Required for custom models
81
+ )
82
 
83
  # Prepare input
84
  text = "This product is amazing! Highly recommend."
 
96
  print(f"Confidence: {predictions[0][sentiment].item():.2%}")
97
  ```
98
 
99
+ ### Option 2: Using Pipeline (Easiest)
100
+
101
+ ```python
102
+ from transformers import pipeline
103
+
104
+ # Load sentiment analysis pipeline
105
+ classifier = pipeline(
106
+ "text-classification",
107
+ model="anpmts/sentiment-classifier",
108
+ trust_remote_code=True
109
+ )
110
+
111
+ # Predict
112
+ result = classifier("This product is amazing! Highly recommend.")
113
+ print(result)
114
+ # Output: [{'label': 'positive', 'score': 0.96}]
115
+ ```
116
+
117
+ ### Option 3: Direct Model Loading
118
+
119
+ ```python
120
+ from transformers import AutoTokenizer
121
+ import torch
122
+
123
+ # You need to have the model code available locally
124
+ from src.models import SentimentClassifier
125
+
126
+ model = SentimentClassifier.from_pretrained("anpmts/sentiment-classifier")
127
+ tokenizer = AutoTokenizer.from_pretrained("anpmts/sentiment-classifier")
128
+
129
+ # Inference
130
+ text = "This product is amazing!"
131
+ inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True, padding=True)
132
+ outputs = model(**inputs)
133
+ predictions = torch.softmax(outputs["logits"], dim=-1)
134
+ ```
135
+
136
  ## Training Metrics Over Epochs
137
 
138
  | Epoch | Train Loss | Val Loss | Val Acc |
configuration_sentiment.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration class for SentimentClassifier."""
2
+
3
+ from typing import Optional
4
+
5
+ from transformers import PretrainedConfig
6
+
7
+
8
+ class SentimentClassifierConfig(PretrainedConfig):
9
+ """
10
+ Configuration class for SentimentClassifier model.
11
+
12
+ This class stores the configuration of a :class:`~SentimentClassifier` model.
13
+ It is used to instantiate a SentimentClassifier model according to the specified
14
+ arguments, defining the model architecture.
15
+
16
+ Args:
17
+ pretrained_model (:obj:`str`, defaults to :obj:`"xlm-roberta-base"`):
18
+ Name of the pre-trained transformer model to use as encoder.
19
+ num_labels (:obj:`int`, defaults to :obj:`3`):
20
+ Number of sentiment classes (positive/neutral/negative).
21
+ dropout (:obj:`float`, defaults to :obj:`0.1`):
22
+ Dropout probability for the classification head.
23
+ hidden_size (:obj:`int`, optional):
24
+ Hidden size of the encoder model. If None, will be auto-detected from encoder config.
25
+ model_type (:obj:`str`, defaults to :obj:`"sentiment-classifier"`):
26
+ Model type identifier for the Hugging Face Hub.
27
+ """
28
+
29
+ model_type = "sentiment-classifier"
30
+
31
+ def __init__(
32
+ self,
33
+ pretrained_model: str = "xlm-roberta-base",
34
+ num_labels: int = 3,
35
+ dropout: float = 0.1,
36
+ hidden_size: Optional[int] = None,
37
+ **kwargs,
38
+ ):
39
+ """Initialize SentimentClassifierConfig."""
40
+ super().__init__(**kwargs)
41
+
42
+ self.pretrained_model = pretrained_model
43
+ self.num_labels = num_labels
44
+ self.dropout = dropout
45
+ self.hidden_size = hidden_size
sentiment_classifier.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Sentiment classifier for text classification."""
2
+
3
+ from typing import Dict, Optional
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ from transformers import AutoModel, PreTrainedModel
8
+
9
+ # Handle imports for both local usage and HuggingFace Hub
10
+ try:
11
+ from .configuration_sentiment import SentimentClassifierConfig
12
+ except ImportError:
13
+ try:
14
+ from configuration_sentiment import SentimentClassifierConfig
15
+ except ImportError:
16
+ from src.models.configuration_sentiment import SentimentClassifierConfig
17
+
18
+
19
class SentimentClassifier(PreTrainedModel):
    """
    Sentiment classifier for sequence classification.

    Wraps a pre-trained transformer encoder with a dropout + linear
    classification head over the first ([CLS]) token representation.

    Outputs:
        Sentiment label (positive/neutral/negative) - classification
    """

    config_class = SentimentClassifierConfig

    def __init__(
        self,
        config: Optional[SentimentClassifierConfig] = None,
        pretrained_model: str = "xlm-roberta-base",
        num_labels: int = 3,
        dropout: float = 0.1,
        hidden_size: Optional[int] = None,
        class_weights: Optional[torch.Tensor] = None,
        use_flash_attention_2: bool = False,
    ):
        """
        Initialize sentiment classifier.

        Args:
            config: Model configuration object. If None, one is built from the
                remaining keyword arguments.
            pretrained_model: Name of the pre-trained model (used only when
                ``config`` is None).
            num_labels: Number of sentiment classes (default: 3).
            dropout: Dropout probability.
            hidden_size: Hidden size of the model (auto-detected if None).
            class_weights: Tensor of class weights for classification loss.
            use_flash_attention_2: Use Flash Attention 2 for faster attention
                (falls back to the default implementation if unavailable).
        """
        # Create config if not provided
        if config is None:
            config = SentimentClassifierConfig(
                pretrained_model=pretrained_model,
                num_labels=num_labels,
                dropout=dropout,
                hidden_size=hidden_size,
            )

        super().__init__(config)

        # Load pre-trained transformer with optional Flash Attention 2.
        # BUGFIX: the previous code wrapped only the dict assignment in
        # try/except, which can never raise — the failure actually occurs in
        # from_pretrained, so the fallback never triggered. Guard the load
        # itself instead.
        if use_flash_attention_2:
            try:
                self.encoder = AutoModel.from_pretrained(
                    config.pretrained_model,
                    attn_implementation="flash_attention_2",
                )
            except (ValueError, ImportError):
                # Flash Attention 2 not available; fall back to default.
                self.encoder = AutoModel.from_pretrained(config.pretrained_model)
        else:
            self.encoder = AutoModel.from_pretrained(config.pretrained_model)

        # Auto-detect hidden size from the encoder if not set in the config.
        if config.hidden_size is None:
            config.hidden_size = self.encoder.config.hidden_size

        self.hidden_size = config.hidden_size
        self.num_labels = config.num_labels

        # Dropout before the classification head.
        self.dropout = nn.Dropout(config.dropout)

        # Classification head (sentiment label).
        self.classifier = nn.Linear(self.hidden_size, self.num_labels)

        # Class weights for the cross-entropy loss; registered as a buffer so
        # they move with the model (.to/.cuda) and are saved in the state dict.
        self.register_buffer(
            "class_weights",
            class_weights if class_weights is not None else torch.ones(self.num_labels),
        )

        # Initialize weights of the newly-created head.
        # NOTE(review): post_init applies _init_weights to all modules;
        # transformers marks already-loaded pretrained submodules so the
        # encoder should be skipped — verify against the pinned transformers
        # version that encoder weights are not re-initialized here.
        self.post_init()

    def _init_weights(self, module):
        """Initialize head weights (Xavier-uniform linear, zero bias)."""
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        labels: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> Dict[str, torch.Tensor]:
        """
        Forward pass for classification.

        Args:
            input_ids: Input token IDs [batch_size, seq_len].
            attention_mask: Attention mask [batch_size, seq_len].
            labels: Ground truth sentiment labels [batch_size].
            **kwargs: Additional arguments (accepted and ignored so Trainer /
                pipeline extras do not raise).

        Returns:
            Dictionary containing loss (None when no labels given) and logits.
        """
        # Encode with transformer
        outputs = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True,
        )

        # Use the first token ([CLS]/<s>) representation as the pooled output.
        pooled_output = outputs.last_hidden_state[:, 0, :]

        # Apply dropout
        pooled_output = self.dropout(pooled_output)

        # Classification head
        logits = self.classifier(pooled_output)

        # Compute weighted cross-entropy loss only if labels are provided.
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss(weight=self.class_weights)
            loss = loss_fct(logits, labels)

        return {
            "loss": loss,
            "logits": logits,
        }

    def predict(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
    ) -> torch.Tensor:
        """
        Make predictions.

        Note: switches the module to eval mode as a side effect.

        Args:
            input_ids: Input token IDs [batch_size, seq_len].
            attention_mask: Attention mask [batch_size, seq_len].

        Returns:
            Predicted labels [batch_size].
        """
        self.eval()

        with torch.no_grad():
            outputs = self.forward(input_ids, attention_mask)
            logits = outputs["logits"]
            label_predictions = torch.argmax(logits, dim=-1)

        return label_predictions

    def get_probabilities(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
    ) -> torch.Tensor:
        """
        Get class probabilities.

        Note: switches the module to eval mode as a side effect.

        Args:
            input_ids: Input token IDs [batch_size, seq_len].
            attention_mask: Attention mask [batch_size, seq_len].

        Returns:
            Class probabilities [batch_size, num_labels] (softmax over logits).
        """
        self.eval()

        with torch.no_grad():
            outputs = self.forward(input_ids, attention_mask)
            logits = outputs["logits"]
            probabilities = torch.softmax(logits, dim=-1)

        return probabilities

    def freeze_encoder(self):
        """Freeze encoder parameters (only train classification head)."""
        for param in self.encoder.parameters():
            param.requires_grad = False

    def unfreeze_encoder(self):
        """Unfreeze encoder parameters."""
        for param in self.encoder.parameters():
            param.requires_grad = True

    def get_num_trainable_params(self) -> int:
        """Get number of trainable parameters (requires_grad only)."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)