anpmts commited on
Commit
26f3ae9
·
verified ·
1 Parent(s): 8691179

Upload sentiment classifier trained on Amazon Reviews

Browse files
Files changed (3) hide show
  1. README.md +46 -1
  2. configuration_sentiment.py +45 -0
  3. sentiment_classifier.py +208 -0
README.md CHANGED
@@ -63,14 +63,22 @@ This is a sentiment classification model fine-tuned on Amazon Reviews dataset.
63
 
64
  ## Usage
65
 
 
 
 
 
66
  ```python
 
67
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
68
  import torch
69
 
70
  # Load model and tokenizer
71
  model_name = "anpmts/sentiment-classifier"
72
  tokenizer = AutoTokenizer.from_pretrained(model_name)
73
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
 
74
 
75
  # Prepare input
76
  text = "This product is amazing! Highly recommend."
@@ -88,6 +96,43 @@ print(f"Sentiment: {labels[sentiment.item()]}")
88
  print(f"Confidence: {predictions[0][sentiment].item():.2%}")
89
  ```
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  ## Training Metrics Over Epochs
92
 
93
  | Epoch | Train Loss | Val Loss | Val Acc |
 
63
 
64
  ## Usage
65
 
66
+ ### Option 1: Using AutoModelForSequenceClassification (Recommended)
67
+
68
+ The model uses custom code hosted in this repository; it is downloaded automatically at load time when you pass `trust_remote_code=True`:
69
+
70
  ```python
71
+ # When loading from the HuggingFace Hub, pass trust_remote_code=True
72
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
73
  import torch
74
 
75
  # Load model and tokenizer
76
  model_name = "anpmts/sentiment-classifier"
77
  tokenizer = AutoTokenizer.from_pretrained(model_name)
78
+ model = AutoModelForSequenceClassification.from_pretrained(
79
+ model_name,
80
+ trust_remote_code=True # Required for custom models
81
+ )
82
 
83
  # Prepare input
84
  text = "This product is amazing! Highly recommend."
 
96
  print(f"Confidence: {predictions[0][sentiment].item():.2%}")
97
  ```
98
 
99
+ ### Option 2: Using Pipeline (Easiest)
100
+
101
+ ```python
102
+ from transformers import pipeline
103
+
104
+ # Load sentiment analysis pipeline
105
+ classifier = pipeline(
106
+ "text-classification",
107
+ model="anpmts/sentiment-classifier",
108
+ trust_remote_code=True
109
+ )
110
+
111
+ # Predict
112
+ result = classifier("This product is amazing! Highly recommend.")
113
+ print(result)
114
+ # Output: [{'label': 'positive', 'score': 0.96}]
115
+ ```
116
+
117
+ ### Option 3: Direct Model Loading
118
+
119
+ ```python
120
+ from transformers import AutoTokenizer
121
+ import torch
122
+
123
+ # You need to have the model code available locally
124
+ from src.models import SentimentClassifier
125
+
126
+ model = SentimentClassifier.from_pretrained("anpmts/sentiment-classifier")
127
+ tokenizer = AutoTokenizer.from_pretrained("anpmts/sentiment-classifier")
128
+
129
+ # Inference
130
+ text = "This product is amazing!"
131
+ inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True, padding=True)
132
+ outputs = model(**inputs)
133
+ predictions = torch.softmax(outputs["logits"], dim=-1)
134
+ ```
135
+
136
  ## Training Metrics Over Epochs
137
 
138
  | Epoch | Train Loss | Val Loss | Val Acc |
configuration_sentiment.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration class for SentimentClassifier."""
2
+
3
+ from typing import Optional
4
+
5
+ from transformers import PretrainedConfig
6
+
7
+
8
+ class SentimentClassifierConfig(PretrainedConfig):
9
+ """
10
+ Configuration class for SentimentClassifier model.
11
+
12
+ This class stores the configuration of a :class:`~SentimentClassifier` model.
13
+ It is used to instantiate a SentimentClassifier model according to the specified
14
+ arguments, defining the model architecture.
15
+
16
+ Args:
17
+ pretrained_model (:obj:`str`, defaults to :obj:`"xlm-roberta-base"`):
18
+ Name of the pre-trained transformer model to use as encoder.
19
+ num_labels (:obj:`int`, defaults to :obj:`3`):
20
+ Number of sentiment classes (positive/neutral/negative).
21
+ dropout (:obj:`float`, defaults to :obj:`0.1`):
22
+ Dropout probability for the classification head.
23
+ hidden_size (:obj:`int`, optional):
24
+ Hidden size of the encoder model. If None, will be auto-detected from encoder config.
25
+ model_type (:obj:`str`, defaults to :obj:`"sentiment-classifier"`):
26
+ Model type identifier for the Hugging Face Hub.
27
+ """
28
+
29
+ model_type = "sentiment-classifier"
30
+
31
+ def __init__(
32
+ self,
33
+ pretrained_model: str = "xlm-roberta-base",
34
+ num_labels: int = 3,
35
+ dropout: float = 0.1,
36
+ hidden_size: Optional[int] = None,
37
+ **kwargs,
38
+ ):
39
+ """Initialize SentimentClassifierConfig."""
40
+ super().__init__(**kwargs)
41
+
42
+ self.pretrained_model = pretrained_model
43
+ self.num_labels = num_labels
44
+ self.dropout = dropout
45
+ self.hidden_size = hidden_size
sentiment_classifier.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Sentiment classifier for text classification."""
2
+
3
+ from typing import Dict, Optional
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ from transformers import AutoModel, PreTrainedModel
8
+
9
+ # Handle imports for both local usage and HuggingFace Hub
10
+ try:
11
+ from .configuration_sentiment import SentimentClassifierConfig
12
+ except ImportError:
13
+ try:
14
+ from configuration_sentiment import SentimentClassifierConfig
15
+ except ImportError:
16
+ from src.models.configuration_sentiment import SentimentClassifierConfig
17
+
18
+
19
class SentimentClassifier(PreTrainedModel):
    """
    Sentiment classifier for sequence classification.

    Wraps a pre-trained transformer encoder with a dropout + linear
    classification head over the first ([CLS]) token representation.

    Outputs:
        Sentiment label (positive/neutral/negative) - classification
    """

    config_class = SentimentClassifierConfig

    def __init__(
        self,
        config: Optional[SentimentClassifierConfig] = None,
        pretrained_model: str = "xlm-roberta-base",
        num_labels: int = 3,
        dropout: float = 0.1,
        hidden_size: Optional[int] = None,
        class_weights: Optional[torch.Tensor] = None,
        use_flash_attention_2: bool = False,
    ):
        """
        Initialize sentiment classifier.

        Args:
            config: Model configuration object. If None, one is built from the
                remaining keyword arguments.
            pretrained_model: Name of the pre-trained model (used only when
                ``config`` is None).
            num_labels: Number of sentiment classes (default: 3).
            dropout: Dropout probability.
            hidden_size: Hidden size of the model (auto-detected if None).
            class_weights: Tensor of class weights for classification loss.
            use_flash_attention_2: Use Flash Attention 2 for faster attention
                (falls back to the default implementation if unavailable).
        """
        # Create config if not provided
        if config is None:
            config = SentimentClassifierConfig(
                pretrained_model=pretrained_model,
                num_labels=num_labels,
                dropout=dropout,
                hidden_size=hidden_size,
            )

        super().__init__(config)

        # Load pre-trained transformer with optional Flash Attention 2.
        # BUGFIX: the previous code wrapped only the dict assignment in
        # try/except, which can never raise — the failure actually occurs in
        # from_pretrained, so the fallback never triggered. Guard the load
        # itself instead.
        if use_flash_attention_2:
            try:
                self.encoder = AutoModel.from_pretrained(
                    config.pretrained_model,
                    attn_implementation="flash_attention_2",
                )
            except (ValueError, ImportError):
                # Flash Attention 2 not available; fall back to default.
                self.encoder = AutoModel.from_pretrained(config.pretrained_model)
        else:
            self.encoder = AutoModel.from_pretrained(config.pretrained_model)

        # Auto-detect hidden size from the encoder if not set in the config.
        if config.hidden_size is None:
            config.hidden_size = self.encoder.config.hidden_size

        self.hidden_size = config.hidden_size
        self.num_labels = config.num_labels

        # Dropout before the classification head.
        self.dropout = nn.Dropout(config.dropout)

        # Classification head (sentiment label).
        self.classifier = nn.Linear(self.hidden_size, self.num_labels)

        # Class weights for the cross-entropy loss; registered as a buffer so
        # they move with the model (.to/.cuda) and are saved in the state dict.
        self.register_buffer(
            "class_weights",
            class_weights if class_weights is not None else torch.ones(self.num_labels),
        )

        # Initialize weights of the newly-created head.
        # NOTE(review): post_init applies _init_weights to all modules;
        # transformers marks already-loaded pretrained submodules so the
        # encoder should be skipped — verify against the pinned transformers
        # version that encoder weights are not re-initialized here.
        self.post_init()

    def _init_weights(self, module):
        """Initialize head weights (Xavier-uniform linear, zero bias)."""
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        labels: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> Dict[str, torch.Tensor]:
        """
        Forward pass for classification.

        Args:
            input_ids: Input token IDs [batch_size, seq_len].
            attention_mask: Attention mask [batch_size, seq_len].
            labels: Ground truth sentiment labels [batch_size].
            **kwargs: Additional arguments (accepted and ignored so Trainer /
                pipeline extras do not raise).

        Returns:
            Dictionary containing loss (None when no labels given) and logits.
        """
        # Encode with transformer
        outputs = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True,
        )

        # Use the first token ([CLS]/<s>) representation as the pooled output.
        pooled_output = outputs.last_hidden_state[:, 0, :]

        # Apply dropout
        pooled_output = self.dropout(pooled_output)

        # Classification head
        logits = self.classifier(pooled_output)

        # Compute weighted cross-entropy loss only if labels are provided.
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss(weight=self.class_weights)
            loss = loss_fct(logits, labels)

        return {
            "loss": loss,
            "logits": logits,
        }

    def predict(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
    ) -> torch.Tensor:
        """
        Make predictions.

        Note: switches the module to eval mode as a side effect.

        Args:
            input_ids: Input token IDs [batch_size, seq_len].
            attention_mask: Attention mask [batch_size, seq_len].

        Returns:
            Predicted labels [batch_size].
        """
        self.eval()

        with torch.no_grad():
            outputs = self.forward(input_ids, attention_mask)
            logits = outputs["logits"]
            label_predictions = torch.argmax(logits, dim=-1)

        return label_predictions

    def get_probabilities(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
    ) -> torch.Tensor:
        """
        Get class probabilities.

        Note: switches the module to eval mode as a side effect.

        Args:
            input_ids: Input token IDs [batch_size, seq_len].
            attention_mask: Attention mask [batch_size, seq_len].

        Returns:
            Class probabilities [batch_size, num_labels] (softmax over logits).
        """
        self.eval()

        with torch.no_grad():
            outputs = self.forward(input_ids, attention_mask)
            logits = outputs["logits"]
            probabilities = torch.softmax(logits, dim=-1)

        return probabilities

    def freeze_encoder(self):
        """Freeze encoder parameters (only train classification head)."""
        for param in self.encoder.parameters():
            param.requires_grad = False

    def unfreeze_encoder(self):
        """Unfreeze encoder parameters."""
        for param in self.encoder.parameters():
            param.requires_grad = True

    def get_num_trainable_params(self) -> int:
        """Get number of trainable parameters (requires_grad only)."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)