anpmts committed on
Commit
abb6dd8
·
verified ·
1 Parent(s): b3578be

Upload sentiment classifier trained on Amazon Reviews

Browse files
Files changed (2) hide show
  1. model.safetensors +1 -1
  2. sentiment_classifier.py +6 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25bcb5c098e6ee12a1982c57d0ae4af0e03db286684b66c37283561f7a7563c7
3
  size 1112208144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a615a2066e4a0f86b39d9bcd8dedc63e6adc3e785b38ad31f60dfa8baad4c4b
3
  size 1112208144
sentiment_classifier.py CHANGED
@@ -36,6 +36,7 @@ class SentimentClassifier(PreTrainedModel):
36
  hidden_size: Optional[int] = None,
37
  class_weights: Optional[torch.Tensor] = None,
38
  use_flash_attention_2: bool = False,
 
39
  ):
40
  """
41
  Initialize sentiment classifier.
@@ -48,6 +49,7 @@ class SentimentClassifier(PreTrainedModel):
48
  hidden_size: Hidden size of the model (auto-detected if None).
49
  class_weights: Tensor of class weights for classification loss.
50
  use_flash_attention_2: Use Flash Attention 2 for faster attention (if available).
 
51
  """
52
  # Create config if not provided
53
  if config is None:
@@ -71,6 +73,10 @@ class SentimentClassifier(PreTrainedModel):
71
 
72
  self.encoder = AutoModel.from_pretrained(config.pretrained_model, **encoder_kwargs)
73
 
 
 
 
 
74
  # Get hidden size
75
  if config.hidden_size is None:
76
  config.hidden_size = self.encoder.config.hidden_size
 
36
  hidden_size: Optional[int] = None,
37
  class_weights: Optional[torch.Tensor] = None,
38
  use_flash_attention_2: bool = False,
39
+ gradient_checkpointing: bool = False,
40
  ):
41
  """
42
  Initialize sentiment classifier.
 
49
  hidden_size: Hidden size of the model (auto-detected if None).
50
  class_weights: Tensor of class weights for classification loss.
51
  use_flash_attention_2: Use Flash Attention 2 for faster attention (if available).
52
+ gradient_checkpointing: Enable gradient checkpointing to save memory.
53
  """
54
  # Create config if not provided
55
  if config is None:
 
73
 
74
  self.encoder = AutoModel.from_pretrained(config.pretrained_model, **encoder_kwargs)
75
 
76
+ # Enable gradient checkpointing if requested (saves memory at cost of compute)
77
+ if gradient_checkpointing:
78
+ self.encoder.gradient_checkpointing_enable()
79
+
80
  # Get hidden size
81
  if config.hidden_size is None:
82
  config.hidden_size = self.encoder.config.hidden_size