Upload sentiment classifier trained on Amazon Reviews

Browse files

Files changed (8) hide show

.gitattributes +1 -0
README.md +114 -0
config.json +25 -0
label_mappings.json +12 -0
model.safetensors +3 -0
special_tokens_map.json +15 -0
tokenizer.json +3 -0
tokenizer_config.json +54 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,114 @@

+---
+language: multilingual
+license: apache-2.0
+tags:
+- sentiment-analysis
+- text-classification
+- xlm-roberta
+- amazon-reviews
+datasets:
+- amazon-reviews
+metrics:
+- accuracy
+model-index:
+- name: anpmts/sentiment-classifier
+  results:
+  - task:
+      type: text-classification
+      name: Sentiment Analysis
+    dataset:
+      type: amazon-reviews
+      name: Amazon Reviews
+    metrics:
+    - type: accuracy
+      value: 0.924
+      name: Validation Accuracy
+---
+# Sentiment Classifier - XLM-RoBERTa
+This is a sentiment classification model fine-tuned on the Amazon Reviews dataset.
+## Model Description
+- **Base Model**: xlm-roberta-base
+- **Task**: 3-class Sentiment Classification (negative/neutral/positive)
+- **Languages**: Multilingual (100+ languages)
+- **Parameters**: 278M
+## Training Data
+- **Dataset**: Amazon Reviews (Kaggle)
+- **Training Samples**: 8,500
+- **Validation Samples**: 1,500
+- **Test Samples**: 5,000
+## Performance
+| Metric | Value |
+|--------|-------|
+| Validation Accuracy | 92.4% |
+| Training Accuracy | 85.4% |
+| Validation Loss | 0.179 |
+## Training Details
+- **Epochs**: 10
+- **Batch Size**: 16
+- **Learning Rate**: 2e-5
+- **Mixed Precision**: FP16
+- **Optimizer**: AdamW
+- **Scheduler**: Linear Warmup + Cosine Decay
+## Usage
+```python
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# Load model and tokenizer
+model_name = "anpmts/sentiment-classifier"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+# Prepare input
+text = "This product is amazing! Highly recommend."
+inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=256)
+# Get prediction
+with torch.no_grad():
+    outputs = model(**inputs)
+    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    sentiment = torch.argmax(predictions, dim=-1)
+# Map to label
+labels = ["negative", "neutral", "positive"]
+print(f"Sentiment: {labels[sentiment.item()]}")
+print(f"Confidence: {predictions[0][sentiment].item():.2%}")
+```
+## Training Metrics Over Epochs
+| Epoch | Train Loss | Val Loss | Val Acc |
+|-------|-----------|----------|---------|
+| 1     | 0.639     | 0.613    | 49.5%   |
+| 5     | 0.551     | 0.455    | 68.9%   |
+| 10    | 0.270     | 0.179    | 92.4%   |
+## Citation
+If you use this model, please cite:
+```
+@misc{sentiment-classifier-xlm-roberta,
+  author = {TrustShop},
+  title = {Sentiment Classifier - XLM-RoBERTa},
+  year = {2025},
+  publisher = {HuggingFace},
+  url = {https://huggingface.co/anpmts/sentiment-classifier}
+}
+```
+## License
+Apache 2.0

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "architectures": [
+    "SentimentClassifier"
+  ],
+  "dropout": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "loss_weights": {
+    "classification": 0.7,
+    "regression": 0.3
+  },
+  "model_type": "sentiment-classifier",
+  "pretrained_model": "xlm-roberta-base",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.2"
+}

label_mappings.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "label_to_id": {
+    "negative": 0,
+    "neutral": 1,
+    "positive": 2
+  },
+  "id_to_label": {
+    "0": "negative",
+    "1": "neutral",
+    "2": "positive"
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2995c0d64ef46001f404d8cfdf891f65e5b9f198d8ae794e549d3f74f9279a0
+size 1113391228

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
+size 17082734

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
+}