llm-semantic-router/feedback-detector-dataset
Viewer • Updated • 20.9k • 28 • 1
A high-performance multilingual 4-class feedback classification model, fine-tuned from mmBERT-base on an AMD Instinct MI300X GPU.
This model classifies user feedback into 4 categories:
| Label | ID | Description | F1 Score |
|---|---|---|---|
| SAT | 0 | User is satisfied | 100.0% |
| NEED_CLARIFICATION | 1 | User needs more information | 99.7% |
| WRONG_ANSWER | 2 | System gave incorrect response | 96.2% |
| WANT_DIFFERENT | 3 | User wants something different | 95.9% |
| Metric | Value |
|---|---|
| Accuracy | 98.63% |
| F1 Macro | 97.94% |
| F1 Weighted | 98.62% |
| Parameter | Value |
|---|---|
| Base Model | jhu-clsp/mmBERT-base |
| Epochs | 3 |
| Batch Size | 64 |
| Learning Rate | 2e-5 |
| Max Length | 512 |
| Optimizer | AdamW |
| Component | Specification |
|---|---|
| GPU | AMD Instinct MI300X |
| VRAM | 192 GB HBM3 |
| Framework | PyTorch with ROCm |
| Training Time | ~2 minutes |
from transformers import pipeline
# Quick start: build a text-classification pipeline from the published
# checkpoint and classify a single English sentence.
classifier = pipeline("text-classification", model="llm-semantic-router/mmbert-feedback-detector")
result = classifier("Thank you, that was exactly what I needed!")
# The pipeline result is a list with one dict per input, holding the
# predicted label and its score; the trailing comment shows the output
# given for this sentence.
print(result) # [{'label': 'SAT', 'score': 0.99}]
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Manual usage: load the sequence-classification model and its tokenizer
# from the same checkpoint name so the two stay in sync.
model = AutoModelForSequenceClassification.from_pretrained("llm-semantic-router/mmbert-feedback-detector")
tokenizer = AutoTokenizer.from_pretrained("llm-semantic-router/mmbert-feedback-detector")
# Class names ordered by class ID (SAT=0, NEED_CLARIFICATION=1,
# WRONG_ANSWER=2, WANT_DIFFERENT=3); classify() indexes this list with the
# predicted class index.
# NOTE(review): assumed to match the checkpoint's own id2label mapping -- confirm.
labels = ["SAT", "NEED_CLARIFICATION", "WRONG_ANSWER", "WANT_DIFFERENT"]
def classify(text):
    """Classify one piece of user feedback.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the module-level ``model`` with gradient tracking disabled, and the
    logits are turned into probabilities with a softmax over the last
    dimension.

    Args:
        text: The feedback message to classify. Intended for a single
            string: the ``.item()`` call on the argmax only works when the
            batch holds exactly one example.

    Returns:
        A ``(label, confidence)`` tuple: the entry of the module-level
        ``labels`` list at the highest-probability class index, and that
        class's probability as a Python float.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(**encoded).logits

    distribution = torch.softmax(logits, dim=-1)
    best_index = distribution.argmax(-1).item()
    confidence = distribution[0][best_index].item()
    return labels[best_index], confidence
# Test: classify one sentence and print the predicted label together with
# its confidence, formatted as a percentage with two decimal places.
label, confidence = classify("Thank you, that was helpful!")
print(f"Label: {label}, Confidence: {confidence:.2%}")
# Example calls, one per language/category. Each "# =>" line shows the
# (label, confidence) tuple given for the call directly above it.
# NOTE(review): the confidences look rounded to two decimals; exact values
# may differ between runs/environments -- confirm before relying on them.

# English - Satisfied
classify("Thanks, that's exactly what I needed!")
# => ('SAT', 0.99)
# English - Need clarification
classify("Can you explain that in more detail?")
# => ('NEED_CLARIFICATION', 0.97)
# English - Wrong answer
classify("That's incorrect, the information you gave me was wrong.")
# => ('WRONG_ANSWER', 0.95)
# English - Want different
classify("Can you show me other options instead?")
# => ('WANT_DIFFERENT', 0.94)
# Japanese - Need clarification
classify("もう少し詳しく教えてください")
# => ('NEED_CLARIFICATION', 0.96)
# Turkish - Wrong answer
classify("Bu yanlış bilgi, düzeltin lütfen")
# => ('WRONG_ANSWER', 0.93)
# German (zero-shot)
# NOTE(review): "zero-shot" presumably means this language was not part of
# the fine-tuning data -- confirm against the dataset description.
classify("Können Sie mir eine andere Option zeigen?")
# => ('WANT_DIFFERENT', 0.89)
# Spanish (zero-shot)
classify("Gracias, eso es exactamente lo que necesitaba!")
# => ('SAT', 0.95)
@misc{mmbert_feedback_detector,
title={mmBERT Feedback Detector},
author={LLM Semantic Router Team},
year={2025},
url={https://huggingface.co/llm-semantic-router/mmbert-feedback-detector}
}
Apache 2.0
Base model
jhu-clsp/mmBERT-base