# Hugging Face Spaces page header (not part of the module):
# cjen1008 - "Initial deployment to HF Spaces" (d401ec0)
"""
Type definitions and model configuration for the sentiment analysis pipeline.
"""
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Tuple
class ModelType(str, Enum):
    """Identifiers for the models the sentiment pipeline can select.

    Members mix in ``str``, so each member compares equal to its plain string
    value (``ModelType.DEFAULT == "default"``) and can be recovered from that
    string with ``ModelType("default")``.
    """

    # DistilBERT SST-2
    DEFAULT = "default"
    # NLP Town BERT Multilingual. NOTE: despite the member name, the
    # SUPPORTED_MODELS entry for this key points at a BERT checkpoint.
    ROBERTA = "roberta"
    # GoEmotions DistilRoBERTa
    EMOTION = "emotion"
    # Amazon Reviews DistilBERT
    AMAZON = "amazon"
    # CardiffNLP Twitter RoBERTa
    TWITTER = "twitter"
    # BERT base uncased SST-2
    SST2 = "sst2"
    # BART Large MNLI (zero-shot)
    ZEROSHOT = "zeroshot"
# Per-model configuration, keyed by ModelType member.
#
# Keys present on every entry:
#   "hf_id"   - model identifier (presumably a Hugging Face Hub repo id)
#   "labels"  - canonical upper-case class labels for this model
#   "display" - short human-readable model name
#   "task"    - short description of what the model predicts
# Optional keys:
#   "label_map"        - maps another label spelling onto the canonical
#                        "labels" (presumably the model's raw output labels;
#                        confirm against the inference code)
#   "tokenizer"        - separate tokenizer identifier (presumably used in
#                        place of "hf_id" when loading the tokenizer)
#   "pipeline_task"    - pipeline task name, set only for the zero-shot model
#   "candidate_labels" - candidate label strings for the zero-shot model
SUPPORTED_MODELS: Dict[str, Dict] = {
    # Binary sentiment, DistilBERT fine-tuned on SST-2.
    ModelType.DEFAULT: {
        "hf_id": "distilbert-base-uncased-finetuned-sst-2-english",
        "labels": ["NEGATIVE", "POSITIVE"],
        "display": "DistilBERT SST-2",
        "task": "POSITIVE / NEGATIVE",
    },
    # Five-way star rating; the key is ROBERTA but the checkpoint is BERT.
    ModelType.ROBERTA: {
        "hf_id": "nlptown/bert-base-multilingual-uncased-sentiment",
        "labels": [
            "1 STAR",
            "2 STARS",
            "3 STARS",
            "4 STARS",
            "5 STARS",
        ],
        "label_map": {
            "1 star": "1 STAR",
            "2 stars": "2 STARS",
            "3 stars": "3 STARS",
            "4 stars": "4 STARS",
            "5 stars": "5 STARS",
        },
        "display": "BERT Multilingual",
        "task": "1–5 star rating",
    },
    # Seven emotion classes.
    ModelType.EMOTION: {
        "hf_id": "j-hartmann/emotion-english-distilroberta-base",
        "labels": [
            "ANGER",
            "DISGUST",
            "FEAR",
            "JOY",
            "NEUTRAL",
            "SADNESS",
            "SURPRISE",
        ],
        "display": "GoEmotions",
        "task": "7-class emotion",
    },
    # Binary sentiment with generic LABEL_n names mapped to canonical labels.
    ModelType.AMAZON: {
        "hf_id": "sohan-ai/sentiment-analysis-model-amazon-reviews",
        "tokenizer": "distilbert-base-uncased",
        "labels": ["NEGATIVE", "POSITIVE"],
        "label_map": {"LABEL_0": "NEGATIVE", "LABEL_1": "POSITIVE"},
        "display": "Amazon Reviews BERT",
        "task": "POSITIVE / NEGATIVE",
    },
    # Three-way sentiment.
    ModelType.TWITTER: {
        "hf_id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "labels": ["NEGATIVE", "NEUTRAL", "POSITIVE"],
        "label_map": {
            "Negative": "NEGATIVE",
            "Neutral": "NEUTRAL",
            "Positive": "POSITIVE",
        },
        "display": "RoBERTa Twitter",
        "task": "NEGATIVE / NEUTRAL / POSITIVE",
    },
    # Binary sentiment with generic LABEL_n names mapped to canonical labels.
    ModelType.SST2: {
        "hf_id": "textattack/bert-base-uncased-SST-2",
        "tokenizer": "bert-base-uncased",
        "labels": ["NEGATIVE", "POSITIVE"],
        "label_map": {"LABEL_0": "NEGATIVE", "LABEL_1": "POSITIVE"},
        "display": "BERT SST-2",
        "task": "POSITIVE / NEGATIVE",
    },
    # Zero-shot entry: the only one with "pipeline_task"/"candidate_labels".
    ModelType.ZEROSHOT: {
        "hf_id": "facebook/bart-large-mnli",
        "pipeline_task": "zero-shot-classification",
        "candidate_labels": ["positive", "negative", "neutral"],
        "labels": ["POSITIVE", "NEGATIVE", "NEUTRAL"],
        "display": "BART Large MNLI",
        "task": "Zero-shot Sentiment",
    },
}
# Lookup table: human-readable dropdown label → ModelType member.
# The label strings are matched verbatim, so they must stay in sync with
# whatever UI code presents them.
MODEL_LABEL_TO_TYPE: Dict[str, str] = {
    "DistilBERT SST-2 (POSITIVE / NEGATIVE)": ModelType.DEFAULT,
    "BERT Multilingual (1–5 star rating)": ModelType.ROBERTA,
    "GoEmotions (7 emotions)": ModelType.EMOTION,
    "Amazon Reviews BERT (POSITIVE / NEGATIVE)": ModelType.AMAZON,
    "RoBERTa Twitter (NEGATIVE / NEUTRAL / POSITIVE)": ModelType.TWITTER,
    "BERT SST-2 (POSITIVE / NEGATIVE)": ModelType.SST2,
    "BART Large MNLI (Zero-shot Sentiment)": ModelType.ZEROSHOT,
}
@dataclass
class PreprocessResult:
    """Container for the outputs of the text-preprocessing stages.

    All nine fields are required and positional, in the order declared below.
    The exact transformation behind each field lives in the preprocessing
    code, not here; the per-field notes are inferred from the field names and
    types and should be confirmed against that code.
    """

    # Input text as received.
    original_text: str
    # Presumably the text after a cleaning pass - TODO confirm what is cleaned.
    cleaned_text: str
    # Presumably the text after a removal pass (e.g. stop words) - TODO confirm.
    removed_text: str
    # Presumably the text after normalization - TODO confirm.
    normalized_text: str
    # Tokens, one string per token.
    tokenized_text: List[str]
    # Presumably stemmed forms of the tokens - TODO confirm alignment.
    stemmed_text: List[str]
    # Presumably lemmatized forms of the tokens - TODO confirm alignment.
    lemmatized_text: List[str]
    # String pairs; presumably (entity text, entity label) - TODO confirm order.
    ner: List[Tuple[str, str]]
    # String pairs; presumably (token, part-of-speech tag) - TODO confirm order.
    pos: List[Tuple[str, str]]
@dataclass
class WordDistribution:
    """Per-label word statistics.

    Both mappings are keyed by label string. How words are assigned to a
    label is decided by the code that builds this object.
    """

    distribution: Dict[str, int]       # label → count
    word_lists: Dict[str, List[str]]   # label → words
@dataclass
class SentimentResult:
    """Complete result of one sentiment-analysis run.

    Bundles the prediction with the preprocessing output and the word
    distribution. All six fields are required, in the order declared below.
    """

    # Predicted label string.
    sentiment: str
    # Scores as floats; presumably one per entry in ``labels`` and in the
    # same order - TODO confirm against the inference code.
    probabilities: List[float]
    # Identifier of the model used; presumably a ModelType value - TODO confirm.
    model_type: str
    # Label strings for the model that produced this result.
    labels: List[str]
    # Preprocessing output for the analysed text.
    preprocess: PreprocessResult
    # Word distribution for the analysed text.
    word_dist: WordDistribution