Spaces:

cjen1008
/

Patient-Report-Measures-NLP-Sentiments

Running

App Files Files Community

Patient-Report-Measures-NLP-Sentiments / src /models.py

cjen1008

Initial deployment to HF Spaces

d401ec0 about 1 month ago

raw

history blame contribute delete

4.38 kB

	"""
	Type definitions and model configuration for the sentiment analysis pipeline.
	"""

	from dataclasses import dataclass, field
	from enum import Enum
	from typing import Dict, List, Optional, Tuple


	class ModelType(str, Enum):
	    """Identifiers for the models the sentiment pipeline can be asked to run.

	    ``str`` is mixed in, so every member is also a plain string equal to its
	    value: ``ModelType.DEFAULT == "default"`` is true and
	    ``ModelType("default")`` returns ``ModelType.DEFAULT``.
	    """

	    DEFAULT = "default"  # DistilBERT SST-2
	    # NOTE(review): despite the member name, SUPPORTED_MODELS configures this
	    # entry with an NLP Town *BERT* checkpoint; the name is kept because
	    # callers may already rely on the "roberta" value.
	    ROBERTA = "roberta"  # NLP Town BERT Multilingual
	    EMOTION = "emotion"  # GoEmotions DistilRoBERTa
	    AMAZON = "amazon"  # Amazon Reviews DistilBERT
	    TWITTER = "twitter"  # CardiffNLP Twitter RoBERTa
	    SST2 = "sst2"  # BERT base uncased SST-2
	    ZEROSHOT = "zeroshot"  # BART Large MNLI (zero-shot)


	# Per-model configuration, keyed by ModelType member (each member is also a
	# str, which is why the annotation uses str keys).
	#
	# Keys that appear in the entries below:
	#   hf_id            - model identifier (a Hugging Face Hub repo id, judging
	#                      by the values)
	#   labels           - upper-case label names reported for the model
	#   display, task    - short human-readable model name / task description
	#   label_map        - optional; every value is one of the entry's "labels"
	#   tokenizer        - optional; a separate tokenizer identifier
	#   pipeline_task,
	#   candidate_labels - present only on the ZEROSHOT entry
	# NOTE(review): how each key is consumed is decided by the loading code,
	# which is not part of this module -- confirm there before editing values.
	SUPPORTED_MODELS: Dict[str, Dict] = {
	    # --- binary sentiment (DistilBERT) ---
	    ModelType.DEFAULT: {
	        "hf_id": "distilbert-base-uncased-finetuned-sst-2-english",
	        "labels": ["NEGATIVE", "POSITIVE"],
	        "display": "DistilBERT SST-2",
	        "task": "POSITIVE / NEGATIVE",
	    },
	    # --- star rating ---
	    ModelType.ROBERTA: {
	        "hf_id": "nlptown/bert-base-multilingual-uncased-sentiment",
	        "labels": [
	            "1 STAR",
	            "2 STARS",
	            "3 STARS",
	            "4 STARS",
	            "5 STARS",
	        ],
	        "label_map": {
	            "1 star": "1 STAR",
	            "2 stars": "2 STARS",
	            "3 stars": "3 STARS",
	            "4 stars": "4 STARS",
	            "5 stars": "5 STARS",
	        },
	        "display": "BERT Multilingual",
	        "task": "1–5 star rating",
	    },
	    # --- emotion classification ---
	    ModelType.EMOTION: {
	        "hf_id": "j-hartmann/emotion-english-distilroberta-base",
	        "labels": [
	            "ANGER",
	            "DISGUST",
	            "FEAR",
	            "JOY",
	            "NEUTRAL",
	            "SADNESS",
	            "SURPRISE",
	        ],
	        "display": "GoEmotions",
	        "task": "7-class emotion",
	    },
	    # --- binary sentiment (Amazon reviews) ---
	    ModelType.AMAZON: {
	        "hf_id": "sohan-ai/sentiment-analysis-model-amazon-reviews",
	        "tokenizer": "distilbert-base-uncased",
	        "labels": ["NEGATIVE", "POSITIVE"],
	        "label_map": {
	            "LABEL_0": "NEGATIVE",
	            "LABEL_1": "POSITIVE",
	        },
	        "display": "Amazon Reviews BERT",
	        "task": "POSITIVE / NEGATIVE",
	    },
	    # --- three-way sentiment (Twitter) ---
	    ModelType.TWITTER: {
	        "hf_id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
	        "labels": ["NEGATIVE", "NEUTRAL", "POSITIVE"],
	        # NOTE(review): confirm these keys use the same casing as the labels
	        # the checkpoint actually emits (its config's id2label); whether a
	        # mismatch matters depends on how the loader performs the lookup.
	        "label_map": {
	            "Negative": "NEGATIVE",
	            "Neutral": "NEUTRAL",
	            "Positive": "POSITIVE",
	        },
	        "display": "RoBERTa Twitter",
	        "task": "NEGATIVE / NEUTRAL / POSITIVE",
	    },
	    # --- binary sentiment (BERT base) ---
	    ModelType.SST2: {
	        "hf_id": "textattack/bert-base-uncased-SST-2",
	        "tokenizer": "bert-base-uncased",
	        "labels": ["NEGATIVE", "POSITIVE"],
	        "label_map": {
	            "LABEL_0": "NEGATIVE",
	            "LABEL_1": "POSITIVE",
	        },
	        "display": "BERT SST-2",
	        "task": "POSITIVE / NEGATIVE",
	    },
	    # --- zero-shot sentiment ---
	    ModelType.ZEROSHOT: {
	        "hf_id": "facebook/bart-large-mnli",
	        "pipeline_task": "zero-shot-classification",
	        # "candidate_labels" and "labels" list the same three classes in the
	        # same order, lower-case and upper-case respectively.
	        "candidate_labels": ["positive", "negative", "neutral"],
	        "labels": ["POSITIVE", "NEGATIVE", "NEUTRAL"],
	        "display": "BART Large MNLI",
	        "task": "Zero-shot Sentiment",
	    },
	}

	# Lookup table for the model dropdown: human-readable caption -> ModelType.
	# ModelType members are str instances, hence the Dict[str, str] annotation.
	MODEL_LABEL_TO_TYPE: Dict[str, str] = {
	    # binary / multi-class sentiment
	    "DistilBERT SST-2 (POSITIVE / NEGATIVE)": ModelType.DEFAULT,
	    "BERT Multilingual (1–5 star rating)": ModelType.ROBERTA,
	    # emotion
	    "GoEmotions (7 emotions)": ModelType.EMOTION,
	    # domain-specific sentiment
	    "Amazon Reviews BERT (POSITIVE / NEGATIVE)": ModelType.AMAZON,
	    "RoBERTa Twitter (NEGATIVE / NEUTRAL / POSITIVE)": ModelType.TWITTER,
	    "BERT SST-2 (POSITIVE / NEGATIVE)": ModelType.SST2,
	    # zero-shot
	    "BART Large MNLI (Zero-shot Sentiment)": ModelType.ZEROSHOT,
	}


	@dataclass
	class PreprocessResult:
	    """Record bundling the outputs of the text preprocessing steps.

	    All fields are required and are accepted positionally in the order
	    declared below.

	    NOTE(review): the code that fills this record is not part of this
	    module; the per-field remarks are inferred from the field names and
	    types and should be confirmed against the producer.
	    """

	    original_text: str  # presumably the input exactly as received
	    cleaned_text: str
	    removed_text: str
	    normalized_text: str
	    tokenized_text: List[str]  # one string per token
	    stemmed_text: List[str]
	    lemmatized_text: List[str]
	    ner: List[Tuple[str, str]]  # presumably (entity text, entity label) pairs
	    pos: List[Tuple[str, str]]  # presumably (token, part-of-speech tag) pairs


	@dataclass
	class WordDistribution:
	    """Per-label word statistics.

	    Both mappings are keyed by label name; both fields are required.
	    """

	    distribution: Dict[str, int]  # label → count
	    word_lists: Dict[str, List[str]]  # label → words


	@dataclass
	class SentimentResult:
	    """Result record for one sentiment analysis run.

	    Combines the prediction with the preprocessing record and the per-label
	    word statistics. All fields are required and are accepted positionally
	    in the order declared below.
	    """

	    sentiment: str  # presumably the predicted label -- confirm with producer
	    # NOTE(review): probabilities looks like it is index-aligned with
	    # `labels` below; verify against the code that builds this record.
	    probabilities: List[float]
	    model_type: str  # annotated as str; ModelType members also satisfy this
	    labels: List[str]
	    preprocess: PreprocessResult
	    word_dist: WordDistribution