Spaces:

Nomio4640
/

NLP-intelligence

Sleeping

App Files Files Community

NLP-intelligence / nlp_core /sentiment.py

Nomio4640

bert chunk problem

d4ff564 24 days ago

raw

history blame contribute delete

2.88 kB

	"""
	Sentiment analysis service built on a HuggingFace XLM-RoBERTa pipeline.
	Wraps the cardiffnlp/twitter-xlm-roberta-base-sentiment model by default.
	"""

	import logging
	from typing import List, Optional

	from .models import SentimentResult


	# Translation table from the classifier's raw label to the label we report.
	# The raw label is lowercased before it is looked up here, so only the
	# lowercase keys can actually match; the upper-case LABEL_n keys are a
	# safety net in case that lowercasing step is ever dropped.
	LABEL_MAP = {
	    # Labels that are already human-readable map to themselves.
	    "positive": "positive", "neutral": "neutral", "negative": "negative",
	    # Index-style labels in their original casing (unreachable while the
	    # lookup key is lowercased first).
	    "LABEL_0": "negative", "LABEL_1": "neutral", "LABEL_2": "positive",
	    # Index-style labels as they look after lowercasing.
	    "label_0": "negative", "label_1": "neutral", "label_2": "positive",
	}


	class SentimentAnalyzer:
	    """Sentiment analysis service backed by a HuggingFace XLM-RoBERTa pipeline.

	    The pipeline is created lazily on first use, so constructing an analyzer
	    does not import ``transformers`` or load a model.

	    Error policy: failures while *loading* the pipeline propagate to the
	    caller; failures during *inference* are logged and reported as a neutral
	    result with score 0.0 (best-effort behaviour).
	    """

	    _log = logging.getLogger(__name__)

	    def __init__(self, model_name: str = "cardiffnlp/twitter-xlm-roberta-base-sentiment"):
	        """Store the model identifier; the model itself is loaded on demand.

	        Args:
	            model_name: Identifier handed to ``transformers.pipeline`` as both
	                the model and the tokenizer.
	        """
	        self.model_name = model_name
	        self._pipeline = None  # created by _load_pipeline() on first use

	    def _load_pipeline(self):
	        """Return the sentiment pipeline, building it on the first call."""
	        if self._pipeline is None:
	            # Imported here so that merely importing this module does not
	            # require transformers to be loaded.
	            from transformers import pipeline

	            self._pipeline = pipeline(
	                "sentiment-analysis",
	                model=self.model_name,
	                tokenizer=self.model_name,
	                # NOTE(review): presumably keeps long inputs within the
	                # model's 512-token window — confirm against the model card.
	                truncation=True,
	                max_length=512,
	            )
	        return self._pipeline

	    @staticmethod
	    def _neutral() -> SentimentResult:
	        """Build the fallback result used for blank input and failed inference."""
	        return SentimentResult(label="neutral", score=0.0)

	    @staticmethod
	    def _is_blank(text) -> bool:
	        """Tell whether *text* is empty, None, or whitespace only."""
	        return not text or not text.strip()

	    @staticmethod
	    def _to_result(raw) -> SentimentResult:
	        """Convert one raw pipeline prediction (a dict) into a SentimentResult."""
	        raw_label = raw.get("label", "neutral").lower()
	        return SentimentResult(
	            # Labels missing from LABEL_MAP are passed through lowercased.
	            label=LABEL_MAP.get(raw_label, raw_label),
	            score=float(raw.get("score", 0.0)),
	        )

	    def analyze(self, text: str) -> SentimentResult:
	        """Analyze the sentiment of a single text.

	        Args:
	            text: The text to classify.

	        Returns:
	            The mapped label and score.  Blank input, and any inference
	            failure, yield label ``"neutral"`` with score 0.0.
	        """
	        if self._is_blank(text):
	            return self._neutral()
	        pipe = self._load_pipeline()
	        try:
	            return self._to_result(pipe(text)[0])
	        except Exception:
	            # Best-effort: keep the neutral fallback, but leave a trace so a
	            # failure is not mistaken for a genuine neutral prediction.
	            self._log.exception(
	                "Sentiment inference failed for a text of length %d; "
	                "returning neutral",
	                len(text),
	            )
	            return self._neutral()

	    def analyze_batch(self, texts: List[str], batch_size: int = 16) -> List[SentimentResult]:
	        """Analyze the sentiment of several texts in one pipeline call.

	        Blank entries are treated exactly as in :meth:`analyze`: they get a
	        neutral result and are not sent to the model, so a single empty or
	        None entry cannot make the whole batch fail.

	        Args:
	            texts: The texts to classify.
	            batch_size: Forwarded to the pipeline call as ``batch_size``.

	        Returns:
	            One result per input, in input order.  If inference fails, every
	            entry is neutral with score 0.0.
	        """
	        if not texts:
	            return []

	        # Start from neutral everywhere and remember which positions need
	        # an actual model prediction.
	        results = []
	        pending_positions = []
	        pending_texts = []
	        for position, text in enumerate(texts):
	            results.append(self._neutral())
	            if not self._is_blank(text):
	                pending_positions.append(position)
	                pending_texts.append(text)

	        if not pending_texts:
	            return results  # nothing to classify; skip loading the model

	        pipe = self._load_pipeline()
	        try:
	            raw_results = pipe(pending_texts, batch_size=batch_size)
	            converted = [self._to_result(raw) for raw in raw_results]
	        except Exception:
	            self._log.exception(
	                "Batch sentiment inference failed for %d text(s); "
	                "returning neutral for all",
	                len(pending_texts),
	            )
	            return results

	        for position, result in zip(pending_positions, converted):
	            results[position] = result
	        return results