Spaces:
Sleeping
Sleeping
File size: 3,491 Bytes
52a0fe9 02b5142 52a0fe9 02b5142 52a0fe9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | """
Sentiment analysis using NLTK's VADER (Valence Aware Dictionary and sEntiment Reasoner).
Provides both overall and sentence-level sentiment analysis.
"""
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize
from models.schemas import SentimentResult, SentimentBreakdown
from config import SENTIMENT_THRESHOLDS
from typing import List
# Download required NLTK data.
# By default nltk.download() reports failure through its return value instead
# of raising, so chaining a "punkt" -> "punkt_tab" fallback through an
# exception handler never triggers. Each resource is therefore checked and
# fetched independently. Both tokenizer packages are requested because older
# NLTK releases load "punkt" while newer ones load "punkt_tab" when
# sent_tokenize is called.
_REQUIRED_NLTK_RESOURCES = (
    ("sentiment/vader_lexicon.zip", "vader_lexicon"),
    ("tokenizers/punkt", "punkt"),
    ("tokenizers/punkt_tab", "punkt_tab"),
)
for _resource_path, _package_id in _REQUIRED_NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # Best effort: if the download fails, the component that needs the
        # resource raises its own LookupError when it is first used.
        nltk.download(_package_id, quiet=True)

# Initialize analyzer
sia = SentimentIntensityAnalyzer()
def _get_sentiment_label(compound: float) -> str:
    """Translate a compound sentiment score into a display label.

    The strong bands use the fixed cut-offs 0.5 and -0.5; the milder bands
    use the "positive" and "negative" entries of SENTIMENT_THRESHOLDS.

    Args:
        compound: The compound score to classify.

    Returns:
        One of "Very Positive", "Positive", "Very Negative", "Negative",
        or "Neutral".
    """
    # Checks run top to bottom; the first matching band wins.
    if compound >= 0.5:
        return "Very Positive"
    if compound >= SENTIMENT_THRESHOLDS["positive"]:
        return "Positive"
    if compound <= -0.5:
        return "Very Negative"
    if compound <= SENTIMENT_THRESHOLDS["negative"]:
        return "Negative"
    # Nothing matched: the score sits between the two configured thresholds.
    return "Neutral"
def analyze_sentiment(text: str) -> SentimentResult:
    """
    Perform sentiment analysis on the given text.

    Scores the whole text with the module-level analyzer and, separately,
    up to the first 50 sentences produced by ``sent_tokenize``.

    Args:
        text: The input text to analyze. ``None``, empty, or whitespace-only
            input yields a neutral result with zero confidence instead of
            raising.

    Returns:
        SentimentResult with the overall scores and label, the per-sentence
        breakdown, and a confidence value.
    """
    # Treat missing input like blank input; calling .strip() on None would
    # raise AttributeError.
    if not text or not text.strip():
        return SentimentResult(
            overall_compound=0.0,
            overall_positive=0.0,
            overall_negative=0.0,
            overall_neutral=1.0,
            overall_label="Neutral",
            sentence_breakdown=[],
            confidence=0.0,
        )

    # Overall sentiment
    overall_scores = sia.polarity_scores(text)

    # Sentence-level breakdown
    sentence_breakdown: List[SentimentBreakdown] = []
    # Limit to first 50 sentences for performance
    for raw_sentence in sent_tokenize(text)[:50]:
        sentence = raw_sentence.strip()
        # Skip fragments shorter than 5 characters (this also covers
        # sentences that are empty after stripping).
        if len(sentence) < 5:
            continue
        scores = sia.polarity_scores(sentence)
        sentence_breakdown.append(
            SentimentBreakdown(
                text=sentence[:200],  # Truncate very long sentences
                compound=round(scores["compound"], 4),
                positive=round(scores["pos"], 4),
                negative=round(scores["neg"], 4),
                neutral=round(scores["neu"], 4),
                label=_get_sentiment_label(scores["compound"]),
            )
        )

    # Confidence reflects how strongly the sentences lean either way, not how
    # much they agree: it is the mean absolute compound score of the kept
    # sentences, doubled and capped at 1.0. When no sentence was kept, fall
    # back to the magnitude of the overall compound score.
    if sentence_breakdown:
        total_magnitude = sum(abs(sb.compound) for sb in sentence_breakdown)
        avg_magnitude = total_magnitude / len(sentence_breakdown)
        confidence = min(avg_magnitude * 2, 1.0)
    else:
        confidence = abs(overall_scores["compound"])

    return SentimentResult(
        overall_compound=round(overall_scores["compound"], 4),
        overall_positive=round(overall_scores["pos"], 4),
        overall_negative=round(overall_scores["neg"], 4),
        overall_neutral=round(overall_scores["neu"], 4),
        overall_label=_get_sentiment_label(overall_scores["compound"]),
        sentence_breakdown=sentence_breakdown,
        confidence=round(confidence, 4),
    )
|