Spaces:
Sleeping
Sleeping
| """ | |
| Sentiment analysis using NLTK's VADER (Valence Aware Dictionary and sEntiment Reasoner). | |
| Provides both overall and sentence-level sentiment analysis. | |
| """ | |
| import nltk | |
| from nltk.sentiment.vader import SentimentIntensityAnalyzer | |
| from nltk.tokenize import sent_tokenize | |
| from models.schemas import SentimentResult, SentimentBreakdown | |
| from config import SENTIMENT_THRESHOLDS | |
| from typing import List | |
# Download required NLTK data (only when it is not already installed locally).
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon", quiet=True)

# Sentence tokenizer models used by sent_tokenize.
# Newer NLTK releases load the "punkt_tab" resource while older ones load
# "punkt", so finding just one of them does not guarantee that tokenization
# will work with the installed NLTK version. Make sure both are present.
# nltk.download() normally reports a failure by returning False instead of
# raising, so an `except` around one download cannot be relied on to trigger
# a fallback download of the other package; each resource is therefore
# checked and fetched independently.
for _resource_path, _package in (
    ("tokenizers/punkt_tab", "punkt_tab"),
    ("tokenizers/punkt", "punkt"),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package, quiet=True)

# Initialize analyzer (shared module-level instance used by analyze_sentiment).
sia = SentimentIntensityAnalyzer()
def _get_sentiment_label(compound: float) -> str:
    """Translate a compound sentiment score into a display label.

    The checks run from the positive side to the negative side, and the
    first one that matches wins:

    * ``compound >= 0.5``                              -> "Very Positive"
    * ``compound >= SENTIMENT_THRESHOLDS["positive"]`` -> "Positive"
    * ``compound <= -0.5``                             -> "Very Negative"
    * ``compound <= SENTIMENT_THRESHOLDS["negative"]`` -> "Negative"
    * anything else                                    -> "Neutral"

    Args:
        compound: The compound score to classify.

    Returns:
        One of the five label strings listed above.
    """
    # The "very" cut-offs are fixed at +/-0.5; the milder cut-offs come from
    # configuration and are only looked up when the stronger check fails.
    if compound >= 0.5:
        return "Very Positive"
    if compound >= SENTIMENT_THRESHOLDS["positive"]:
        return "Positive"
    if compound <= -0.5:
        return "Very Negative"
    if compound <= SENTIMENT_THRESHOLDS["negative"]:
        return "Negative"
    return "Neutral"
def analyze_sentiment(text: str, *, max_sentences: int = 50) -> SentimentResult:
    """
    Perform sentiment analysis on the given text.

    The whole text is scored once with the module-level analyzer (``sia``),
    then the text is split with ``sent_tokenize`` and the leading sentences
    are scored individually to build a per-sentence breakdown.

    Args:
        text: The input text to analyze. ``None``, empty, or whitespace-only
            input produces a neutral result with zero confidence and an
            empty breakdown.
        max_sentences: How many leading sentences (as produced by
            ``sent_tokenize``) are considered for the breakdown. Expected to
            be non-negative. Defaults to 50.

    Returns:
        SentimentResult with overall and per-sentence sentiment analysis.
        ``confidence`` is the mean absolute compound score of the analyzed
        sentences, doubled and capped at 1.0; when no sentence qualifies for
        the breakdown it is the absolute overall compound score instead.
    """
    # Treat missing input like empty input instead of failing on None.strip().
    if not text or not text.strip():
        return SentimentResult(
            overall_compound=0.0,
            overall_positive=0.0,
            overall_negative=0.0,
            overall_neutral=1.0,
            overall_label="Neutral",
            sentence_breakdown=[],
            confidence=0.0,
        )

    # Overall sentiment
    overall_scores = sia.polarity_scores(text)

    # Sentence-level breakdown
    sentence_breakdown: List[SentimentBreakdown] = []
    # Only the leading sentences are scored, to bound the work per call.
    for raw_sentence in sent_tokenize(text)[:max_sentences]:
        sentence = raw_sentence.strip()
        # Skip fragments shorter than 5 characters (this also covers "").
        if len(sentence) < 5:
            continue
        scores = sia.polarity_scores(sentence)
        sentence_breakdown.append(SentimentBreakdown(
            text=sentence[:200],  # Truncate very long sentences
            compound=round(scores["compound"], 4),
            positive=round(scores["pos"], 4),
            negative=round(scores["neg"], 4),
            neutral=round(scores["neu"], 4),
            label=_get_sentiment_label(scores["compound"]),
        ))

    # Confidence reflects how strongly polarized the sentences are on average
    # (mean |compound|), not how much they agree with each other.
    if sentence_breakdown:
        compounds = [sb.compound for sb in sentence_breakdown]
        avg_magnitude = sum(abs(c) for c in compounds) / len(compounds)
        # A mean magnitude of 0.5 or more saturates at full confidence.
        confidence = min(avg_magnitude * 2, 1.0)
    else:
        # Nothing qualified for the breakdown; fall back to the overall score.
        confidence = abs(overall_scores["compound"])

    return SentimentResult(
        overall_compound=round(overall_scores["compound"], 4),
        overall_positive=round(overall_scores["pos"], 4),
        overall_negative=round(overall_scores["neg"], 4),
        overall_neutral=round(overall_scores["neu"], 4),
        overall_label=_get_sentiment_label(overall_scores["compound"]),
        sentence_breakdown=sentence_breakdown,
        confidence=round(confidence, 4),
    )