Spaces:

cwpkd
/

Todlong

Runtime error

App Files Files Community

cwpkd committed on Nov 3, 2025

Commit

d128b27

verified ·

1 Parent(s): e9f1adf

Create utils/sentiment_analyzer.py

Browse files

Files changed (1) hide show

utils/sentiment_analyzer.py +143 -0

utils/sentiment_analyzer.py ADDED Viewed

	@@ -0,0 +1,143 @@

+# utils/sentiment_analyzer.py
+"""
+Sentiment analysis using VADER and FinBERT
+"""
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+import numpy as np
+from typing import Dict, Tuple
+from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS
+class SentimentAnalyzer:
+    """Analyze sentiment using multiple methods"""
+    def __init__(self):
+        """Initialize sentiment analysis models"""
+        # VADER for general sentiment
+        self.vader = SentimentIntensityAnalyzer()
+        # FinBERT for financial sentiment
+        print("Loading FinBERT model...")
+        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
+        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.finbert_model.to(self.device)
+        self.finbert_model.eval()
+        print("FinBERT loaded successfully!")
+    def analyze_vader(self, text: str) -> Dict[str, float]:
+        """
+        Analyze sentiment using VADER
+        Args:
+            text: Text to analyze
+        Returns:
+            Dictionary with sentiment scores
+        """
+        scores = self.vader.polarity_scores(text)
+        return {
+            'positive': scores['pos'],
+            'neutral': scores['neu'],
+            'negative': scores['neg'],
+            'compound': scores['compound']
+        }
+    def analyze_finbert(self, text: str) -> Dict[str, float]:
+        """
+        Analyze sentiment using FinBERT
+        Args:
+            text: Text to analyze
+        Returns:
+            Dictionary with sentiment probabilities
+        """
+        # Tokenize
+        inputs = self.finbert_tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            max_length=512,
+            padding=True
+        ).to(self.device)
+        # Get predictions
+        with torch.no_grad():
+            outputs = self.finbert_model(**inputs)
+            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+        # FinBERT labels: positive, negative, neutral
+        probs = probs.cpu().numpy()[0]
+        return {
+            'positive': float(probs[0]),
+            'negative': float(probs[1]),
+            'neutral': float(probs[2])
+        }
+    def get_sentiment_label(self, compound_score: float) -> str:
+        """
+        Convert compound score to label
+        Args:
+            compound_score: VADER compound score
+        Returns:
+            Sentiment label
+        """
+        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
+            return "Positive"
+        elif compound_score <= SENTIMENT_THRESHOLDS['negative']:
+            return "Negative"
+        else:
+            return "Neutral"
+    def analyze_comprehensive(self, text: str) -> Dict:
+        """
+        Perform comprehensive sentiment analysis
+        Args:
+            text: Text to analyze
+        Returns:
+            Dictionary with all sentiment metrics
+        """
+        # VADER analysis
+        vader_scores = self.analyze_vader(text)
+        # FinBERT analysis
+        finbert_scores = self.analyze_finbert(text)
+        # Combined score (weighted average)
+        combined_score = (
+            vader_scores['compound'] * 0.3 +
+            (finbert_scores['positive'] - finbert_scores['negative']) * 0.7
+        )
+        return {
+            'vader': vader_scores,
+            'finbert': finbert_scores,
+            'combined_score': combined_score,
+            'sentiment_label': self.get_sentiment_label(combined_score),
+            'confidence': max(finbert_scores.values())
+        }
+    def analyze_batch(self, texts: list) -> list:
+        """
+        Analyze multiple texts
+        Args:
+            texts: List of texts to analyze
+        Returns:
+            List of sentiment analysis results
+        """
+        results = []
+        for text in texts:
+            result = self.analyze_comprehensive(text)
+            results.append(result)
+        return results