# utils/sentiment_analyzer.py
"""
Sentiment analysis using VADER and FinBERT
"""

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
from typing import Dict, Tuple
from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS


class SentimentAnalyzer:
    """Analyze sentiment using multiple methods.

    Combines the lexicon-based VADER analyzer with a FinBERT sequence
    classifier loaded from the ``FINBERT_MODEL`` checkpoint named in config.
    """

    # Class order assumed when the checkpoint's config does not name its
    # output classes as exactly positive / negative / neutral.
    _DEFAULT_FINBERT_LABELS: Tuple[str, ...] = ('positive', 'negative', 'neutral')

    # Weights used by analyze_comprehensive() to blend the two models.
    VADER_WEIGHT = 0.3
    FINBERT_WEIGHT = 0.7

    def __init__(self):
        """Initialize sentiment analysis models.

        Loads the FinBERT tokenizer and model, moves the model to CUDA when
        available (CPU otherwise), and puts it in eval mode.
        """
        # VADER for general sentiment
        self.vader = SentimentIntensityAnalyzer()

        # FinBERT for financial sentiment
        print("Loading FinBERT model...")
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.finbert_model.to(self.device)
        self.finbert_model.eval()

        # Resolve the label -> logit-index mapping once: different FinBERT
        # checkpoints order their output classes differently.
        self._finbert_label_index = self._resolve_finbert_label_index()
        print("FinBERT loaded successfully!")

    def _resolve_finbert_label_index(self) -> Dict[str, int]:
        """Map each sentiment label to its index in the model's output.

        Reads ``id2label`` from the loaded model's config (label names are
        compared case-insensitively). Falls back to the
        positive/negative/neutral order when the config does not describe
        exactly those three classes at indices 0, 1 and 2.

        Returns:
            Dictionary mapping 'positive', 'negative' and 'neutral' to the
            corresponding output index.
        """
        fallback = {
            label: index
            for index, label in enumerate(self._DEFAULT_FINBERT_LABELS)
        }

        id2label = getattr(self.finbert_model.config, 'id2label', None) or {}
        try:
            resolved = {
                str(label).lower(): int(index)
                for index, label in id2label.items()
            }
        except (AttributeError, TypeError, ValueError):
            return fallback

        names_match = set(resolved) == set(fallback)
        indices_match = set(resolved.values()) == set(fallback.values())
        if names_match and indices_match:
            return resolved
        return fallback

    def analyze_vader(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using VADER

        Args:
            text: Text to analyze

        Returns:
            Dictionary with 'positive', 'neutral', 'negative' and
            'compound' scores
        """
        scores = self.vader.polarity_scores(text)
        return {
            'positive': scores['pos'],
            'neutral': scores['neu'],
            'negative': scores['neg'],
            'compound': scores['compound'],
        }

    def analyze_finbert(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using FinBERT

        Args:
            text: Text to analyze (truncated to 512 tokens)

        Returns:
            Dictionary with 'positive', 'negative' and 'neutral'
            softmax probabilities
        """
        # Tokenize (a single text needs no padding)
        inputs = self.finbert_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        ).to(self.device)

        # Get predictions without tracking gradients
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        probs = probs.cpu().numpy()[0]

        # Look probabilities up by label rather than by a hard-coded position
        index = self._finbert_label_index
        return {
            'positive': float(probs[index['positive']]),
            'negative': float(probs[index['negative']]),
            'neutral': float(probs[index['neutral']]),
        }

    def get_sentiment_label(self, compound_score: float) -> str:
        """
        Convert a sentiment score to a label

        Args:
            compound_score: Score to classify; analyze_comprehensive()
                passes its combined VADER/FinBERT score here

        Returns:
            "Positive" when the score is at or above
            SENTIMENT_THRESHOLDS['positive'], "Negative" when at or below
            SENTIMENT_THRESHOLDS['negative'], otherwise "Neutral"
        """
        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
            return "Positive"
        if compound_score <= SENTIMENT_THRESHOLDS['negative']:
            return "Negative"
        return "Neutral"

    def analyze_comprehensive(self, text: str) -> Dict:
        """
        Perform comprehensive sentiment analysis

        Args:
            text: Text to analyze

        Returns:
            Dictionary with the VADER scores ('vader'), the FinBERT
            probabilities ('finbert'), their weighted blend
            ('combined_score'), the label for that blend
            ('sentiment_label') and the highest FinBERT probability
            ('confidence')
        """
        # VADER analysis
        vader_scores = self.analyze_vader(text)

        # FinBERT analysis
        finbert_scores = self.analyze_finbert(text)

        # Combined score (weighted average); FinBERT's polarity is its
        # positive probability minus its negative probability
        finbert_polarity = finbert_scores['positive'] - finbert_scores['negative']
        combined_score = (
            vader_scores['compound'] * self.VADER_WEIGHT +
            finbert_polarity * self.FINBERT_WEIGHT
        )

        return {
            'vader': vader_scores,
            'finbert': finbert_scores,
            'combined_score': combined_score,
            'sentiment_label': self.get_sentiment_label(combined_score),
            'confidence': max(finbert_scores.values()),
        }

    def analyze_batch(self, texts: list) -> list:
        """
        Analyze multiple texts

        Args:
            texts: List of texts to analyze

        Returns:
            List of analyze_comprehensive() results, one per input text
            and in the same order
        """
        return [self.analyze_comprehensive(text) for text in texts]