# Spaces: cwpkd/Todlong
# Runtime error
# File size: 4,328 Bytes
# d128b27

# utils/sentiment_analyzer.py
"""
Sentiment analysis using VADER and FinBERT
"""

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
from typing import Dict, Tuple
from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS


class SentimentAnalyzer:
    """Analyze sentiment using VADER and FinBERT together.

    VADER supplies a lexicon-based compound score; FinBERT supplies class
    probabilities from the checkpoint named by ``FINBERT_MODEL``.
    ``analyze_comprehensive`` blends the two into one score and label.
    """

    # Blend weights applied in analyze_comprehensive.
    VADER_WEIGHT = 0.3
    FINBERT_WEIGHT = 0.7

    # Logit order assumed when the loaded model's config does not expose
    # usable label names (this is the order the original code hard-coded).
    _DEFAULT_LABEL_INDEX = {'positive': 0, 'negative': 1, 'neutral': 2}

    def __init__(self):
        """Initialize sentiment analysis models.

        Loads the VADER analyzer and the FinBERT tokenizer/model, moves the
        model to CUDA when available (CPU otherwise) and puts it in eval mode.
        """
        # VADER for general sentiment
        self.vader = SentimentIntensityAnalyzer()

        # FinBERT for financial sentiment
        print("Loading FinBERT model...")
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.finbert_model.to(self.device)
        self.finbert_model.eval()
        print("FinBERT loaded successfully!")

    def analyze_vader(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using VADER

        Args:
            text: Text to analyze

        Returns:
            Dictionary with keys 'positive', 'neutral', 'negative' and
            'compound', copied from VADER's 'pos', 'neu', 'neg' and
            'compound' scores.
        """
        scores = self.vader.polarity_scores(text)
        return {
            'positive': scores['pos'],
            'neutral': scores['neu'],
            'negative': scores['neg'],
            'compound': scores['compound']
        }

    def _finbert_label_index(self) -> Dict[str, int]:
        """Return the logit index of each sentiment class for the loaded model.

        The class order is a property of the checkpoint, so it is read from
        ``self.finbert_model.config.id2label`` (matched case-insensitively).
        Falls back to positive/negative/neutral = 0/1/2 when the config is
        missing or does not name all three classes.
        """
        default = self._DEFAULT_LABEL_INDEX
        config = getattr(self.finbert_model, 'config', None)
        id2label = getattr(config, 'id2label', None)
        if not isinstance(id2label, dict):
            return dict(default)

        by_name = {str(label).lower(): int(idx) for idx, label in id2label.items()}
        if all(name in by_name for name in default):
            return {name: by_name[name] for name in default}
        # Generic names such as LABEL_0 carry no class information.
        return dict(default)

    def analyze_finbert(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using FinBERT

        Args:
            text: Text to analyze (truncated to 512 tokens)

        Returns:
            Dictionary with 'positive', 'negative' and 'neutral' softmax
            probabilities, each looked up by the model's own label mapping.
        """
        # Tokenize
        inputs = self.finbert_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True
        ).to(self.device)

        # Get predictions
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Single input text, so only the first row is relevant.
        row = probs[0].cpu().tolist()
        index = self._finbert_label_index()

        return {
            'positive': float(row[index['positive']]),
            'negative': float(row[index['negative']]),
            'neutral': float(row[index['neutral']])
        }

    def get_sentiment_label(self, compound_score: float) -> str:
        """
        Convert a score to a label

        Args:
            compound_score: Score to classify (a VADER compound score or
                the combined score from ``analyze_comprehensive``)

        Returns:
            "Positive" if the score is >= SENTIMENT_THRESHOLDS['positive'],
            "Negative" if it is <= SENTIMENT_THRESHOLDS['negative'],
            otherwise "Neutral".
        """
        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
            return "Positive"
        if compound_score <= SENTIMENT_THRESHOLDS['negative']:
            return "Negative"
        return "Neutral"

    def analyze_comprehensive(self, text: str) -> Dict:
        """
        Perform comprehensive sentiment analysis

        Args:
            text: Text to analyze

        Returns:
            Dictionary with:
                'vader': result of ``analyze_vader``
                'finbert': result of ``analyze_finbert``
                'combined_score': VADER_WEIGHT * VADER compound +
                    FINBERT_WEIGHT * (FinBERT positive - negative)
                'sentiment_label': label for the combined score
                'confidence': largest FinBERT class probability
        """
        vader_scores = self.analyze_vader(text)
        finbert_scores = self.analyze_finbert(text)

        # Combined score (weighted average)
        finbert_margin = finbert_scores['positive'] - finbert_scores['negative']
        combined_score = (
            vader_scores['compound'] * self.VADER_WEIGHT +
            finbert_margin * self.FINBERT_WEIGHT
        )

        return {
            'vader': vader_scores,
            'finbert': finbert_scores,
            'combined_score': combined_score,
            'sentiment_label': self.get_sentiment_label(combined_score),
            'confidence': max(finbert_scores.values())
        }

    def analyze_batch(self, texts: list) -> list:
        """
        Analyze multiple texts

        Args:
            texts: List of texts to analyze

        Returns:
            List of ``analyze_comprehensive`` results, one per input text
            and in the same order (empty list for empty input).
        """
        return [self.analyze_comprehensive(text) for text in texts]