File size: 4,328 Bytes
d128b27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# utils/sentiment_analyzer.py
"""
Sentiment analysis using VADER and FinBERT
"""

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
from typing import Dict, Tuple
from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS


class SentimentAnalyzer:
    """Analyze sentiment of text with VADER and FinBERT.

    VADER supplies a lexicon-based score and FinBERT a model-based class
    distribution; ``analyze_comprehensive`` blends the two into one score.
    """

    # Class order assumed when the model config carries no usable label
    # names. This is the order the original implementation hard-coded.
    _DEFAULT_FINBERT_LABELS = ('positive', 'negative', 'neutral')

    def __init__(self):
        """Load VADER and the FinBERT tokenizer/model.

        The FinBERT checkpoint is taken from ``FINBERT_MODEL``. The model is
        moved to CUDA when available (CPU otherwise) and switched to eval
        mode. Progress messages are printed to stdout.
        """
        # VADER for general sentiment
        self.vader = SentimentIntensityAnalyzer()

        # FinBERT for financial sentiment
        print("Loading FinBERT model...")
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.finbert_model.to(self.device)
        self.finbert_model.eval()
        print("FinBERT loaded successfully!")

    def analyze_vader(self, text: str) -> Dict[str, float]:
        """Analyze sentiment using VADER.

        Args:
            text: Text to analyze

        Returns:
            Dictionary with keys 'positive', 'neutral', 'negative' and
            'compound', copied from VADER's 'pos', 'neu', 'neg' and
            'compound' scores.
        """
        scores = self.vader.polarity_scores(text)
        return {
            'positive': scores['pos'],
            'neutral': scores['neu'],
            'negative': scores['neg'],
            'compound': scores['compound'],
        }

    def _finbert_label_indices(self) -> Dict[str, int]:
        """Map 'positive'/'negative'/'neutral' to logits column indices.

        The mapping is read from the loaded model's ``config.id2label`` so
        that a checkpoint with a different class order is still interpreted
        correctly. Label names are compared case-insensitively. When the
        config is missing or does not name all three classes, the original
        positional order (positive, negative, neutral) is used.
        """
        config = getattr(self.finbert_model, 'config', None)
        id2label = getattr(config, 'id2label', None) or {}

        by_name = {
            str(name).strip().lower(): int(index)
            for index, name in id2label.items()
        }
        if all(label in by_name for label in self._DEFAULT_FINBERT_LABELS):
            return {label: by_name[label] for label in self._DEFAULT_FINBERT_LABELS}

        # No usable label names: keep the historical positional assumption.
        return {label: i for i, label in enumerate(self._DEFAULT_FINBERT_LABELS)}

    def analyze_finbert(self, text: str) -> Dict[str, float]:
        """Analyze sentiment using FinBERT.

        Args:
            text: Text to analyze. Input longer than 512 tokens is truncated.

        Returns:
            Dictionary with softmax probabilities under the keys
            'positive', 'negative' and 'neutral'.
        """
        # Tokenize
        inputs = self.finbert_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        ).to(self.device)

        # Get predictions; no gradients are needed for inference
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Single input text, so only the first row of the batch is relevant
        row = probs[0].cpu().tolist()

        # Resolve which column belongs to which class from the model config
        # instead of assuming a fixed order.
        indices = self._finbert_label_indices()
        return {label: float(row[index]) for label, index in indices.items()}

    def get_sentiment_label(self, compound_score: float) -> str:
        """Convert a score to a sentiment label.

        Args:
            compound_score: Score to classify. ``analyze_comprehensive``
                passes the combined VADER/FinBERT score here, not only a
                raw VADER compound score.

        Returns:
            "Positive" when the score is at or above
            ``SENTIMENT_THRESHOLDS['positive']``, "Negative" when it is at
            or below ``SENTIMENT_THRESHOLDS['negative']``, else "Neutral".
        """
        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
            return "Positive"
        if compound_score <= SENTIMENT_THRESHOLDS['negative']:
            return "Negative"
        return "Neutral"

    def analyze_comprehensive(self, text: str) -> Dict:
        """Perform comprehensive sentiment analysis.

        Args:
            text: Text to analyze

        Returns:
            Dictionary with:
                'vader': result of ``analyze_vader``
                'finbert': result of ``analyze_finbert``
                'combined_score': 0.3 * VADER compound +
                    0.7 * (FinBERT positive - FinBERT negative)
                'sentiment_label': label derived from the combined score
                'confidence': largest FinBERT class probability
        """
        # VADER analysis
        vader_scores = self.analyze_vader(text)

        # FinBERT analysis
        finbert_scores = self.analyze_finbert(text)

        # Combined score (weighted average, FinBERT weighted more heavily)
        combined_score = (
            vader_scores['compound'] * 0.3 +
            (finbert_scores['positive'] - finbert_scores['negative']) * 0.7
        )

        return {
            'vader': vader_scores,
            'finbert': finbert_scores,
            'combined_score': combined_score,
            'sentiment_label': self.get_sentiment_label(combined_score),
            'confidence': max(finbert_scores.values()),
        }

    def analyze_batch(self, texts: list) -> list:
        """Analyze multiple texts.

        Each text is analyzed independently with ``analyze_comprehensive``.

        Args:
            texts: List of texts to analyze

        Returns:
            List of sentiment analysis results, in the same order as
            ``texts``. An empty input yields an empty list.
        """
        return [self.analyze_comprehensive(text) for text in texts]