cwpkd committed on
Commit
d128b27
·
verified ·
1 Parent(s): e9f1adf

Create utils/sentiment_analyzer.py

Browse files
Files changed (1) hide show
  1. utils/sentiment_analyzer.py +143 -0
utils/sentiment_analyzer.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/sentiment_analyzer.py
2
+ """
3
+ Sentiment analysis using VADER and FinBERT
4
+ """
5
+
6
+ import torch
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
9
+ import numpy as np
10
+ from typing import Dict, Tuple
11
+ from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS
12
+
13
+
14
class SentimentAnalyzer:
    """Analyze sentiment using multiple methods.

    Wraps a VADER analyzer and a FinBERT sequence-classification model and
    exposes per-model scores as well as a weighted combination of the two.
    """

    # Weights applied to each model when building the combined score.
    _VADER_WEIGHT = 0.3
    _FINBERT_WEIGHT = 0.7

    # Positional label order assumed when the model config does not name
    # its classes as positive/negative/neutral.
    _DEFAULT_LABEL_INDICES = {'positive': 0, 'negative': 1, 'neutral': 2}

    def __init__(self):
        """Initialize sentiment analysis models.

        Loads the tokenizer and model named by ``FINBERT_MODEL``, moves the
        model to CUDA when available (CPU otherwise) and switches it to
        evaluation mode.
        """
        # VADER for general sentiment
        self.vader = SentimentIntensityAnalyzer()

        # FinBERT for financial sentiment
        print("Loading FinBERT model...")
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.finbert_model.to(self.device)
        self.finbert_model.eval()
        print("FinBERT loaded successfully!")

    def analyze_vader(self, text: str) -> Dict[str, float]:
        """Analyze sentiment using VADER.

        Args:
            text: Text to analyze.

        Returns:
            Dictionary with the keys ``positive``, ``neutral``, ``negative``
            and ``compound``, copied from VADER's ``pos``/``neu``/``neg``/
            ``compound`` polarity scores.
        """
        scores = self.vader.polarity_scores(text)
        return {
            'positive': scores['pos'],
            'neutral': scores['neu'],
            'negative': scores['neg'],
            'compound': scores['compound'],
        }

    def _finbert_label_indices(self) -> Dict[str, int]:
        """Return the logit index of each sentiment class for the loaded model.

        The indices are read from the model's ``config.id2label`` mapping
        (label names compared case-insensitively). When that mapping is
        missing or does not name all three classes, the positional order
        positive, negative, neutral is used.
        """
        config = getattr(self.finbert_model, 'config', None)
        id2label = getattr(config, 'id2label', None) or {}
        by_name = {
            str(label).lower(): int(index) for index, label in id2label.items()
        }
        if all(name in by_name for name in self._DEFAULT_LABEL_INDICES):
            return {name: by_name[name] for name in self._DEFAULT_LABEL_INDICES}
        return dict(self._DEFAULT_LABEL_INDICES)

    def analyze_finbert(self, text: str) -> Dict[str, float]:
        """Analyze sentiment using FinBERT.

        Args:
            text: Text to analyze. Input is truncated to 512 tokens.

        Returns:
            Dictionary with softmax probabilities under the keys
            ``positive``, ``negative`` and ``neutral``.
        """
        # Tokenize
        inputs = self.finbert_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        ).to(self.device)

        # Get predictions
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Only one text is passed in, so take the first (only) row.
        row = probs[0].cpu().tolist()

        # Label order differs between FinBERT checkpoints, so look the
        # indices up instead of assuming a fixed layout.
        indices = self._finbert_label_indices()
        return {
            'positive': float(row[indices['positive']]),
            'negative': float(row[indices['negative']]),
            'neutral': float(row[indices['neutral']]),
        }

    def get_sentiment_label(self, compound_score: float) -> str:
        """Convert a sentiment score to a label.

        Args:
            compound_score: Score to classify. ``analyze_comprehensive``
                passes the combined VADER/FinBERT score here.

        Returns:
            ``"Positive"`` when the score is at or above
            ``SENTIMENT_THRESHOLDS['positive']``, ``"Negative"`` when it is
            at or below ``SENTIMENT_THRESHOLDS['negative']``, otherwise
            ``"Neutral"``.
        """
        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
            return "Positive"
        if compound_score <= SENTIMENT_THRESHOLDS['negative']:
            return "Negative"
        return "Neutral"

    def analyze_comprehensive(self, text: str) -> Dict:
        """Perform comprehensive sentiment analysis.

        Args:
            text: Text to analyze.

        Returns:
            Dictionary with the keys ``vader`` and ``finbert`` (the
            per-model score dictionaries), ``combined_score`` (weighted sum
            of the VADER compound score and FinBERT's positive-minus-negative
            probability), ``sentiment_label`` (label for the combined score)
            and ``confidence`` (largest FinBERT class probability).
        """
        vader_scores = self.analyze_vader(text)
        finbert_scores = self.analyze_finbert(text)

        # Combined score (weighted average)
        combined_score = (
            vader_scores['compound'] * self._VADER_WEIGHT +
            (finbert_scores['positive'] - finbert_scores['negative']) * self._FINBERT_WEIGHT
        )

        return {
            'vader': vader_scores,
            'finbert': finbert_scores,
            'combined_score': combined_score,
            'sentiment_label': self.get_sentiment_label(combined_score),
            'confidence': max(finbert_scores.values()),
        }

    def analyze_batch(self, texts: list) -> list:
        """Analyze multiple texts.

        Args:
            texts: List of texts to analyze.

        Returns:
            List with one ``analyze_comprehensive`` result per input text,
            in input order.
        """
        return [self.analyze_comprehensive(text) for text in texts]