"""
Sentiment analysis using VADER and FinBERT
"""

from typing import Dict, List, Tuple

import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS


class SentimentAnalyzer:
    """Analyze sentiment using multiple methods.

    Wraps a VADER ``SentimentIntensityAnalyzer`` and a FinBERT sequence
    classifier (loaded from ``FINBERT_MODEL``) and offers per-model scoring
    as well as a weighted blend of the two.
    """

    # Weights applied by analyze_comprehensive when blending the two signals.
    VADER_WEIGHT = 0.3
    FINBERT_WEIGHT = 0.7

    # Class order used when the loaded model's config does not name its
    # output classes: index 0 = positive, 1 = negative, 2 = neutral.
    _DEFAULT_FINBERT_LABELS = ('positive', 'negative', 'neutral')

    def __init__(self):
        """Initialize sentiment analysis models.

        Creates the VADER analyzer, loads the FinBERT tokenizer and model
        named by ``FINBERT_MODEL``, moves the model to CUDA when available
        (CPU otherwise) and switches it to eval mode. Progress is reported
        on stdout.
        """
        self.vader = SentimentIntensityAnalyzer()

        print("Loading FinBERT model...")
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.finbert_model.to(self.device)
        self.finbert_model.eval()
        print("FinBERT loaded successfully!")

    def analyze_vader(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using VADER

        Args:
            text: Text to analyze

        Returns:
            Dictionary with the VADER scores under the keys
            'positive', 'neutral', 'negative' and 'compound'
        """
        scores = self.vader.polarity_scores(text)
        return {
            'positive': scores['pos'],
            'neutral': scores['neu'],
            'negative': scores['neg'],
            'compound': scores['compound'],
        }

    def _finbert_label_order(self) -> Tuple[str, ...]:
        """Return the sentiment name for each FinBERT output index.

        The order is read from ``self.finbert_model.config.id2label`` so the
        scores stay correctly labelled whatever class order the loaded
        checkpoint uses. Label names are compared case-insensitively. If the
        config is missing or does not name exactly positive/negative/neutral
        (e.g. generic ``LABEL_0`` names), the fixed fallback order
        positive, negative, neutral is returned.
        """
        fallback = self._DEFAULT_FINBERT_LABELS
        config = getattr(self.finbert_model, 'config', None)
        id2label = getattr(config, 'id2label', None) or {}
        # Accept both int and str keys; configs deserialized from JSON may
        # carry either form.
        names = tuple(
            str(id2label.get(idx, id2label.get(str(idx), ''))).strip().lower()
            for idx in range(len(fallback))
        )
        if sorted(names) == sorted(fallback):
            return names
        return fallback

    def analyze_finbert(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using FinBERT

        Args:
            text: Text to analyze; input longer than 512 tokens is truncated

        Returns:
            Dictionary with the softmax probabilities under the keys
            'positive', 'negative' and 'neutral'
        """
        inputs = self.finbert_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        ).to(self.device)

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Single input text, so only the first row of the batch is relevant.
        row = probs[0].cpu().tolist()
        by_label = dict(zip(self._finbert_label_order(), row))

        return {
            'positive': float(by_label['positive']),
            'negative': float(by_label['negative']),
            'neutral': float(by_label['neutral']),
        }

    def get_sentiment_label(self, compound_score: float) -> str:
        """
        Convert compound score to label

        Args:
            compound_score: Score to classify (analyze_comprehensive passes
                its blended score here)

        Returns:
            "Positive" when the score is at or above
            SENTIMENT_THRESHOLDS['positive'], "Negative" when it is at or
            below SENTIMENT_THRESHOLDS['negative'], otherwise "Neutral"
        """
        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
            return "Positive"
        if compound_score <= SENTIMENT_THRESHOLDS['negative']:
            return "Negative"
        return "Neutral"

    def analyze_comprehensive(self, text: str) -> Dict:
        """
        Perform comprehensive sentiment analysis

        Args:
            text: Text to analyze

        Returns:
            Dictionary with the keys:
                'vader': result of analyze_vader
                'finbert': result of analyze_finbert
                'combined_score': VADER compound * VADER_WEIGHT plus
                    (FinBERT positive - negative) * FINBERT_WEIGHT
                'sentiment_label': label for the combined score
                'confidence': the largest FinBERT probability
        """
        vader_scores = self.analyze_vader(text)
        finbert_scores = self.analyze_finbert(text)

        combined_score = (
            vader_scores['compound'] * self.VADER_WEIGHT +
            (finbert_scores['positive'] - finbert_scores['negative']) * self.FINBERT_WEIGHT
        )

        return {
            'vader': vader_scores,
            'finbert': finbert_scores,
            'combined_score': combined_score,
            'sentiment_label': self.get_sentiment_label(combined_score),
            'confidence': max(finbert_scores.values()),
        }

    def analyze_batch(self, texts: List[str]) -> List[Dict]:
        """
        Analyze multiple texts

        Args:
            texts: List of texts to analyze

        Returns:
            List of analyze_comprehensive results, one per input text and
            in the same order; empty when ``texts`` is empty
        """
        return [self.analyze_comprehensive(text) for text in texts]