# Repository path: Todlong / utils/sentiment_analyzer.py
# Last commit: "Create utils/sentiment_analyzer.py" (d128b27, verified) -- cwpkd
# utils/sentiment_analyzer.py
"""
Sentiment analysis using VADER and FinBERT
"""
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
from typing import Dict, Tuple
from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS
class SentimentAnalyzer:
    """Score text sentiment with VADER and FinBERT and blend the two.

    VADER supplies a general-purpose lexicon score; FinBERT (the checkpoint
    named by ``FINBERT_MODEL``) supplies class probabilities. The blended
    score is a fixed weighted sum of the two, see ``analyze_comprehensive``.
    """

    # Weights of the two signals in the blended score.
    VADER_WEIGHT = 0.3
    FINBERT_WEIGHT = 0.7

    # Output keys of analyze_finbert, and the positional logit order assumed
    # when the checkpoint's config does not name its classes recognisably.
    _DEFAULT_LABEL_ORDER = ('positive', 'negative', 'neutral')

    def __init__(self):
        """Build the VADER analyzer and load FinBERT onto the best device."""
        # VADER for general sentiment
        self.vader = SentimentIntensityAnalyzer()

        # FinBERT for financial sentiment
        print("Loading FinBERT model...")
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
        # Prefer the GPU when one is visible, otherwise stay on the CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.finbert_model.to(self.device)
        # Inference only: switch off dropout and other train-time behaviour.
        self.finbert_model.eval()
        print("FinBERT loaded successfully!")

    def analyze_vader(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using VADER

        Args:
            text: Text to analyze

        Returns:
            Dictionary with keys 'positive', 'neutral', 'negative' and
            'compound', copied from VADER's 'pos', 'neu', 'neg' and
            'compound' polarity scores.
        """
        scores = self.vader.polarity_scores(text)
        return {
            'positive': scores['pos'],
            'neutral': scores['neu'],
            'negative': scores['neg'],
            'compound': scores['compound'],
        }

    @classmethod
    def _scores_by_label(cls, probs, id2label=None) -> Dict[str, float]:
        """Attach class probabilities to 'positive'/'negative'/'neutral'.

        Args:
            probs: Indexable sequence of class probabilities, one per logit.
            id2label: Mapping of class index to label name (as found on a
                model config). Label names are matched case-insensitively.

        Returns:
            Dictionary with keys 'positive', 'negative', 'neutral' (in that
            order). When ``id2label`` is missing, does not name all three
            classes, or points past the end of ``probs``, the positional
            order positive, negative, neutral is used instead.
        """
        order = cls._DEFAULT_LABEL_ORDER
        index_of = {}
        for idx, label in (id2label or {}).items():
            name = str(label).strip().lower()
            if name in order:
                index_of[name] = int(idx)

        # Generic placeholders such as "LABEL_0" carry no class meaning, so
        # fall back to the historical positional order rather than guess.
        if len(index_of) != len(order) or max(index_of.values()) >= len(probs):
            index_of = {name: pos for pos, name in enumerate(order)}

        return {name: float(probs[index_of[name]]) for name in order}

    def analyze_finbert(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using FinBERT

        Args:
            text: Text to analyze (truncated to 512 tokens)

        Returns:
            Dictionary with 'positive', 'negative' and 'neutral'
            probabilities (softmax over the model's logits).
        """
        # Tokenize
        inputs = self.finbert_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        ).to(self.device)

        # Get predictions without building an autograd graph
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # The logit order differs between FinBERT checkpoints, so read it
        # from the model config instead of assuming a fixed order.
        id2label = getattr(self.finbert_model.config, "id2label", None)
        return self._scores_by_label(probs[0].cpu().tolist(), id2label)

    def get_sentiment_label(self, compound_score: float) -> str:
        """
        Convert a sentiment score to a label

        Args:
            compound_score: Score to classify (a VADER compound score or
                the blended score from ``analyze_comprehensive``)

        Returns:
            "Positive" when the score is at or above
            ``SENTIMENT_THRESHOLDS['positive']``, "Negative" when it is at
            or below ``SENTIMENT_THRESHOLDS['negative']``, else "Neutral".
        """
        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
            return "Positive"
        if compound_score <= SENTIMENT_THRESHOLDS['negative']:
            return "Negative"
        return "Neutral"

    def analyze_comprehensive(self, text: str) -> Dict:
        """
        Perform comprehensive sentiment analysis

        Args:
            text: Text to analyze

        Returns:
            Dictionary with:
                'vader': result of ``analyze_vader``
                'finbert': result of ``analyze_finbert``
                'combined_score': weighted sum of the VADER compound score
                    and FinBERT's (positive - negative) margin
                'sentiment_label': label for the combined score
                'confidence': largest FinBERT class probability
        """
        vader_scores = self.analyze_vader(text)
        finbert_scores = self.analyze_finbert(text)

        # Signed FinBERT polarity: positive mass minus negative mass.
        finbert_polarity = finbert_scores['positive'] - finbert_scores['negative']
        combined_score = (
            vader_scores['compound'] * self.VADER_WEIGHT +
            finbert_polarity * self.FINBERT_WEIGHT
        )

        return {
            'vader': vader_scores,
            'finbert': finbert_scores,
            'combined_score': combined_score,
            'sentiment_label': self.get_sentiment_label(combined_score),
            'confidence': max(finbert_scores.values()),
        }

    def analyze_batch(self, texts: list) -> list:
        """
        Analyze multiple texts

        Args:
            texts: List of texts to analyze

        Returns:
            List of ``analyze_comprehensive`` results, one per input text
            and in the same order (empty list for empty input).
        """
        return [self.analyze_comprehensive(text) for text in texts]