# File size: 4,328 Bytes
# d128b27
# (file-viewer line-number gutter 1-143 omitted)
# utils/sentiment_analyzer.py
"""
Sentiment analysis using VADER and FinBERT
"""
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
from typing import Dict, Tuple
from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS
class SentimentAnalyzer:
    """Analyze sentiment using multiple methods.

    Wraps a VADER ``SentimentIntensityAnalyzer`` and a FinBERT
    sequence-classification model loaded from ``FINBERT_MODEL``, and exposes
    the per-model scores as well as a weighted blend of the two.
    """

    # Weights used by analyze_comprehensive to blend the two models.
    VADER_WEIGHT = 0.3
    FINBERT_WEIGHT = 0.7

    # Keys of the dictionary returned by analyze_finbert. The order is also
    # the positional fallback (index 0, 1, 2) used when the model config does
    # not name its classes with these labels.
    _FINBERT_LABELS = ("positive", "negative", "neutral")

    def __init__(self):
        """Initialize sentiment analysis models."""
        # VADER for general sentiment
        self.vader = SentimentIntensityAnalyzer()

        # FinBERT for financial sentiment
        print("Loading FinBERT model...")
        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
        # Use CUDA when torch reports it as available, otherwise the CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.finbert_model.to(self.device)
        # The model is only used for inference, so put it in eval mode.
        self.finbert_model.eval()
        print("FinBERT loaded successfully!")

    @classmethod
    def _scores_by_label(cls, probs, id2label=None) -> Dict[str, float]:
        """Map class probabilities to positive/negative/neutral keys.

        Args:
            probs: Indexable sequence of class probabilities.
            id2label: Optional mapping of class index to label name, as
                found on the model config.

        Returns:
            Dictionary with 'positive', 'negative' and 'neutral' floats.
        """
        # Label names are compared case-insensitively so that e.g.
        # "Positive" and "positive" are treated alike.
        index_of = {
            str(name).strip().lower(): int(idx)
            for idx, name in (id2label or {}).items()
        }
        # If the config does not name all three classes (missing mapping or
        # generic names such as "LABEL_0"), keep the original positional
        # order: positive, negative, neutral.
        if not all(label in index_of for label in cls._FINBERT_LABELS):
            index_of = {label: i for i, label in enumerate(cls._FINBERT_LABELS)}
        return {label: float(probs[index_of[label]]) for label in cls._FINBERT_LABELS}

    def analyze_vader(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using VADER

        Args:
            text: Text to analyze

        Returns:
            Dictionary with 'positive', 'neutral', 'negative' and
            'compound' scores
        """
        scores = self.vader.polarity_scores(text)
        # Re-key VADER's short names to the names used across this class.
        return {
            'positive': scores['pos'],
            'neutral': scores['neu'],
            'negative': scores['neg'],
            'compound': scores['compound'],
        }

    def analyze_finbert(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment using FinBERT

        Args:
            text: Text to analyze

        Returns:
            Dictionary with 'positive', 'negative' and 'neutral'
            probabilities
        """
        # Tokenize; input longer than 512 tokens is truncated.
        inputs = self.finbert_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        ).to(self.device)

        # Get predictions without tracking gradients.
        with torch.no_grad():
            outputs = self.finbert_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # A single text was passed in, so take the first (only) row.
        probs = probs.cpu().numpy()[0]

        # The class order differs between FinBERT checkpoints, so read it
        # from the model config instead of assuming a fixed order.
        id2label = getattr(self.finbert_model.config, "id2label", None)
        return self._scores_by_label(probs, id2label)

    def get_sentiment_label(self, compound_score: float) -> str:
        """
        Convert a sentiment score to a label

        Args:
            compound_score: Score to classify; analyze_comprehensive passes
                the blended VADER/FinBERT score here

        Returns:
            "Positive", "Negative" or "Neutral" according to
            SENTIMENT_THRESHOLDS
        """
        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
            return "Positive"
        if compound_score <= SENTIMENT_THRESHOLDS['negative']:
            return "Negative"
        return "Neutral"

    def analyze_comprehensive(self, text: str) -> Dict:
        """
        Perform comprehensive sentiment analysis

        Args:
            text: Text to analyze

        Returns:
            Dictionary with the VADER scores, the FinBERT scores, the
            blended score, its label, and the highest FinBERT probability
            as 'confidence'
        """
        # VADER analysis
        vader_scores = self.analyze_vader(text)

        # FinBERT analysis
        finbert_scores = self.analyze_finbert(text)

        # Combined score (weighted average of VADER's compound score and
        # FinBERT's positive-minus-negative probability).
        combined_score = (
            vader_scores['compound'] * self.VADER_WEIGHT +
            (finbert_scores['positive'] - finbert_scores['negative']) * self.FINBERT_WEIGHT
        )

        return {
            'vader': vader_scores,
            'finbert': finbert_scores,
            'combined_score': combined_score,
            'sentiment_label': self.get_sentiment_label(combined_score),
            'confidence': max(finbert_scores.values()),
        }

    def analyze_batch(self, texts: list) -> list:
        """
        Analyze multiple texts

        Args:
            texts: List of texts to analyze

        Returns:
            List of sentiment analysis results, one per input text and in
            the same order
        """
        return [self.analyze_comprehensive(text) for text in texts]