Spaces:

cwpkd
/

Todlong

Runtime error

App Files Files Community

Todlong / utils /sentiment_analyzer.py

cwpkd

Create utils/sentiment_analyzer.py

d128b27 verified about 2 months ago

raw

history blame contribute delete

4.33 kB

	# utils/sentiment_analyzer.py
	"""
	Sentiment analysis using VADER and FinBERT
	"""

	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
	import numpy as np
	from typing import Dict, Tuple
	from config import FINBERT_MODEL, SENTIMENT_THRESHOLDS


	class SentimentAnalyzer:
	    """Analyze sentiment using VADER and a FinBERT sequence classifier.

	    VADER supplies a general-purpose polarity score; the FinBERT checkpoint
	    named by ``FINBERT_MODEL`` supplies class probabilities. The two are
	    blended into a single score by :meth:`analyze_comprehensive`.
	    """

	    # Weights used to blend the VADER compound score with the FinBERT
	    # (positive - negative) margin in analyze_comprehensive().
	    VADER_WEIGHT = 0.3
	    FINBERT_WEIGHT = 0.7

	    # Logit order assumed when the checkpoint's config does not name its
	    # classes: index 0 = positive, 1 = negative, 2 = neutral.
	    DEFAULT_FINBERT_LABELS = ('positive', 'negative', 'neutral')

	    def __init__(self):
	        """Load the VADER analyzer and the FinBERT tokenizer/model.

	        The model is moved to CUDA when available (CPU otherwise) and put in
	        eval mode. Progress is reported on stdout.
	        """
	        # VADER for general sentiment
	        self.vader = SentimentIntensityAnalyzer()

	        # FinBERT for financial sentiment
	        print("Loading FinBERT model...")
	        self.finbert_tokenizer = AutoTokenizer.from_pretrained(FINBERT_MODEL)
	        self.finbert_model = AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
	        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        self.finbert_model.to(self.device)
	        self.finbert_model.eval()

	        # Checkpoints do not all order their classes the same way, so take the
	        # order from the model config instead of assuming it.
	        self._finbert_label_index = self._resolve_label_indices(
	            getattr(self.finbert_model.config, 'id2label', None)
	        )
	        print("FinBERT loaded successfully!")

	    @classmethod
	    def _resolve_label_indices(cls, id2label) -> Dict[str, int]:
	        """Map 'positive' / 'negative' / 'neutral' to their logit indices.

	        Args:
	            id2label: Mapping of class index to label name (as found on a
	                model config), or None.

	        Returns:
	            Dictionary from lower-case label name to logit index. Falls back
	            to the order in ``DEFAULT_FINBERT_LABELS`` when ``id2label`` is
	            missing or does not name all three classes exactly once.
	        """
	        fallback = {name: position for position, name in enumerate(cls.DEFAULT_FINBERT_LABELS)}
	        if not id2label:
	            return fallback

	        resolved = {}
	        for index, label in id2label.items():
	            name = str(label).strip().lower()
	            if name not in fallback:
	                continue
	            try:
	                resolved[name] = int(index)
	            except (TypeError, ValueError):
	                return fallback

	        all_named = set(resolved) == set(fallback)
	        all_distinct = len(set(resolved.values())) == len(fallback)
	        return resolved if all_named and all_distinct else fallback

	    def analyze_vader(self, text: str) -> Dict[str, float]:
	        """
	        Analyze sentiment using VADER

	        Args:
	            text: Text to analyze

	        Returns:
	            Dictionary with 'positive', 'neutral', 'negative' and 'compound'
	            scores taken from VADER's polarity scores
	        """
	        scores = self.vader.polarity_scores(text)
	        return {
	            'positive': scores['pos'],
	            'neutral': scores['neu'],
	            'negative': scores['neg'],
	            'compound': scores['compound'],
	        }

	    def analyze_finbert(self, text: str) -> Dict[str, float]:
	        """
	        Analyze sentiment using FinBERT

	        Args:
	            text: Text to analyze (truncated to 512 tokens)

	        Returns:
	            Dictionary with 'positive', 'negative' and 'neutral' softmax
	            probabilities
	        """
	        # Tokenize
	        inputs = self.finbert_tokenizer(
	            text,
	            return_tensors="pt",
	            truncation=True,
	            max_length=512,
	            padding=True
	        ).to(self.device)

	        # Get predictions; no gradients are needed for inference
	        with torch.no_grad():
	            outputs = self.finbert_model(**inputs)
	            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

	        # Single input, so take the first (only) row of the batch
	        probs = probs.cpu().numpy()[0]

	        # Pick each class by the index the checkpoint declares for it
	        label_index = self._finbert_label_index
	        return {
	            name: float(probs[label_index[name]])
	            for name in self.DEFAULT_FINBERT_LABELS
	        }

	    def get_sentiment_label(self, compound_score: float) -> str:
	        """
	        Convert a sentiment score to a label

	        Args:
	            compound_score: Score to classify (analyze_comprehensive passes
	                the blended VADER/FinBERT score)

	        Returns:
	            "Positive" at or above SENTIMENT_THRESHOLDS['positive'],
	            "Negative" at or below SENTIMENT_THRESHOLDS['negative'],
	            otherwise "Neutral"
	        """
	        if compound_score >= SENTIMENT_THRESHOLDS['positive']:
	            return "Positive"
	        if compound_score <= SENTIMENT_THRESHOLDS['negative']:
	            return "Negative"
	        return "Neutral"

	    def analyze_comprehensive(self, text: str) -> Dict:
	        """
	        Perform comprehensive sentiment analysis

	        Args:
	            text: Text to analyze

	        Returns:
	            Dictionary with the VADER scores, the FinBERT probabilities, the
	            blended 'combined_score', its 'sentiment_label', and a
	            'confidence' equal to the largest FinBERT probability
	        """
	        # VADER analysis
	        vader_scores = self.analyze_vader(text)

	        # FinBERT analysis
	        finbert_scores = self.analyze_finbert(text)

	        # Combined score (weighted average of VADER compound and FinBERT margin)
	        finbert_margin = finbert_scores['positive'] - finbert_scores['negative']
	        combined_score = (
	            vader_scores['compound'] * self.VADER_WEIGHT +
	            finbert_margin * self.FINBERT_WEIGHT
	        )

	        return {
	            'vader': vader_scores,
	            'finbert': finbert_scores,
	            'combined_score': combined_score,
	            'sentiment_label': self.get_sentiment_label(combined_score),
	            'confidence': max(finbert_scores.values()),
	        }

	    def analyze_batch(self, texts: list) -> list:
	        """
	        Analyze multiple texts

	        Args:
	            texts: List of texts to analyze

	        Returns:
	            List of analyze_comprehensive results, one per text, in input order
	        """
	        return [self.analyze_comprehensive(text) for text in texts]