import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import pipeline


class SentimentAnalyzer:
    """Score text with VADER, TextBlob and a transformer model, then combine.

    Each scorer yields one signed number per text; ``analyze`` stores the
    three scores plus their arithmetic mean as new DataFrame columns.
    """

    def __init__(self):
        """Load the VADER lexicon (downloading it if absent) and both models."""
        # Only call the downloader when the lexicon is not already installed,
        # so constructing an analyzer does not contact the network every time.
        try:
            nltk.data.find('sentiment/vader_lexicon.zip')
        except LookupError:
            nltk.download('vader_lexicon')
        self.sia = SentimentIntensityAnalyzer()
        self.transformer_model = pipeline("sentiment-analysis")

    @staticmethod
    def _as_column(scores, missing):
        """Spread *scores* over the rows, inserting NaN where *missing* is True."""
        remaining = iter(scores)
        values = [float('nan') if is_missing else float(next(remaining))
                  for is_missing in missing]
        # An explicit float dtype keeps the column numeric even when empty.
        return pd.Series(values, dtype=float).to_numpy()

    def analyze(self, data, text_column):
        """Add per-model and aggregate sentiment columns to *data*.

        Args:
            data: DataFrame holding the texts. It is modified in place.
            text_column: Name of the column in *data* that contains the text.

        Returns:
            The same *data* object, with the columns ``vader_sentiment``,
            ``textblob_sentiment``, ``transformer_sentiment`` and
            ``aggregate_sentiment`` added. Rows whose text is missing
            (None/NaN) get NaN in all four columns.
        """
        column = data[text_column]
        missing = column.isna().tolist()
        # Score only the rows that actually have text; non-string values are
        # converted with str() because the scorers below require strings.
        texts = [str(value)
                 for value, is_missing in zip(column.tolist(), missing)
                 if not is_missing]

        # VADER Sentiment Analysis
        vader_scores = [self.sia.polarity_scores(text)['compound']
                        for text in texts]

        # TextBlob Sentiment Analysis
        textblob_scores = [TextBlob(text).sentiment.polarity for text in texts]

        # Transformer-based Sentiment Analysis
        # truncation=True keeps over-long texts within the model's maximum
        # input length instead of raising; skip the call when nothing to score.
        transformer_results = (
            self.transformer_model(texts, truncation=True) if texts else []
        )
        # The pipeline reports a label and a score; negate the score for any
        # label other than 'POSITIVE' so the value is signed.
        transformer_scores = [
            result['score'] if result['label'] == 'POSITIVE' else -result['score']
            for result in transformer_results
        ]

        data['vader_sentiment'] = self._as_column(vader_scores, missing)
        data['textblob_sentiment'] = self._as_column(textblob_scores, missing)
        data['transformer_sentiment'] = self._as_column(transformer_scores, missing)

        # Aggregate sentiment
        data['aggregate_sentiment'] = (
            data['vader_sentiment']
            + data['textblob_sentiment']
            + data['transformer_sentiment']
        ) / 3
        return data

    def get_sentiment_summary(self, data, threshold=0.05):
        """Count rows by aggregate sentiment class.

        Args:
            data: DataFrame containing an ``aggregate_sentiment`` column.
            threshold: Half-width of the neutral band. Scores above
                ``threshold`` are positive, scores below ``-threshold`` are
                negative, and scores in between (inclusive) are neutral.

        Returns:
            Dict with integer counts under ``'positive'``, ``'neutral'`` and
            ``'negative'``. Rows whose score is NaN are counted in none.
        """
        scores = data['aggregate_sentiment']
        # int() converts NumPy integer scalars to plain ints so the summary
        # can be serialised (e.g. with json.dumps).
        summary = {
            'positive': int((scores > threshold).sum()),
            'neutral': int(((scores >= -threshold) & (scores <= threshold)).sum()),
            'negative': int((scores < -threshold).sum()),
        }
        return summary

    def plot_sentiment_distribution(self, data):
        """Draw a histogram with a KDE curve of the aggregate sentiment scores.

        Args:
            data: DataFrame containing an ``aggregate_sentiment`` column.

        Returns:
            The matplotlib figure holding the plot.
        """
        # Imported here so the plotting libraries are needed only by callers
        # that actually plot.
        import matplotlib.pyplot as plt
        import seaborn as sns

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data['aggregate_sentiment'], kde=True, ax=ax)
        ax.set_title('Distribution of Sentiment Scores')
        ax.set_xlabel('Sentiment Score')
        ax.set_ylabel('Frequency')
        return fig