|
|
import pandas as pd |
|
|
import nltk |
|
|
from nltk.sentiment import SentimentIntensityAnalyzer |
|
|
from textblob import TextBlob |
|
|
from transformers import pipeline |
|
|
|
|
|
class SentimentAnalyzer:
    """Score text with three sentiment back-ends and combine the results.

    The back-ends are NLTK's VADER analyzer, TextBlob, and the default
    ``transformers`` ``"sentiment-analysis"`` pipeline. Each one contributes
    a signed score column, and the arithmetic mean of the three is stored as
    ``aggregate_sentiment``.
    """

    def __init__(self):
        """Create the VADER analyzer and the transformer pipeline.

        The VADER lexicon is downloaded only when NLTK cannot find it
        locally, so repeated construction neither prints download noise nor
        needs network access once the lexicon is installed.
        """
        try:
            self.sia = SentimentIntensityAnalyzer()
        except LookupError:
            # NLTK raises LookupError when the lexicon resource is missing.
            nltk.download('vader_lexicon', quiet=True)
            self.sia = SentimentIntensityAnalyzer()

        self.transformer_model = pipeline("sentiment-analysis")

    @staticmethod
    def _signed_score(result):
        """Return the pipeline confidence, negated unless the label is 'POSITIVE'."""
        score = result['score']
        return score if result['label'] == 'POSITIVE' else -score

    @staticmethod
    def _spread(scores, present):
        """Place ``scores`` at the True positions of ``present``; NaN elsewhere."""
        remaining = iter(scores)
        return [next(remaining) if keep else float('nan') for keep in present]

    def analyze(self, data: pd.DataFrame, text_column: str) -> pd.DataFrame:
        """Add per-model and aggregate sentiment columns to ``data``.

        Args:
            data: Frame holding the texts. It is modified in place and also
                returned.
            text_column: Name of the column in ``data`` containing the text.

        Returns:
            ``data`` with four added (or overwritten) columns:
            ``vader_sentiment``, ``textblob_sentiment``,
            ``transformer_sentiment`` and ``aggregate_sentiment``.

        Rows whose text is missing (NaN/None) get NaN in every score column
        instead of raising. Non-string values are converted with ``str``
        before scoring.
        """
        column = data[text_column]
        present = column.notna().tolist()
        texts = [
            str(value)
            for value, keep in zip(column.tolist(), present)
            if keep
        ]

        vader_scores = [
            self.sia.polarity_scores(text)['compound'] for text in texts
        ]
        textblob_scores = [TextBlob(text).sentiment.polarity for text in texts]

        # Skip the model call when there is nothing to score, and truncate
        # inputs that exceed the model's maximum sequence length rather than
        # letting the pipeline raise on them.
        transformer_results = (
            self.transformer_model(texts, truncation=True) if texts else []
        )
        transformer_scores = [
            self._signed_score(result) for result in transformer_results
        ]

        columns = (
            ('vader_sentiment', vader_scores),
            ('textblob_sentiment', textblob_scores),
            ('transformer_sentiment', transformer_scores),
        )
        for name, scores in columns:
            # Assign positionally as a float array so the frame's own index
            # (even a non-unique one) never takes part in alignment.
            data[name] = pd.Series(
                self._spread(scores, present), dtype=float
            ).to_numpy()

        data['aggregate_sentiment'] = (
            data['vader_sentiment']
            + data['textblob_sentiment']
            + data['transformer_sentiment']
        ) / 3

        return data

    def get_sentiment_summary(self, data: pd.DataFrame, threshold: float = 0.05) -> dict:
        """Count rows falling into positive, neutral and negative bands.

        Args:
            data: Frame containing an ``aggregate_sentiment`` column.
            threshold: Half-width of the neutral band. Scores strictly above
                ``threshold`` are positive, scores strictly below
                ``-threshold`` are negative, and everything in between
                (inclusive) is neutral. Defaults to 0.05.

        Returns:
            A dict with keys ``'positive'``, ``'neutral'`` and ``'negative'``
            mapped to plain ``int`` counts. NaN scores are counted in no
            band.

        Raises:
            ValueError: If ``threshold`` is negative, which would make the
                bands overlap.
        """
        if threshold < 0:
            raise ValueError("threshold must be non-negative")

        scores = data['aggregate_sentiment']
        is_positive = scores > threshold
        is_negative = scores < -threshold
        is_neutral = (scores >= -threshold) & (scores <= threshold)

        # int() turns the numpy integer from .sum() into a built-in int so
        # the summary can be serialised (for example with json.dumps).
        return {
            'positive': int(is_positive.sum()),
            'neutral': int(is_neutral.sum()),
            'negative': int(is_negative.sum()),
        }

    def plot_sentiment_distribution(self, data):
        """Draw a histogram with a KDE overlay of ``aggregate_sentiment``.

        Args:
            data: Frame containing an ``aggregate_sentiment`` column.

        Returns:
            The matplotlib figure that holds the plot.
        """
        # Imported here so the plotting libraries are only needed by callers
        # that actually plot.
        import matplotlib.pyplot as plt
        import seaborn as sns

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data['aggregate_sentiment'], kde=True, ax=ax)
        ax.set_title('Distribution of Sentiment Scores')
        ax.set_xlabel('Sentiment Score')
        ax.set_ylabel('Frequency')

        return fig