File size: 1,909 Bytes
6f0b83f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import pipeline
class SentimentAnalyzer:
    """Score text with three sentiment models and combine the results.

    Each text is scored by NLTK's VADER analyzer, by TextBlob, and by a
    Hugging Face ``sentiment-analysis`` pipeline.  The three scores are
    averaged into a single ``aggregate_sentiment`` value per row.
    """

    def __init__(self):
        """Load the VADER lexicon (downloading it only if absent) and the models."""
        # Only hit the network when the lexicon is not already installed;
        # an unconditional download makes every construction slow and makes
        # offline use noisy even though the resource is present.
        try:
            nltk.data.find('sentiment/vader_lexicon.zip')
        except LookupError:
            nltk.download('vader_lexicon', quiet=True)
        self.sia = SentimentIntensityAnalyzer()
        self.transformer_model = pipeline("sentiment-analysis")

    def analyze(self, data: pd.DataFrame, text_column: str) -> pd.DataFrame:
        """Add per-model and aggregate sentiment columns to *data*.

        The frame is modified in place and also returned.  Four columns are
        written: ``vader_sentiment``, ``textblob_sentiment``,
        ``transformer_sentiment`` and ``aggregate_sentiment`` (the mean of
        the other three).

        Args:
            data: Frame holding the texts to score.
            text_column: Name of the column in *data* that contains the text.
                Missing values are scored as the empty string and other
                non-string values are converted with ``str``.

        Returns:
            The same ``data`` object, with the sentiment columns added.

        Raises:
            KeyError: If *text_column* is not a column of *data*.
        """
        column = data[text_column]
        # VADER and TextBlob both require ``str`` input; normalise once so a
        # single NaN or numeric cell does not abort the whole run.
        texts = column.astype(object).where(column.notna(), "").astype(str)

        # VADER: use the normalised 'compound' score.
        data['vader_sentiment'] = texts.apply(
            lambda text: self.sia.polarity_scores(text)['compound']
        ).astype(float)

        # TextBlob: polarity component of the sentiment.
        data['textblob_sentiment'] = texts.apply(
            lambda text: TextBlob(text).sentiment.polarity
        ).astype(float)

        # Transformer: skip the model call entirely for an empty frame, and
        # truncate inputs so texts longer than the model's maximum sequence
        # length are scored instead of raising.
        text_list = texts.tolist()
        if text_list:
            transformer_results = self.transformer_model(text_list, truncation=True)
        else:
            transformer_results = []

        # The pipeline reports a confidence for the winning label; sign it so
        # that anything other than POSITIVE contributes a negative score.
        signed_scores = [
            result['score']
            if str(result['label']).upper() == 'POSITIVE'
            else -result['score']
            for result in transformer_results
        ]
        data['transformer_sentiment'] = pd.Series(
            signed_scores, index=data.index, dtype=float
        )

        # Aggregate sentiment: unweighted mean of the three model scores.
        data['aggregate_sentiment'] = (
            data['vader_sentiment']
            + data['textblob_sentiment']
            + data['transformer_sentiment']
        ) / 3
        return data

    def get_sentiment_summary(self, data: pd.DataFrame, threshold: float = 0.05) -> dict:
        """Count rows that fall into the positive, neutral and negative bands.

        Args:
            data: Frame containing an ``aggregate_sentiment`` column.
            threshold: Half-width of the neutral band.  Scores strictly above
                ``threshold`` are positive, scores strictly below
                ``-threshold`` are negative, and everything in between
                (bounds included) is neutral.  Defaults to 0.05.

        Returns:
            A dict with the keys ``'positive'``, ``'neutral'`` and
            ``'negative'`` mapped to plain ``int`` counts.  Rows whose
            aggregate score is NaN are not counted in any band.
        """
        scores = data['aggregate_sentiment']
        # ``.sum()`` on a boolean Series yields a NumPy integer; convert to a
        # built-in int so the summary can be serialised (e.g. with json).
        return {
            'positive': int((scores > threshold).sum()),
            'neutral': int(((scores >= -threshold) & (scores <= threshold)).sum()),
            'negative': int((scores < -threshold).sum()),
        }

    def plot_sentiment_distribution(self, data):
        """Draw a histogram (with KDE overlay) of the aggregate sentiment.

        Args:
            data: Frame containing an ``aggregate_sentiment`` column.

        Returns:
            The matplotlib figure holding the plot; the caller decides
            whether to show or save it.
        """
        # Imported lazily so the plotting stack is only required when a plot
        # is actually requested.
        import matplotlib.pyplot as plt
        import seaborn as sns

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data['aggregate_sentiment'], kde=True, ax=ax)
        ax.set_title('Distribution of Sentiment Scores')
        ax.set_xlabel('Sentiment Score')
        ax.set_ylabel('Frequency')
        return fig