File size: 1,909 Bytes
6f0b83f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import pipeline

class SentimentAnalyzer:
    """Score text with VADER, TextBlob and a transformer pipeline.

    The three scores are written to new DataFrame columns and averaged into
    an ``aggregate_sentiment`` column that the summary and plotting helpers
    read.
    """

    def __init__(self):
        """Load the VADER lexicon and build the scoring models."""
        # Only invoke the downloader when the lexicon is not already
        # installed, so constructing an analyzer does not depend on the
        # downloader (and its network access) every time.
        try:
            nltk.data.find('sentiment/vader_lexicon.zip')
        except LookupError:
            nltk.download('vader_lexicon', quiet=True)
        self.sia = SentimentIntensityAnalyzer()
        self.transformer_model = pipeline("sentiment-analysis")

    def analyze(self, data: pd.DataFrame, text_column: str) -> pd.DataFrame:
        """Add per-row sentiment scores to *data* and return it.

        The frame is modified in place; four columns are added:
        ``vader_sentiment``, ``textblob_sentiment``,
        ``transformer_sentiment`` and ``aggregate_sentiment`` (the plain
        mean of the other three).

        Args:
            data: Frame holding the text to score.
            text_column: Name of the column in *data* containing the text.

        Returns:
            The same ``data`` object, with the score columns added.

        Raises:
            KeyError: If *text_column* is not a column of *data*.
        """
        # Missing cells become "" and everything else is coerced to str,
        # because the scorers below expect string input.
        texts = data[text_column].fillna("").astype(str)

        # VADER sentiment: the 'compound' entry of polarity_scores().
        data['vader_sentiment'] = texts.apply(
            lambda text: self.sia.polarity_scores(text)['compound']
        ).astype(float)

        # TextBlob sentiment: the polarity component.
        data['textblob_sentiment'] = texts.apply(
            lambda text: TextBlob(text).sentiment.polarity
        ).astype(float)

        # Transformer-based sentiment. The model is skipped for an empty
        # frame, and long inputs are truncated to the model's maximum
        # sequence length instead of raising.
        if texts.empty:
            signed_scores = []
        else:
            results = self.transformer_model(texts.tolist(), truncation=True)
            # Any label other than 'POSITIVE' is given a negative sign.
            signed_scores = [
                result['score'] if result['label'] == 'POSITIVE' else -result['score']
                for result in results
            ]
        data['transformer_sentiment'] = pd.Series(
            signed_scores, index=data.index, dtype=float
        )

        # Aggregate sentiment: unweighted mean of the three scores.
        data['aggregate_sentiment'] = (
            data['vader_sentiment']
            + data['textblob_sentiment']
            + data['transformer_sentiment']
        ) / 3

        return data

    def get_sentiment_summary(self, data: pd.DataFrame, threshold: float = 0.05) -> dict:
        """Count rows classified as positive, neutral and negative.

        Args:
            data: Frame with an ``aggregate_sentiment`` column.
            threshold: Scores strictly above ``threshold`` are positive,
                scores strictly below ``-threshold`` are negative, and
                everything in between (bounds included) is neutral.

        Returns:
            A dict with ``'positive'``, ``'neutral'`` and ``'negative'``
            keys mapped to plain ``int`` counts.
        """
        scores = data['aggregate_sentiment']
        # int() turns the NumPy integer from .sum() into a built-in int so
        # the summary can be serialised (e.g. with json.dumps).
        return {
            'positive': int((scores > threshold).sum()),
            'neutral': int(((scores >= -threshold) & (scores <= threshold)).sum()),
            'negative': int((scores < -threshold).sum()),
        }

    def plot_sentiment_distribution(self, data: pd.DataFrame):
        """Draw a histogram (with KDE) of ``aggregate_sentiment``.

        Args:
            data: Frame with an ``aggregate_sentiment`` column.

        Returns:
            The matplotlib figure containing the plot.
        """
        # Imported here so the plotting libraries are only needed when a
        # plot is actually requested.
        import matplotlib.pyplot as plt
        import seaborn as sns

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data['aggregate_sentiment'], kde=True, ax=ax)
        ax.set_title('Distribution of Sentiment Scores')
        ax.set_xlabel('Sentiment Score')
        ax.set_ylabel('Frequency')

        return fig