File size: 7,998 Bytes
7545f62
2c64d59
56b9772
6ac8f81
 
 
 
 
 
 
 
 
 
 
04ef2d6
6ac8f81
 
 
 
2c64d59
 
04ef2d6
6ac8f81
 
 
 
04ef2d6
6ac8f81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04ef2d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ac8f81
 
 
04ef2d6
6ac8f81
04ef2d6
6ac8f81
04ef2d6
 
 
 
 
 
 
6ac8f81
04ef2d6
6ac8f81
04ef2d6
6ac8f81
04ef2d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ac8f81
 
 
 
04ef2d6
6ac8f81
 
 
 
 
 
04ef2d6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import streamlit as st
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import plotly.express as px
from sklearn.linear_model import LinearRegression
from wordcloud import WordCloud
import base64
from io import BytesIO
import nltk
from textblob import TextBlob

# Download NLTK data
@st.cache_resource
def _ensure_nltk_data():
    """Fetch the NLTK sentence-tokenizer data once per server process.

    Streamlit re-executes this script on every widget interaction, so the
    download is wrapped in a cached function instead of running at module
    level each time. Both 'punkt' and 'punkt_tab' are requested because
    newer NLTK releases look up 'punkt_tab' for sentence tokenization.
    """
    nltk.download('punkt', quiet=True)
    nltk.download('punkt_tab', quiet=True)
    return True


@st.cache_resource
def _load_bert_pipeline():
    """Build the BERT star-rating pipeline once and reuse it across reruns."""
    return pipeline(
        "sentiment-analysis",
        model="nlptown/bert-base-multilingual-uncased-sentiment",
    )


@st.cache_data
def _demo_sentiment_scores(n_days):
    """Return ``n_days`` random scores in [-1, 1) for the demo history chart.

    Cached so the "past" chart does not reshuffle on every interaction.
    """
    return np.random.uniform(-1, 1, n_days)


_ensure_nltk_data()

# Initialize sentiment models
bert_sentiment = _load_bert_pipeline()
vader_analyzer = SentimentIntensityAnalyzer()

# Generate sample past sentiment data (kept from original for demo purposes).
# Dates run oldest -> newest so that row index i is a forward-moving time axis;
# the regression below extrapolates that same axis into the future.
_today = datetime.today()
dates = [_today - timedelta(days=offset) for offset in range(13, -1, -1)]
sentiment_scores = _demo_sentiment_scores(len(dates))
df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})

# Train a regression model for predictions (feature = day index, 0 = oldest)
X = np.arange(len(df)).reshape(-1, 1)
y = df["Sentiment Score"]
model = LinearRegression()
model.fit(X, y)

# Predict for next 7 days: indices continue right after the last observed day
future_dates = [_today + timedelta(days=i) for i in range(1, 8)]
X_future = np.arange(len(df), len(df) + 7).reshape(-1, 1)
predictions = model.predict(X_future)
future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})

# Render a word cloud image and hand it back as base64 text
def generate_wordcloud(text):
    """Build a word cloud from *text* and return it as a base64-encoded PNG.

    The cloud is rendered at 800x400 on a white background, saved as PNG
    into an in-memory buffer, and the buffer's bytes are base64-encoded.
    The returned string carries no ``data:`` URI prefix.
    """
    cloud = WordCloud(width=800, height=400, background_color='white')
    cloud.generate(text)

    png_buffer = BytesIO()
    rendered = cloud.to_image()
    rendered.save(png_buffer, format='PNG')

    encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode()

# Map the BERT pipeline's star-rating label onto a numeric score
def bert_score(result):
    """Convert a pipeline result's ``'label'`` into a number.

    '1 star' .. '5 stars' map to -1, -0.5, 0, 0.5 and 1 respectively.
    Any other label yields 0.
    """
    star_to_score = {
        '1 star': -1,
        '2 stars': -0.5,
        '3 stars': 0,
        '4 stars': 0.5,
        '5 stars': 1,
    }
    return star_to_score.get(result['label'], 0)

# Get overall sentiment score based on selected model
def get_overall_score(text, model_choice):
    """Score *text* with the sentiment model named by *model_choice*.

    Args:
        text: The text to score.
        model_choice: "VADER", "BERT" or "TextBlob".

    Returns:
        VADER's ``compound`` score, the BERT star label converted by
        ``bert_score``, or TextBlob's ``polarity``, depending on the model.

    Raises:
        ValueError: If *model_choice* is not one of the supported names.
            (Previously this fell through and returned ``None``, which only
            failed later when the caller tried to format the score.)
    """
    if model_choice == "VADER":
        return vader_analyzer.polarity_scores(text)['compound']
    if model_choice == "BERT":
        # BERT accepts at most 512 tokens; truncate rather than let the
        # pipeline raise on long inputs.
        result = bert_sentiment(text, truncation=True)[0]
        return bert_score(result)
    if model_choice == "TextBlob":
        return TextBlob(text).sentiment.polarity
    raise ValueError(
        f"Unknown sentiment model {model_choice!r}; "
        "expected 'VADER', 'BERT' or 'TextBlob'."
    )

# Streamlit app setup
st.title("🌟 Advanced Sentiment Analysis Dashboard")

# Sidebar for user input and controls
st.sidebar.header("🔍 Sentiment Analysis Controls")
analysis_mode = st.sidebar.radio("Analysis Mode", ["Single Text", "Compare Two Texts", "Analyze CSV File"])

# Give every input a default so the names exist whichever mode is active;
# only the widgets for the selected mode are actually rendered.
user_input = ""
user_input_a = ""
user_input_b = ""
uploaded_file = None

if analysis_mode == "Single Text":
    user_input = st.sidebar.text_area("Enter text for sentiment analysis")
elif analysis_mode == "Compare Two Texts":
    user_input_a = st.sidebar.text_area("Enter first text")
    user_input_b = st.sidebar.text_area("Enter second text")
elif analysis_mode == "Analyze CSV File":
    uploaded_file = st.sidebar.file_uploader("Upload a CSV file with 'text' column", type=["csv"])

# The model selector is shared by all three analysis modes
model_choice = st.sidebar.selectbox("Choose Sentiment Model", ["VADER", "BERT", "TextBlob"])

# Analyze button handler
def _has_letters(text):
    """Return True if *text* contains at least one alphabetic character."""
    return any(ch.isalpha() for ch in text)


def _render_single_text(text, model_choice):
    """Validate one text, then show overall and per-sentence sentiment,
    a word cloud, and a CSV download of the sentence scores."""
    if not text.strip():
        st.error("Please enter some text for analysis.")
        return
    if not _has_letters(text):
        st.error("Input should contain at least one alphabetic character.")
        return

    with st.spinner("Analyzing text..."):
        overall_score = get_overall_score(text, model_choice)
        st.subheader("📊 Overall Sentiment Analysis")
        st.write(f"**Sentiment Score ({model_choice})**: {overall_score:.2f}")

        # Sentence-level analysis: reuse the same scorer as the overall score
        # so both views always agree on how a model is applied.
        sentences = nltk.sent_tokenize(text)
        sentence_scores = [get_overall_score(s, model_choice) for s in sentences]

        sentiment_df = pd.DataFrame({"Sentence": sentences, "Sentiment Score": sentence_scores})
        st.subheader("🔍 Sentence-Level Sentiment")
        st.write(sentiment_df)
        fig = px.bar(sentiment_df, x="Sentence", y="Sentiment Score", title="Sentiment per Sentence")
        st.plotly_chart(fig)

        # Word cloud
        st.subheader("☁️ Word Cloud")
        try:
            wordcloud_img = f'data:image/png;base64,{generate_wordcloud(text)}'
        except ValueError:
            # WordCloud raises ValueError when no drawable words remain
            # (e.g. the input consists only of stopwords such as "a").
            st.info("Not enough distinct words to draw a word cloud.")
        else:
            st.image(wordcloud_img, use_column_width=True)

        # Download results.
        # NOTE: pressing the download button reruns the script; the Analyze
        # button is then no longer "pressed", so these results disappear.
        st.download_button(
            label="Download Sentiment Data",
            data=sentiment_df.to_csv(index=False).encode('utf-8'),
            file_name='sentiment_data.csv',
            mime='text/csv',
        )


def _render_comparison(text_a, text_b, model_choice):
    """Validate two texts, then show their overall scores side by side."""
    if not text_a.strip() or not text_b.strip():
        st.error("Please enter both texts for comparison.")
        return
    if not (_has_letters(text_a) and _has_letters(text_b)):
        st.error("Both inputs should contain at least one alphabetic character.")
        return

    with st.spinner("Analyzing texts..."):
        overall_score_a = get_overall_score(text_a, model_choice)
        overall_score_b = get_overall_score(text_b, model_choice)
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Text A")
            st.write(f"**Sentiment Score ({model_choice})**: {overall_score_a:.2f}")
        with col2:
            st.subheader("Text B")
            st.write(f"**Sentiment Score ({model_choice})**: {overall_score_b:.2f}")
        comparison_df = pd.DataFrame({
            "Text": ["Text A", "Text B"],
            "Sentiment Score": [overall_score_a, overall_score_b],
        })
        fig = px.bar(comparison_df, x="Text", y="Sentiment Score", title="Sentiment Comparison")
        st.plotly_chart(fig)


def _render_csv(csv_file, model_choice):
    """Score every row of an uploaded CSV's 'text' column and plot the distribution."""
    if csv_file is None:
        st.error("Please upload a CSV file.")
        return

    try:
        df_uploaded = pd.read_csv(csv_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return

    if "text" not in df_uploaded.columns:
        st.error("CSV file must contain a 'text' column.")
        return

    def score_cell(value):
        # Blank cells are read as NaN (a float) and numeric cells as numbers;
        # neither can be passed to the text models as-is. Leave blanks
        # unscored and coerce everything else to str.
        if pd.isna(value):
            return float("nan")
        return get_overall_score(str(value), model_choice)

    with st.spinner("Analyzing uploaded texts..."):
        df_uploaded['sentiment'] = df_uploaded['text'].map(score_cell)
        st.subheader("Uploaded Data Sentiment Analysis")
        st.write(df_uploaded)
        fig = px.histogram(df_uploaded, x='sentiment', title='Sentiment Distribution')
        st.plotly_chart(fig)


if st.sidebar.button("Analyze Sentiment"):
    if analysis_mode == "Single Text":
        _render_single_text(user_input, model_choice)
    elif analysis_mode == "Compare Two Texts":
        _render_comparison(user_input_a, user_input_b, model_choice)
    elif analysis_mode == "Analyze CSV File":
        _render_csv(uploaded_file, model_choice)

# Past sentiment trends (kept from original); `df` holds the demo history
st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
fig1 = px.line(df, x='Date', y='Sentiment Score', title='Sentiment Over Time', markers=True, line_shape='spline')
st.plotly_chart(fig1)

# Future sentiment predictions (kept from original); `future_df` holds the
# linear-regression extrapolation
st.subheader("🔮 Sentiment Prediction for Next 7 Days")
fig2 = px.line(future_df, x='Date', y='Predicted Sentiment', title='Predicted Sentiment Trend', markers=True, line_shape='spline')
st.plotly_chart(fig2)

# Reset button
if st.sidebar.button('🔄 Reset Analysis'):
    # st.experimental_rerun is deprecated and absent from newer Streamlit
    # releases; prefer st.rerun and fall back only on older installs.
    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
    _rerun()