Soundaryasos committed on
Commit
04ef2d6
·
verified ·
1 Parent(s): e44387d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -45
app.py CHANGED
@@ -12,18 +12,19 @@ from io import BytesIO
12
  import nltk
13
  from textblob import TextBlob
14
 
 
15
  nltk.download('punkt')
16
 
17
  # Initialize sentiment models
18
  bert_sentiment = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
19
  vader_analyzer = SentimentIntensityAnalyzer()
20
 
21
- # Generate sample past sentiment data
22
  dates = [datetime.today() - timedelta(days=i) for i in range(14)]
23
  sentiment_scores = np.random.uniform(-1, 1, len(dates))
24
  df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})
25
 
26
- # Train a regression model
27
  X = np.array(range(len(df))).reshape(-1, 1)
28
  y = df["Sentiment Score"]
29
  model = LinearRegression()
@@ -33,7 +34,6 @@ model.fit(X, y)
33
  future_dates = [datetime.today() + timedelta(days=i) for i in range(1, 8)]
34
  X_future = np.array(range(len(df), len(df) + 7)).reshape(-1, 1)
35
  predictions = model.predict(X_future)
36
-
37
  future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
38
 
39
  # Generate Word Cloud
@@ -43,63 +43,141 @@ def generate_wordcloud(text):
43
  wordcloud.to_image().save(img, format='PNG')
44
  return base64.b64encode(img.getvalue()).decode()
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # Streamlit app setup
47
  st.title("🌟 Advanced Sentiment Analysis Dashboard")
48
 
49
- # Sidebar for user input
50
  st.sidebar.header("🔍 Sentiment Analysis Controls")
51
- user_input = st.sidebar.text_area("Enter text for sentiment analysis")
52
-
53
- # Display sentiment analysis results
54
- def display_sentiment_analysis(vader_score, bert_result, textblob_score):
55
- st.subheader("📊 Sentiment Analysis Results:")
56
- st.write(f"**VADER Sentiment Score**: {vader_score:.2f}")
57
- st.write(f"**BERT Sentiment**: {bert_result['label']} ({bert_result['score']:.2f})")
58
- st.write(f"**TextBlob Sentiment Polarity**: {textblob_score:.2f}")
59
 
60
- sentiment_data = {'Positive': max(0, vader_score), 'Negative': min(0, vader_score), 'Neutral': 1 - abs(vader_score)}
61
- sentiment_df = pd.DataFrame(list(sentiment_data.items()), columns=["Sentiment", "Score"])
62
- st.bar_chart(sentiment_df.set_index("Sentiment"))
 
 
 
 
63
 
64
- wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
65
- st.image(wordcloud_img, use_column_width=True)
66
 
 
67
  if st.sidebar.button("Analyze Sentiment"):
68
- if user_input:
69
- with st.spinner("Analyzing text..."):
70
- vader_score = vader_analyzer.polarity_scores(user_input)['compound']
71
- bert_result = bert_sentiment(user_input)[0]
72
- textblob_score = TextBlob(user_input).sentiment.polarity
73
- display_sentiment_analysis(vader_score, bert_result, textblob_score)
74
- else:
75
- st.warning("⚠️ Please enter some text for analysis.")
76
-
77
- # Past sentiment trends
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
79
  fig1 = px.line(df, x='Date', y='Sentiment Score', title='Sentiment Over Time', markers=True, line_shape='spline')
80
  st.plotly_chart(fig1)
81
 
82
- # Future sentiment predictions
83
  st.subheader("🔮 Sentiment Prediction for Next 7 Days")
84
  fig2 = px.line(future_df, x='Date', y='Predicted Sentiment', title='Predicted Sentiment Trend', markers=True, line_shape='spline')
85
  st.plotly_chart(fig2)
86
 
87
- # Sentiment distribution pie chart
88
- st.subheader("📊 Sentiment Distribution")
89
- fig3 = px.pie(values=[sum(df['Sentiment Score'] > 0), sum(df['Sentiment Score'] <= 0)], names=['Positive', 'Negative'], title='Sentiment Distribution', hole=0.3)
90
- st.plotly_chart(fig3)
91
-
92
- # Sentiment scatter plot
93
- st.subheader("🔎 Sentiment Scatter Plot (Last 14 Days)")
94
- fig4 = px.scatter(df, x='Date', y='Sentiment Score', title='Sentiment Over Time')
95
- st.plotly_chart(fig4)
96
-
97
- # Rolling average sentiment
98
- st.subheader("📈 Rolling Average of Sentiment (7-Day Window)")
99
- df['Rolling Avg Sentiment'] = df['Sentiment Score'].rolling(window=7).mean()
100
- fig5 = px.line(df, x='Date', y='Rolling Avg Sentiment', title="7-Day Rolling Average Sentiment")
101
- st.plotly_chart(fig5)
102
-
103
  # Reset button
104
  if st.sidebar.button('🔄 Reset Analysis'):
105
- st.experimental_rerun()
 
12
  import nltk
13
  from textblob import TextBlob
14
 
15
+ # Download NLTK data
16
  nltk.download('punkt')
17
 
18
  # Initialize sentiment models
19
  bert_sentiment = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
20
  vader_analyzer = SentimentIntensityAnalyzer()
21
 
22
+ # Generate sample past sentiment data (kept from original for demo purposes)
23
  dates = [datetime.today() - timedelta(days=i) for i in range(14)]
24
  sentiment_scores = np.random.uniform(-1, 1, len(dates))
25
  df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})
26
 
27
+ # Train a regression model for predictions
28
  X = np.array(range(len(df))).reshape(-1, 1)
29
  y = df["Sentiment Score"]
30
  model = LinearRegression()
 
34
  future_dates = [datetime.today() + timedelta(days=i) for i in range(1, 8)]
35
  X_future = np.array(range(len(df), len(df) + 7)).reshape(-1, 1)
36
  predictions = model.predict(X_future)
 
37
  future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
38
 
39
  # Generate Word Cloud
 
43
  wordcloud.to_image().save(img, format='PNG')
44
  return base64.b64encode(img.getvalue()).decode()
45
 
46
+ # Helper function to convert BERT labels to numerical scores
47
+ def bert_score(result):
48
+ label = result['label']
49
+ if label == '1 star':
50
+ return -1
51
+ elif label == '2 stars':
52
+ return -0.5
53
+ elif label == '3 stars':
54
+ return 0
55
+ elif label == '4 stars':
56
+ return 0.5
57
+ elif label == '5 stars':
58
+ return 1
59
+ return 0
60
+
61
+ # Get overall sentiment score based on selected model
62
+ def get_overall_score(text, model_choice):
63
+ if model_choice == "VADER":
64
+ return vader_analyzer.polarity_scores(text)['compound']
65
+ elif model_choice == "BERT":
66
+ result = bert_sentiment(text)[0]
67
+ return bert_score(result)
68
+ elif model_choice == "TextBlob":
69
+ return TextBlob(text).sentiment.polarity
70
+
71
  # Streamlit app setup
72
  st.title("🌟 Advanced Sentiment Analysis Dashboard")
73
 
74
+ # Sidebar for user input and controls
75
  st.sidebar.header("🔍 Sentiment Analysis Controls")
76
+ analysis_mode = st.sidebar.radio("Analysis Mode", ["Single Text", "Compare Two Texts", "Analyze CSV File"])
 
 
 
 
 
 
 
77
 
78
+ if analysis_mode == "Single Text":
79
+ user_input = st.sidebar.text_area("Enter text for sentiment analysis")
80
+ elif analysis_mode == "Compare Two Texts":
81
+ user_input_a = st.sidebar.text_area("Enter first text")
82
+ user_input_b = st.sidebar.text_area("Enter second text")
83
+ elif analysis_mode == "Analyze CSV File":
84
+ uploaded_file = st.sidebar.file_uploader("Upload a CSV file with 'text' column", type=["csv"])
85
 
86
+ model_choice = st.sidebar.selectbox("Choose Sentiment Model", ["VADER", "BERT", "TextBlob"])
 
87
 
88
+ # Analyze button handler
89
  if st.sidebar.button("Analyze Sentiment"):
90
+ if analysis_mode == "Single Text":
91
+ if not user_input.strip():
92
+ st.error("Please enter some text for analysis.")
93
+ elif not any(c.isalpha() for c in user_input):
94
+ st.error("Input should contain at least one alphabetic character.")
95
+ else:
96
+ with st.spinner("Analyzing text..."):
97
+ overall_score = get_overall_score(user_input, model_choice)
98
+ st.subheader("📊 Overall Sentiment Analysis")
99
+ st.write(f"**Sentiment Score ({model_choice})**: {overall_score:.2f}")
100
+
101
+ # Sentence-level analysis
102
+ sentences = nltk.sent_tokenize(user_input)
103
+ if model_choice == "VADER":
104
+ sentence_scores = [vader_analyzer.polarity_scores(s)['compound'] for s in sentences]
105
+ elif model_choice == "BERT":
106
+ sentence_scores = [bert_score(bert_sentiment(s)[0]) for s in sentences]
107
+ elif model_choice == "TextBlob":
108
+ sentence_scores = [TextBlob(s).sentiment.polarity for s in sentences]
109
+
110
+ sentiment_df = pd.DataFrame({"Sentence": sentences, "Sentiment Score": sentence_scores})
111
+ st.subheader("🔍 Sentence-Level Sentiment")
112
+ st.write(sentiment_df)
113
+ fig = px.bar(sentiment_df, x="Sentence", y="Sentiment Score", title="Sentiment per Sentence")
114
+ st.plotly_chart(fig)
115
+
116
+ # Word cloud
117
+ st.subheader("☁️ Word Cloud")
118
+ wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
119
+ st.image(wordcloud_img, use_column_width=True)
120
+
121
+ # Download results
122
+ @st.cache_data
123
+ def convert_df_to_csv(df):
124
+ return df.to_csv(index=False).encode('utf-8')
125
+ csv = convert_df_to_csv(sentiment_df)
126
+ st.download_button(
127
+ label="Download Sentiment Data",
128
+ data=csv,
129
+ file_name='sentiment_data.csv',
130
+ mime='text/csv',
131
+ )
132
+
133
+ elif analysis_mode == "Compare Two Texts":
134
+ if not user_input_a.strip() or not user_input_b.strip():
135
+ st.error("Please enter both texts for comparison.")
136
+ elif not any(c.isalpha() for c in user_input_a) or not any(c.isalpha() for c in user_input_b):
137
+ st.error("Both inputs should contain at least one alphabetic character.")
138
+ else:
139
+ with st.spinner("Analyzing texts..."):
140
+ overall_score_a = get_overall_score(user_input_a, model_choice)
141
+ overall_score_b = get_overall_score(user_input_b, model_choice)
142
+ col1, col2 = st.columns(2)
143
+ with col1:
144
+ st.subheader("Text A")
145
+ st.write(f"**Sentiment Score ({model_choice})**: {overall_score_a:.2f}")
146
+ with col2:
147
+ st.subheader("Text B")
148
+ st.write(f"**Sentiment Score ({model_choice})**: {overall_score_b:.2f}")
149
+ comparison_df = pd.DataFrame({
150
+ "Text": ["Text A", "Text B"],
151
+ "Sentiment Score": [overall_score_a, overall_score_b]
152
+ })
153
+ fig = px.bar(comparison_df, x="Text", y="Sentiment Score", title="Sentiment Comparison")
154
+ st.plotly_chart(fig)
155
+
156
+ elif analysis_mode == "Analyze CSV File":
157
+ if uploaded_file is not None:
158
+ df_uploaded = pd.read_csv(uploaded_file)
159
+ if "text" not in df_uploaded.columns:
160
+ st.error("CSV file must contain a 'text' column.")
161
+ else:
162
+ with st.spinner("Analyzing uploaded texts..."):
163
+ df_uploaded['sentiment'] = df_uploaded['text'].apply(lambda x: get_overall_score(x, model_choice))
164
+ st.subheader("Uploaded Data Sentiment Analysis")
165
+ st.write(df_uploaded)
166
+ fig = px.histogram(df_uploaded, x='sentiment', title='Sentiment Distribution')
167
+ st.plotly_chart(fig)
168
+ else:
169
+ st.error("Please upload a CSV file.")
170
+
171
+ # Past sentiment trends (kept from original)
172
  st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
173
  fig1 = px.line(df, x='Date', y='Sentiment Score', title='Sentiment Over Time', markers=True, line_shape='spline')
174
  st.plotly_chart(fig1)
175
 
176
+ # Future sentiment predictions (kept from original)
177
  st.subheader("🔮 Sentiment Prediction for Next 7 Days")
178
  fig2 = px.line(future_df, x='Date', y='Predicted Sentiment', title='Predicted Sentiment Trend', markers=True, line_shape='spline')
179
  st.plotly_chart(fig2)
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  # Reset button
182
  if st.sidebar.button('🔄 Reset Analysis'):
183
+ st.experimental_rerun()