Soundaryasos committed on
Commit
7251b8f
·
verified ·
1 Parent(s): 04ef2d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +219 -139
app.py CHANGED
@@ -5,36 +5,93 @@ import numpy as np
5
  import pandas as pd
6
  from datetime import datetime, timedelta
7
  import plotly.express as px
8
- from sklearn.linear_model import LinearRegression
9
  from wordcloud import WordCloud
10
  import base64
11
  from io import BytesIO
12
  import nltk
13
  from textblob import TextBlob
 
 
14
 
15
  # Download NLTK data
16
  nltk.download('punkt')
 
17
 
18
  # Initialize sentiment models
19
  bert_sentiment = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
20
  vader_analyzer = SentimentIntensityAnalyzer()
21
 
22
- # Generate sample past sentiment data (kept from original for demo purposes)
23
- dates = [datetime.today() - timedelta(days=i) for i in range(14)]
24
- sentiment_scores = np.random.uniform(-1, 1, len(dates))
25
- df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})
 
 
 
 
 
26
 
27
- # Train a regression model for predictions
28
- X = np.array(range(len(df))).reshape(-1, 1)
29
- y = df["Sentiment Score"]
30
- model = LinearRegression()
31
- model.fit(X, y)
32
 
33
- # Predict for next 7 days
34
- future_dates = [datetime.today() + timedelta(days=i) for i in range(1, 8)]
35
- X_future = np.array(range(len(df), len(df) + 7)).reshape(-1, 1)
36
- predictions = model.predict(X_future)
37
- future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  # Generate Word Cloud
40
  def generate_wordcloud(text):
@@ -43,140 +100,163 @@ def generate_wordcloud(text):
43
  wordcloud.to_image().save(img, format='PNG')
44
  return base64.b64encode(img.getvalue()).decode()
45
 
46
- # Helper function to convert BERT labels to numerical scores
47
- def bert_score(result):
48
- label = result['label']
49
- if label == '1 star':
50
- return -1
51
- elif label == '2 stars':
52
- return -0.5
53
- elif label == '3 stars':
54
- return 0
55
- elif label == '4 stars':
56
- return 0.5
57
- elif label == '5 stars':
58
- return 1
59
- return 0
60
-
61
- # Get overall sentiment score based on selected model
62
- def get_overall_score(text, model_choice):
63
- if model_choice == "VADER":
64
- return vader_analyzer.polarity_scores(text)['compound']
65
- elif model_choice == "BERT":
66
- result = bert_sentiment(text)[0]
67
- return bert_score(result)
68
- elif model_choice == "TextBlob":
69
- return TextBlob(text).sentiment.polarity
70
-
71
  # Streamlit app setup
72
- st.title("๐ŸŒŸ Advanced Sentiment Analysis Dashboard")
73
 
74
- # Sidebar for user input and controls
75
  st.sidebar.header("๐Ÿ” Sentiment Analysis Controls")
76
- analysis_mode = st.sidebar.radio("Analysis Mode", ["Single Text", "Compare Two Texts", "Analyze CSV File"])
 
 
 
 
77
 
78
- if analysis_mode == "Single Text":
79
- user_input = st.sidebar.text_area("Enter text for sentiment analysis")
80
- elif analysis_mode == "Compare Two Texts":
81
- user_input_a = st.sidebar.text_area("Enter first text")
82
- user_input_b = st.sidebar.text_area("Enter second text")
83
- elif analysis_mode == "Analyze CSV File":
84
- uploaded_file = st.sidebar.file_uploader("Upload a CSV file with 'text' column", type=["csv"])
85
 
86
- model_choice = st.sidebar.selectbox("Choose Sentiment Model", ["VADER", "BERT", "TextBlob"])
 
 
 
 
 
 
87
 
88
- # Analyze button handler
89
- if st.sidebar.button("Analyze Sentiment"):
90
- if analysis_mode == "Single Text":
91
- if not user_input.strip():
92
- st.error("Please enter some text for analysis.")
93
- elif not any(c.isalpha() for c in user_input):
94
- st.error("Input should contain at least one alphabetic character.")
95
- else:
96
- with st.spinner("Analyzing text..."):
97
- overall_score = get_overall_score(user_input, model_choice)
98
- st.subheader("๐Ÿ“Š Overall Sentiment Analysis")
99
- st.write(f"**Sentiment Score ({model_choice})**: {overall_score:.2f}")
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  # Sentence-level analysis
102
  sentences = nltk.sent_tokenize(user_input)
103
- if model_choice == "VADER":
104
- sentence_scores = [vader_analyzer.polarity_scores(s)['compound'] for s in sentences]
105
- elif model_choice == "BERT":
106
- sentence_scores = [bert_score(bert_sentiment(s)[0]) for s in sentences]
107
- elif model_choice == "TextBlob":
108
- sentence_scores = [TextBlob(s).sentiment.polarity for s in sentences]
109
-
110
- sentiment_df = pd.DataFrame({"Sentence": sentences, "Sentiment Score": sentence_scores})
111
- st.subheader("๐Ÿ” Sentence-Level Sentiment")
112
- st.write(sentiment_df)
113
- fig = px.bar(sentiment_df, x="Sentence", y="Sentiment Score", title="Sentiment per Sentence")
114
- st.plotly_chart(fig)
115
-
116
- # Word cloud
117
- st.subheader("โ˜๏ธ Word Cloud")
118
  wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
119
- st.image(wordcloud_img, use_column_width=True)
120
-
121
- # Download results
122
- @st.cache_data
123
- def convert_df_to_csv(df):
124
- return df.to_csv(index=False).encode('utf-8')
125
- csv = convert_df_to_csv(sentiment_df)
126
- st.download_button(
127
- label="Download Sentiment Data",
128
- data=csv,
129
- file_name='sentiment_data.csv',
130
- mime='text/csv',
131
- )
132
-
133
- elif analysis_mode == "Compare Two Texts":
134
- if not user_input_a.strip() or not user_input_b.strip():
135
- st.error("Please enter both texts for comparison.")
136
- elif not any(c.isalpha() for c in user_input_a) or not any(c.isalpha() for c in user_input_b):
137
- st.error("Both inputs should contain at least one alphabetic character.")
138
- else:
139
- with st.spinner("Analyzing texts..."):
140
- overall_score_a = get_overall_score(user_input_a, model_choice)
141
- overall_score_b = get_overall_score(user_input_b, model_choice)
142
- col1, col2 = st.columns(2)
143
- with col1:
144
- st.subheader("Text A")
145
- st.write(f"**Sentiment Score ({model_choice})**: {overall_score_a:.2f}")
146
- with col2:
147
- st.subheader("Text B")
148
- st.write(f"**Sentiment Score ({model_choice})**: {overall_score_b:.2f}")
149
- comparison_df = pd.DataFrame({
150
- "Text": ["Text A", "Text B"],
151
- "Sentiment Score": [overall_score_a, overall_score_b]
152
- })
153
- fig = px.bar(comparison_df, x="Text", y="Sentiment Score", title="Sentiment Comparison")
154
- st.plotly_chart(fig)
155
-
156
- elif analysis_mode == "Analyze CSV File":
157
- if uploaded_file is not None:
158
- df_uploaded = pd.read_csv(uploaded_file)
159
- if "text" not in df_uploaded.columns:
160
- st.error("CSV file must contain a 'text' column.")
161
- else:
162
- with st.spinner("Analyzing uploaded texts..."):
163
- df_uploaded['sentiment'] = df_uploaded['text'].apply(lambda x: get_overall_score(x, model_choice))
164
- st.subheader("Uploaded Data Sentiment Analysis")
165
- st.write(df_uploaded)
166
- fig = px.histogram(df_uploaded, x='sentiment', title='Sentiment Distribution')
167
- st.plotly_chart(fig)
168
- else:
169
- st.error("Please upload a CSV file.")
170
-
171
- # Past sentiment trends (kept from original)
172
- st.subheader("๐Ÿ“… Past Sentiment Trends (Last 14 Days)")
173
- fig1 = px.line(df, x='Date', y='Sentiment Score', title='Sentiment Over Time', markers=True, line_shape='spline')
174
- st.plotly_chart(fig1)
175
-
176
- # Future sentiment predictions (kept from original)
177
- st.subheader("๐Ÿ”ฎ Sentiment Prediction for Next 7 Days")
178
- fig2 = px.line(future_df, x='Date', y='Predicted Sentiment', title='Predicted Sentiment Trend', markers=True, line_shape='spline')
179
- st.plotly_chart(fig2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  # Reset button
182
  if st.sidebar.button('๐Ÿ”„ Reset Analysis'):
 
5
  import pandas as pd
6
  from datetime import datetime, timedelta
7
  import plotly.express as px
8
+ from sklearn.linear_model import Ridge
9
  from wordcloud import WordCloud
10
  import base64
11
  from io import BytesIO
12
  import nltk
13
  from textblob import TextBlob
14
+ import praw
15
+ from googleapiclient.discovery import build
16
 
17
  # Download NLTK data
18
  nltk.download('punkt')
19
+ nltk.download('stopwords')
20
 
21
  # Initialize sentiment models
22
  bert_sentiment = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
23
  vader_analyzer = SentimentIntensityAnalyzer()
24
 
25
# API credentials are read from the environment instead of being hard-coded.
# Secrets committed to source control are visible to anyone who can read the
# repository; the previously committed values should be rotated.
import os

# Reddit API setup (an empty value makes the later fetch fail and report an
# error instead of crashing the app at import time).
REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID", "")
REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET", "")
REDDIT_USER_AGENT = os.environ.get("REDDIT_USER_AGENT", "SoundaryaR_Bot/1.0")
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT,
)

# YouTube API setup
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "")
youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
 
 
38
 
39
# Numeric score for each star-rating label handled by bert_score.
_BERT_LABEL_SCORES = {
    '1 star': -1,
    '2 stars': -0.5,
    '3 stars': 0,
    '4 stars': 0.5,
    '5 stars': 1,
}


def bert_score(result):
    """Convert a BERT classification result into a score in [-1, 1].

    Args:
        result: Mapping with a 'label' key such as '1 star' ... '5 stars'.

    Returns:
        The score mapped to the label, or 0 when the label is not recognised.
    """
    return _BERT_LABEL_SCORES.get(result['label'], 0)
48
+
49
def analyze_text(text):
    """Score *text* with the three sentiment models used by the app.

    Args:
        text: The string to analyse.

    Returns:
        A tuple ``(vader_score, bert_num, textblob_score)``: the VADER
        compound score, the BERT label converted by ``bert_score``, and the
        TextBlob polarity.
    """
    vader_score = vader_analyzer.polarity_scores(text)['compound']
    # truncation=True keeps long inputs (e.g. many posts joined together)
    # within the model's maximum sequence length instead of raising.
    bert_result = bert_sentiment(text, truncation=True)[0]
    bert_num = bert_score(bert_result)
    textblob_score = TextBlob(text).sentiment.polarity
    return vader_score, bert_num, textblob_score
56
+
57
# Cached for 10 minutes only: the data is meant to be live, and an empty
# result cached after a transient error should not stick for the whole session.
@st.cache_data(ttl=600)
def fetch_reddit_data(keyword):
    """Search r/all for *keyword* and return the matching posts.

    Args:
        keyword: Search term passed to the Reddit search.

    Returns:
        A DataFrame with 'date', 'text' (title plus self-text) and 'source'
        columns; an empty DataFrame if the request fails (the error is shown
        in the app).
    """
    from datetime import timezone

    try:
        posts = reddit.subreddit("all").search(keyword, limit=100)
        data = [
            {
                # created_utc is a Unix timestamp; convert it explicitly to
                # naive UTC rather than the server's local time so it is
                # comparable with the UTC-based YouTube dates.
                'date': datetime.fromtimestamp(
                    post.created_utc, tz=timezone.utc
                ).replace(tzinfo=None),
                'text': post.title + " " + post.selftext,
                'source': 'Reddit',
            }
            for post in posts
        ]
        return pd.DataFrame(data)
    except Exception as e:
        st.error(f"Error fetching Reddit data: {e}")
        return pd.DataFrame()
74
+
75
# Cached for 10 minutes only: the data is meant to be live, and an empty
# result cached after a transient error should not stick for the whole session.
@st.cache_data(ttl=600)
def fetch_youtube_data(keyword):
    """Search YouTube videos for *keyword* and return their titles/descriptions.

    Args:
        keyword: Search term passed to the YouTube search endpoint.

    Returns:
        A DataFrame with 'date', 'text' (title plus description) and 'source'
        columns; an empty DataFrame if the request fails (the error is shown
        in the app).
    """
    import html

    try:
        request = youtube.search().list(q=keyword, part="snippet", maxResults=50, type="video")
        response = request.execute()
        data = []
        for item in response['items']:
            snippet = item['snippet']
            # NOTE(review): search results appear to return HTML-escaped text
            # (e.g. "&#39;"); unescape so the sentiment models and word cloud
            # see plain text. Unescaping is a no-op on unescaped input.
            title = html.unescape(snippet['title'])
            description = html.unescape(snippet['description'])
            # Parse ISO 8601 leniently (with or without fractional seconds)
            # and keep the value as naive UTC, as the strict strptime did.
            published_at = pd.to_datetime(snippet['publishedAt'], utc=True).tz_localize(None)
            data.append({
                'date': published_at,
                'text': title + " " + description,
                'source': 'YouTube',
            })
        return pd.DataFrame(data)
    except Exception as e:
        st.error(f"Error fetching YouTube data: {e}")
        return pd.DataFrame()
95
 
96
  # Generate Word Cloud
97
  def generate_wordcloud(text):
 
100
  wordcloud.to_image().save(img, format='PNG')
101
  return base64.b64encode(img.getvalue()).decode()
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
# Streamlit app setup
st.title("🌟 SentimentSync: Live Sentiment Analysis Dashboard")

# Sidebar for user input and keyword
st.sidebar.header("🔍 Sentiment Analysis Controls")
analysis_mode = st.sidebar.radio("Analysis Mode", ["Manual Text", "Live Data (Reddit & YouTube)"])
# Only the input widget for the selected mode exists, so only one of
# `user_input` / `keyword` is defined on a given run.
if analysis_mode == "Manual Text":
    user_input = st.sidebar.text_area("Enter text for sentiment analysis", height=200)
else:
    keyword = st.sidebar.text_input("Enter keyword for live data (e.g., 'Tesla')")
113
 
114
def display_sentiment_analysis(vader_score, bert_result, textblob_score, df=None):
    """Render the overall sentiment results and an optional detail table.

    Args:
        vader_score: VADER compound score.
        bert_result: Mapping with 'label' and 'score' keys from the BERT model.
        textblob_score: TextBlob polarity.
        df: Optional DataFrame with 'date', 'text', 'VADER', 'BERT',
            'TextBlob' and 'Average' columns; shown only when non-empty.
    """
    st.subheader("📊 Sentiment Analysis Results")
    st.write(f"**VADER Sentiment Score**: {vader_score:.2f}")
    st.write(f"**BERT Sentiment**: {bert_result['label']} ({bert_result['score']:.2f})")
    st.write(f"**TextBlob Sentiment Polarity**: {textblob_score:.2f}")

    # Split the single VADER compound score into three bars; the negative
    # part is kept below zero so it is drawn downwards.
    sentiment_data = {
        'Positive': max(0, vader_score),
        'Negative': min(0, vader_score),
        'Neutral': 1 - abs(vader_score),
    }
    sentiment_df_overall = pd.DataFrame(list(sentiment_data.items()), columns=["Sentiment", "Score"])
    st.bar_chart(sentiment_df_overall.set_index("Sentiment"))

    if df is not None and not df.empty:
        st.subheader("🔍 Detailed Sentiment Data")
        st.write(df[['date', 'text', 'VADER', 'BERT', 'TextBlob', 'Average']])
 
 
 
 
 
 
 
 
 
132
 
133
# Process and analyze: runs when the sidebar button is pressed. Validates the
# input for the selected mode, then renders the results for either the typed
# text or the data fetched from Reddit and YouTube.
if st.sidebar.button("Analyze Sentiment"):
    if analysis_mode == "Manual Text" and (not user_input or not any(c.isalpha() for c in user_input)):
        st.warning("⚠️ Please enter text with at least one alphabetic character.")
    elif analysis_mode == "Live Data (Reddit & YouTube)" and not keyword:
        st.warning("⚠️ Please enter a keyword for live data analysis.")
    else:
        with st.spinner("Analyzing..."):
            score_columns = ["VADER", "BERT", "TextBlob", "Average"]

            if analysis_mode == "Manual Text":
                # Overall sentiment for manual input
                vader_score, _, textblob_score = analyze_text(user_input)
                # The raw BERT result (label + confidence) is needed for display;
                # truncate so long inputs stay within the model's length limit.
                bert_result = bert_sentiment(user_input, truncation=True)[0]

                # Sentence-level analysis
                sentences = nltk.sent_tokenize(user_input)
                if len(sentences) > 1:
                    # Sentences carry no timestamps; assign one synthetic day per
                    # sentence, ending today, so the table has a 'date' column.
                    dates = [
                        datetime.today() - timedelta(days=len(sentences) - 1 - i)
                        for i in range(len(sentences))
                    ]
                    sentence_data = [analyze_text(s) for s in sentences]
                    df = pd.DataFrame({
                        "date": dates,
                        "text": sentences,
                        "VADER": [d[0] for d in sentence_data],
                        "BERT": [d[1] for d in sentence_data],
                        "TextBlob": [d[2] for d in sentence_data],
                    })
                    df["Average"] = df[["VADER", "BERT", "TextBlob"]].mean(axis=1)
                else:
                    df = pd.DataFrame()

                display_sentiment_analysis(vader_score, bert_result, textblob_score, df)
                wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
                st.image(wordcloud_img, use_column_width=True, caption="Word Cloud of Input Text")

            elif analysis_mode == "Live Data (Reddit & YouTube)":
                # Fetch live data from both sources and stack them
                reddit_df = fetch_reddit_data(keyword)
                youtube_df = fetch_youtube_data(keyword)
                df = pd.concat([reddit_df, youtube_df], ignore_index=True)

                if not df.empty:
                    # Analyze sentiment for each post
                    sentiment_data = [analyze_text(text) for text in df['text']]
                    df['VADER'] = [d[0] for d in sentiment_data]
                    df['BERT'] = [d[1] for d in sentiment_data]
                    df['TextBlob'] = [d[2] for d in sentiment_data]
                    df['Average'] = df[['VADER', 'BERT', 'TextBlob']].mean(axis=1)

                    # Overall sentiment for all fetched data
                    combined_text = " ".join(df['text'])
                    vader_score, _, textblob_score = analyze_text(combined_text)
                    # The combined text is far longer than a single post, so
                    # truncation is required for the BERT call.
                    bert_result = bert_sentiment(combined_text, truncation=True)[0]
                    display_sentiment_analysis(vader_score, bert_result, textblob_score, df)

                    # Word cloud
                    wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
                    st.image(wordcloud_img, use_column_width=True, caption=f"Word Cloud for '{keyword}'")

                    # Dynamic 14-day filter based on current date
                    df['date'] = pd.to_datetime(df['date'])
                    current_date = datetime.today()
                    cutoff_date = current_date - timedelta(days=14)
                    df_recent = df[df['date'] >= cutoff_date].sort_values('date')

                    if not df_recent.empty:
                        # Past sentiment trends
                        st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
                        fig1 = px.line(df_recent, x='date', y=score_columns,
                                       title=f'Sentiment Over Time for "{keyword}" (Last 14 Days from {current_date.date()})',
                                       markers=True, line_shape='spline')
                        st.plotly_chart(fig1)

                        # Sentiment prediction with Ridge Regression
                        st.subheader("🔮 Sentiment Prediction for Next 7 Days")
                        # Feature: days elapsed since the earliest recent post.
                        min_date = df_recent['date'].min()
                        X = ((df_recent['date'] - min_date).dt.total_seconds() / 86400).to_numpy().reshape(-1, 1)
                        future_dates = [current_date + timedelta(days=i) for i in range(1, 8)]
                        # Express the future dates on the same axis as X so each
                        # prediction corresponds to the date it is plotted at.
                        X_future = np.array(
                            [(pd.Timestamp(d) - min_date).total_seconds() / 86400 for d in future_dates]
                        ).reshape(-1, 1)

                        models = {
                            name: Ridge(alpha=1.0).fit(X, df_recent[name])
                            for name in score_columns
                        }
                        predictions = {name: model.predict(X_future) for name, model in models.items()}
                        future_df = pd.DataFrame({"Date": future_dates, **predictions})
                        fig2 = px.line(future_df, x='Date', y=score_columns,
                                       title=f'Predicted Sentiment Trend for "{keyword}" (Next 7 Days from {current_date.date()})',
                                       markers=True, line_shape='spline')
                        st.plotly_chart(fig2)

                        # Sentiment distribution
                        st.subheader("📊 Sentiment Distribution")
                        dist_values = [
                            int((df_recent['Average'] > 0).sum()),
                            int((df_recent['Average'] < 0).sum()),
                            int((df_recent['Average'] == 0).sum()),
                        ]
                        fig3 = px.pie(values=dist_values, names=['Positive', 'Negative', 'Neutral'],
                                      title=f'Sentiment Distribution for "{keyword}" (Last 14 Days)', hole=0.3)
                        st.plotly_chart(fig3)

                        # Sentiment scatter plot
                        st.subheader("🔎 Sentiment Scatter Plot")
                        fig4 = px.scatter(df_recent, x='date', y="Average",
                                          title=f'Sentiment Over Time for "{keyword}" (Last 14 Days)',
                                          text=df_recent["text"].str[:20] + "...", color="source")
                        fig4.update_traces(textposition='top center')
                        st.plotly_chart(fig4)

                        # Rolling average (df_recent is already sorted by date)
                        st.subheader("📈 Rolling Average Sentiment")
                        window = min(7, len(df_recent))
                        df_recent['Rolling Avg'] = df_recent['Average'].rolling(window=window, min_periods=1).mean()
                        fig5 = px.line(df_recent, x='date', y='Rolling Avg',
                                       title=f"Rolling Average Sentiment for '{keyword}' (Last 14 Days, Window: {window})",
                                       markers=True)
                        st.plotly_chart(fig5)
                    else:
                        st.info(f"No data within the last 14 days (from {cutoff_date.date()} to {current_date.date()}) for this keyword.")
                else:
                    st.error("No data fetched. Check API credentials or keyword.")
260
 
261
  # Reset button
262
  if st.sidebar.button('๐Ÿ”„ Reset Analysis'):