Soundaryasos committed
Commit e8b61e5 · verified · 1 Parent(s): 78b064a

Update app.py

Files changed (1)
  1. app.py +346 -207
app.py CHANGED
@@ -13,251 +13,390 @@ import nltk
  from textblob import TextBlob
  import praw
  from googleapiclient.discovery import build

- # Download NLTK data
- nltk.download('punkt')
- nltk.download('stopwords')

- # Initialize sentiment models
- bert_sentiment = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
- vader_analyzer = SentimentIntensityAnalyzer()
-
- # Reddit API setup with your credentials
- REDDIT_CLIENT_ID = "S7pTXhj5JDFGDb3-_zrJEA"
- REDDIT_CLIENT_SECRET = "QP3NYN4lrAKVLrBamzLGrpFywiVg8w"
- REDDIT_USER_AGENT = "SoundaryaR_Bot/1.0"
- reddit = praw.Reddit(
-     client_id=REDDIT_CLIENT_ID,
-     client_secret=REDDIT_CLIENT_SECRET,
-     user_agent=REDDIT_USER_AGENT
  )

- # YouTube API setup with your API key
- YOUTUBE_API_KEY = "AIzaSyAChqXPaiNE9hKhApkgjgonzdgiCCOo"
- youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

- # Helper function to convert BERT label to numerical score
  def bert_score(result):
-     label = result['label']
-     if label == '1 star': return -1
-     elif label == '2 stars': return -0.5
-     elif label == '3 stars': return 0
-     elif label == '4 stars': return 0.5
-     elif label == '5 stars': return 1
-     return 0

- # Analyze sentiment and return scores from all models
  def analyze_text(text):
-     vader_score = vader_analyzer.polarity_scores(text)['compound']
-     bert_result = bert_sentiment(text)[0]
-     bert_num = bert_score(bert_result)
-     textblob_score = TextBlob(text).sentiment.polarity
-     return vader_score, bert_num, textblob_score

- # Fetch live Reddit data
- @st.cache_data
- def fetch_reddit_data(keyword):
      try:
          subreddit = reddit.subreddit("all")
-         posts = subreddit.search(keyword, limit=100)
          data = []
          for post in posts:
              data.append({
                  'date': datetime.fromtimestamp(post.created_utc),
-                 'text': post.title + " " + post.selftext,
-                 'source': 'Reddit'
              })
          return pd.DataFrame(data)
      except Exception as e:
-         st.error(f"Error fetching Reddit data: {e}")
          return pd.DataFrame()

- # Fetch live YouTube data
- @st.cache_data
- def fetch_youtube_data(keyword):
      try:
-         request = youtube.search().list(q=keyword, part="snippet", maxResults=50, type="video")
          response = request.execute()
          data = []
          for item in response['items']:
-             title = item['snippet']['title']
-             description = item['snippet']['description']
-             published_at = datetime.strptime(item['snippet']['publishedAt'], '%Y-%m-%dT%H:%M:%SZ')
              data.append({
-                 'date': published_at,
-                 'text': title + " " + description,
-                 'source': 'YouTube'
              })
          return pd.DataFrame(data)
      except Exception as e:
-         st.error(f"Error fetching YouTube data: {e}")
          return pd.DataFrame()

- # Generate Word Cloud
- def generate_wordcloud(text):
-     wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
-     img = BytesIO()
-     wordcloud.to_image().save(img, format='PNG')
-     return base64.b64encode(img.getvalue()).decode()
-
- # Streamlit app setup
- st.title("🌟 SentimentSync: Live Sentiment Analysis Dashboard")

- # Sidebar for user input and keyword
- st.sidebar.header("🔍 Sentiment Analysis Controls")
- analysis_mode = st.sidebar.radio("Analysis Mode", ["Manual Text", "Live Data (Reddit & YouTube)"])
- if analysis_mode == "Manual Text":
-     user_input = st.sidebar.text_area("Enter text for sentiment analysis", height=200)
- else:
-     keyword = st.sidebar.text_input("Enter keyword for live data (e.g., 'Tesla')")
-
- # Display sentiment analysis results
- def display_sentiment_analysis(vader_score, bert_result, textblob_score, df=None):
-     st.subheader("📊 Sentiment Analysis Results")
-     st.write(f"**VADER Sentiment Score**: {vader_score:.2f}")
-     st.write(f"**BERT Sentiment**: {bert_result['label']} ({bert_result['score']:.2f})")
-     st.write(f"**TextBlob Sentiment Polarity**: {textblob_score:.2f}")

-     sentiment_data = {
-         'Positive': max(0, vader_score),
-         'Negative': min(0, vader_score),
-         'Neutral': 1 - abs(vader_score)
-     }
-     sentiment_df_overall = pd.DataFrame(list(sentiment_data.items()), columns=["Sentiment", "Score"])
-     st.bar_chart(sentiment_df_overall.set_index("Sentiment"))

-     if df is not None and not df.empty:
-         st.subheader("🔍 Detailed Sentiment Data")
-         st.write(df[['date', 'text', 'VADER', 'BERT', 'TextBlob', 'Average']])

- # Process and analyze
- if st.sidebar.button("Analyze Sentiment"):
-     if analysis_mode == "Manual Text" and (not user_input or not any(c.isalpha() for c in user_input)):
-         st.warning("⚠️ Please enter text with at least one alphabetic character.")
-     elif analysis_mode == "Live Data (Reddit & YouTube)" and not keyword:
-         st.warning("⚠️ Please enter a keyword for live data analysis.")
-     else:
          with st.spinner("Analyzing..."):
              if analysis_mode == "Manual Text":
-                 # Overall sentiment for manual input
-                 vader_score, bert_num, textblob_score = analyze_text(user_input)
-                 bert_result = bert_sentiment(user_input)[0]

-                 # Sentence-level analysis
-                 sentences = nltk.sent_tokenize(user_input)
-                 if len(sentences) > 1:
-                     dates = [datetime.today() - timedelta(days=len(sentences) - 1 - i) for i in range(len(sentences))]
-                     sentence_data = [analyze_text(s) for s in sentences]
-                     df = pd.DataFrame({
-                         "date": dates,
-                         "text": sentences,
-                         "VADER": [d[0] for d in sentence_data],
-                         "BERT": [d[1] for d in sentence_data],
-                         "TextBlob": [d[2] for d in sentence_data]
-                     })
-                     df["Average"] = df[["VADER", "BERT", "TextBlob"]].mean(axis=1)
-                 else:
-                     df = pd.DataFrame()
-
-                 display_sentiment_analysis(vader_score, bert_result, textblob_score, df)
                  wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
-                 st.image(wordcloud_img, use_column_width=True, caption="Word Cloud of Input Text")
-
-             elif analysis_mode == "Live Data (Reddit & YouTube)":
-                 # Fetch live data with your credentials
-                 reddit_df = fetch_reddit_data(keyword)
-                 youtube_df = fetch_youtube_data(keyword)
-                 df = pd.concat([reddit_df, youtube_df], ignore_index=True)
-
-                 if not df.empty:
-                     # Analyze sentiment for each post
-                     sentiment_data = [analyze_text(row['text']) for _, row in df.iterrows()]
-                     df['VADER'] = [d[0] for d in sentiment_data]
-                     df['BERT'] = [d[1] for d in sentiment_data]
-                     df['TextBlob'] = [d[2] for d in sentiment_data]
-                     df['Average'] = df[['VADER', 'BERT', 'TextBlob']].mean(axis=1)
-
-                     # Overall sentiment for all fetched data
-                     combined_text = " ".join(df['text'])
-                     vader_score, bert_num, textblob_score = analyze_text(combined_text)
-                     bert_result = bert_sentiment(combined_text)[0]
-                     display_sentiment_analysis(vader_score, bert_result, textblob_score, df)
-
-                     # Word cloud
-                     wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
-                     st.image(wordcloud_img, use_column_width=True, caption=f"Word Cloud for '{keyword}'")
-
-                     # Dynamic 14-day filter based on current date
-                     df['date'] = pd.to_datetime(df['date'])
-                     current_date = datetime.today()
-                     cutoff_date = current_date - timedelta(days=14)
-                     df_recent = df[df['date'] >= cutoff_date].sort_values('date')
-
-                     # Past sentiment trends
-                     if not df_recent.empty:
-                         st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
-                         fig1 = px.line(df_recent, x='date', y=["VADER", "BERT", "TextBlob", "Average"],
-                                        title=f'Sentiment Over Time for "{keyword}" (Last 14 Days from {current_date.date()})',
-                                        markers=True, line_shape='spline')
-                         st.plotly_chart(fig1)
-
-                         # Sentiment prediction with Ridge Regression
-                         st.subheader("🔮 Sentiment Prediction for Next 7 Days")
-                         X = np.array((df_recent['date'] - df_recent['date'].min()).dt.total_seconds() / 86400).reshape(-1, 1)
-                         future_dates = [current_date + timedelta(days=i) for i in range(1, 8)]
-                         X_future = np.array(range(int(X[-1]) + 1, int(X[-1]) + 8)).reshape(-1, 1)
-
-                         models = {
-                             "VADER": Ridge(alpha=1.0).fit(X, df_recent["VADER"]),
-                             "BERT": Ridge(alpha=1.0).fit(X, df_recent["BERT"]),
-                             "TextBlob": Ridge(alpha=1.0).fit(X, df_recent["TextBlob"]),
-                             "Average": Ridge(alpha=1.0).fit(X, df_recent["Average"])
-                         }
-                         predictions = {name: model.predict(X_future) for name, model in models.items()}
-                         future_df = pd.DataFrame({
-                             "Date": future_dates,
-                             "VADER": predictions["VADER"],
-                             "BERT": predictions["BERT"],
-                             "TextBlob": predictions["TextBlob"],
-                             "Average": predictions["Average"]
                          })
-                         fig2 = px.line(future_df, x='Date', y=["VADER", "BERT", "TextBlob", "Average"],
-                                        title=f'Predicted Sentiment Trend for "{keyword}" (Next 7 Days from {current_date.date()})',
-                                        markers=True, line_shape='spline')
-                         st.plotly_chart(fig2)
-
-                         # Sentiment distribution
-                         st.subheader("📊 Sentiment Distribution")
-                         dist_values = [
-                             sum(df_recent['Average'] > 0),
-                             sum(df_recent['Average'] < 0),
-                             sum(df_recent['Average'] == 0)
-                         ]
-                         fig3 = px.pie(values=dist_values, names=['Positive', 'Negative', 'Neutral'],
-                                       title=f'Sentiment Distribution for "{keyword}" (Last 14 Days)', hole=0.3)
-                         st.plotly_chart(fig3)
-
-                         # Sentiment scatter plot (corrected line)
-                         st.subheader("🔎 Sentiment Scatter Plot")
-                         fig4 = px.scatter(df_recent, x='date', y="Average",
-                                           title=f'Sentiment Over Time for "{keyword}" (Last 14 Days)',
-                                           text=df_recent["text"].str[:20] + "...", color="source")
-                         fig4.update_traces(textposition='top center')
-                         st.plotly_chart(fig4)  # Fixed syntax error here
-
-                         # Rolling average
-                         st.subheader("📈 Rolling Average Sentiment")
-                         df_recent = df_recent.sort_values('date')
-                         df_recent['Rolling Avg'] = df_recent['Average'].rolling(window=min(7, len(df_recent)), min_periods=1).mean()
-                         fig5 = px.line(df_recent, x='date', y='Rolling Avg',
-                                        title=f"Rolling Average Sentiment for '{keyword}' (Last 14 Days, Window: {min(7, len(df_recent))})",
-                                        markers=True)
-                         st.plotly_chart(fig5)
-                     else:
-                         st.info(f"No data within the last 14 days (from {cutoff_date.date()} to {current_date.date()}) for this keyword.")
                  else:
-                     st.error("No data fetched. Check API credentials or keyword.")

- # Reset button
- if st.sidebar.button('🔄 Reset Analysis'):
-     st.experimental_rerun()
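Note on the removed lines above: st.experimental_rerun() has been removed from current Streamlit releases in favor of st.rerun(), and the new version's "🔄 Reset Analysis" button (see below) is not wired to any handler, so clicking it only triggers Streamlit's ordinary rerun without clearing state. A minimal working reset button might look like this (a sketch assuming Streamlit 1.27 or later; not code from this commit):

    # Sketch of a reset handler, assuming Streamlit >= 1.27; not part of this commit.
    import streamlit as st

    if st.sidebar.button("🔄 Reset Analysis"):
        st.session_state.clear()  # drop widget and app state
        st.rerun()                # st.rerun() replaces the removed st.experimental_rerun()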
 
  from textblob import TextBlob
  import praw
  from googleapiclient.discovery import build
+ import os

+ # --------------------------
+ # Initial Setup & Configuration
+ # --------------------------

+ # Set page config
+ st.set_page_config(
+     page_title="🌟 SentimentSync: Live Sentiment Analysis Dashboard",
+     page_icon="📊",
+     layout="wide"
  )

+ # --------------------------
+ # NLTK Data Download
+ # --------------------------
+
+ def download_nltk_data():
+     try:
+         nltk_data_dir = os.path.join(os.path.expanduser("~"), "nltk_data")
+         if not os.path.exists(nltk_data_dir):
+             os.makedirs(nltk_data_dir)
+
+         nltk.download('punkt', download_dir=nltk_data_dir)
+         nltk.download('stopwords', download_dir=nltk_data_dir)
+         nltk.download('punkt_tab', download_dir=nltk_data_dir)
+         nltk.data.path.append(nltk_data_dir)
+     except Exception as e:
+         st.error(f"Error downloading NLTK data: {str(e)}")
+         return False
+     return True
+
+ if not download_nltk_data():
+     st.warning("Some NLTK features may not work properly without the required data files.")
+
+ # --------------------------
+ # Model Initialization
+ # --------------------------
+
+ @st.cache_resource
+ def load_models():
+     try:
+         # Initialize sentiment models
+         bert_sentiment = pipeline(
+             "sentiment-analysis",
+             model="nlptown/bert-base-multilingual-uncased-sentiment"
+         )
+         vader_analyzer = SentimentIntensityAnalyzer()
+         return bert_sentiment, vader_analyzer
+     except Exception as e:
+         st.error(f"Error loading models: {str(e)}")
+         return None, None
+
+ bert_sentiment, vader_analyzer = load_models()
+
+ if bert_sentiment is None or vader_analyzer is None:
+     st.stop()
+
+ # --------------------------
+ # API Clients Setup
+ # --------------------------
+
+ @st.cache_resource
+ def setup_api_clients():
+     try:
+         # Reddit API setup
+         reddit = praw.Reddit(
+             client_id="S7pTXhj5JDFGDb3-_zrJEA",
+             client_secret="QP3NYN4lrAKVLrBamzLGrpFywiVg8w",
+             user_agent="SoundaryaR_Bot/1.0"
+         )
+
+         # YouTube API setup
+         youtube = build('youtube', 'v3', developerKey="AIzaSyAChqXPaiNE9hKhApkgjgonzdgiCCOo")
+
+         return reddit, youtube
+     except Exception as e:
+         st.error(f"Error setting up API clients: {str(e)}")
+         return None, None
+
+ reddit, youtube = setup_api_clients()
+
+ if reddit is None or youtube is None:
+     st.stop()
+
+ # --------------------------
+ # Helper Functions
+ # --------------------------

  def bert_score(result):
+     """Convert BERT label to numerical score"""
+     label_map = {
+         '1 star': -1,
+         '2 stars': -0.5,
+         '3 stars': 0,
+         '4 stars': 0.5,
+         '5 stars': 1
+     }
+     return label_map.get(result['label'], 0)

  def analyze_text(text):
+     """Analyze sentiment using multiple models"""
+     try:
+         vader_score = vader_analyzer.polarity_scores(text)['compound']
+         bert_result = bert_sentiment(text[:512])[0]  # Truncate to avoid token limits
+         bert_num = bert_score(bert_result)
+         textblob_score = TextBlob(text).sentiment.polarity
+         return vader_score, bert_num, textblob_score, bert_result
+     except Exception as e:
+         st.error(f"Error analyzing text: {str(e)}")
+         return 0, 0, 0, {'label': 'Error', 'score': 0}

+ def generate_wordcloud(text):
+     """Generate word cloud image"""
+     try:
+         wordcloud = WordCloud(
+             width=800,
+             height=400,
+             background_color='white',
+             stopwords=nltk.corpus.stopwords.words('english')
+         ).generate(text)
+
+         img = BytesIO()
+         wordcloud.to_image().save(img, format='PNG')
+         return base64.b64encode(img.getvalue()).decode()
+     except Exception as e:
+         st.error(f"Error generating word cloud: {str(e)}")
+         return ""
+
+ # --------------------------
+ # Data Fetching Functions
+ # --------------------------
+
+ @st.cache_data(ttl=3600)  # Cache for 1 hour
+ def fetch_reddit_data(keyword, limit=50):
+     """Fetch Reddit posts containing the keyword"""
      try:
          subreddit = reddit.subreddit("all")
+         posts = subreddit.search(keyword, limit=limit)
+
          data = []
          for post in posts:
              data.append({
                  'date': datetime.fromtimestamp(post.created_utc),
+                 'text': f"{post.title}\n{post.selftext}",
+                 'source': 'Reddit',
+                 'url': f"https://reddit.com{post.permalink}"
              })
          return pd.DataFrame(data)
      except Exception as e:
+         st.error(f"Error fetching Reddit data: {str(e)}")
          return pd.DataFrame()

+ @st.cache_data(ttl=3600)  # Cache for 1 hour
+ def fetch_youtube_data(keyword, limit=25):
+     """Fetch YouTube videos containing the keyword"""
      try:
+         request = youtube.search().list(
+             q=keyword,
+             part="snippet",
+             maxResults=limit,
+             type="video",
+             order="relevance"
+         )
          response = request.execute()
+
          data = []
          for item in response['items']:
              data.append({
+                 'date': datetime.strptime(item['snippet']['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'),
+                 'text': f"{item['snippet']['title']}\n{item['snippet']['description']}",
+                 'source': 'YouTube',
+                 'url': f"https://youtube.com/watch?v={item['id']['videoId']}"
              })
          return pd.DataFrame(data)
      except Exception as e:
+         st.error(f"Error fetching YouTube data: {str(e)}")
          return pd.DataFrame()

+ # --------------------------
+ # Visualization Functions
+ # --------------------------

+ def plot_sentiment_trends(df, keyword):
+     """Plot sentiment trends over time"""
+     try:
+         fig = px.line(
+             df,
+             x='date',
+             y=["VADER", "BERT", "TextBlob", "Average"],
+             title=f'Sentiment Over Time for "{keyword}"',
+             labels={'value': 'Sentiment Score', 'date': 'Date'},
+             color_discrete_map={
+                 "VADER": "#636EFA",
+                 "BERT": "#EF553B",
+                 "TextBlob": "#00CC96",
+                 "Average": "#AB63FA"
+             }
+         )
+         fig.update_layout(hovermode="x unified")
+         st.plotly_chart(fig, use_container_width=True)
+     except Exception as e:
+         st.error(f"Error plotting sentiment trends: {str(e)}")

+ def plot_sentiment_distribution(df, keyword):
+     """Plot sentiment distribution"""
+     try:
+         dist_values = [
+             sum(df['Average'] > 0.1),   # Positive
+             sum(df['Average'] < -0.1),  # Negative
+             sum((df['Average'] >= -0.1) & (df['Average'] <= 0.1))  # Neutral
+         ]
+
+         fig = px.pie(
+             values=dist_values,
+             names=['Positive', 'Negative', 'Neutral'],
+             title=f'Sentiment Distribution for "{keyword}"',
+             color=['Positive', 'Negative', 'Neutral'],
+             color_discrete_map={
+                 'Positive': '#00CC96',
+                 'Negative': '#EF553B',
+                 'Neutral': '#636EFA'
+             },
+             hole=0.3
+         )
+         st.plotly_chart(fig, use_container_width=True)
+     except Exception as e:
+         st.error(f"Error plotting sentiment distribution: {str(e)}")

+ # --------------------------
+ # Main App Interface
+ # --------------------------

+ def main():
+     st.title("🌟 SentimentSync: Live Sentiment Analysis Dashboard")
+
+     # Sidebar controls
+     with st.sidebar:
+         st.header("🔍 Analysis Controls")
+         analysis_mode = st.radio(
+             "Analysis Mode",
+             ["Manual Text", "Live Data (Reddit & YouTube)"],
+             index=0
+         )
+
+         if analysis_mode == "Manual Text":
+             user_input = st.text_area(
+                 "Enter text for sentiment analysis",
+                 height=200,
+                 placeholder="Type or paste your text here..."
+             )
+             analyze_btn = st.button("Analyze Text")
+         else:
+             keyword = st.text_input(
+                 "Enter keyword for live data",
+                 placeholder="e.g., Tesla, Bitcoin, etc."
+             )
+             analyze_btn = st.button("Fetch & Analyze Data")
+
+         st.markdown("---")
+         st.markdown("### Settings")
+         show_raw_data = st.checkbox("Show raw data", value=False)
+         st.markdown("---")
+         st.button("🔄 Reset Analysis")
+
+     # Main content area
+     if analyze_btn:
          with st.spinner("Analyzing..."):
              if analysis_mode == "Manual Text":
+                 if not user_input or not any(c.isalpha() for c in user_input):
+                     st.warning("Please enter valid text for analysis")
+                     return

+                 # Analyze the text
+                 vader_score, bert_num, textblob_score, bert_result = analyze_text(user_input)
+
+                 # Display results
+                 st.subheader("📊 Sentiment Analysis Results")
+                 cols = st.columns(3)
+                 cols[0].metric("VADER Score", f"{vader_score:.2f}",
+                                "Positive" if vader_score > 0 else "Negative" if vader_score < 0 else "Neutral")
+                 cols[1].metric("BERT Sentiment", bert_result['label'], f"Confidence: {bert_result['score']:.2f}")
+                 cols[2].metric("TextBlob Polarity", f"{textblob_score:.2f}",
+                                "Positive" if textblob_score > 0 else "Negative" if textblob_score < 0 else "Neutral")
+
+                 # Word cloud
+                 st.subheader("📝 Word Cloud")
                  wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
+                 st.image(wordcloud_img, use_column_width=True)
+
+                 # Sentence-level analysis
+                 try:
+                     sentences = nltk.sent_tokenize(user_input)
+                     if len(sentences) > 1:
+                         st.subheader("🔍 Sentence-level Analysis")
+                         dates = [datetime.now() - timedelta(minutes=len(sentences)-i) for i in range(len(sentences))]
+                         sentence_data = [analyze_text(s) for s in sentences]
+
+                         df = pd.DataFrame({
+                             "Sentence": sentences,
+                             "VADER": [d[0] for d in sentence_data],
+                             "BERT": [d[1] for d in sentence_data],
+                             "TextBlob": [d[2] for d in sentence_data]
                          })
+                         df["Average"] = df[["VADER", "BERT", "TextBlob"]].mean(axis=1)
+
+                         st.dataframe(df.style.background_gradient(
+                             cmap='RdYlGn',
+                             subset=["VADER", "BERT", "TextBlob", "Average"],
+                             vmin=-1, vmax=1
+                         ), use_container_width=True)
+
+                         plot_sentiment_trends(df, "Your Text")
+                 except Exception as e:
+                     st.error(f"Error in sentence analysis: {str(e)}")
+
+             else:  # Live Data mode
+                 if not keyword:
+                     st.warning("Please enter a keyword to search")
+                     return
+
+                 # Fetch data
+                 with st.spinner(f"Fetching data for '{keyword}'..."):
+                     reddit_df = fetch_reddit_data(keyword)
+                     youtube_df = fetch_youtube_data(keyword)
+
+                 if reddit_df.empty and youtube_df.empty:
+                     st.error("No data found. Try a different keyword.")
+                     return
+
+                 df = pd.concat([reddit_df, youtube_df], ignore_index=True)
+
+                 # Analyze sentiment for each item
+                 with st.spinner("Analyzing sentiment..."):
+                     results = []
+                     for _, row in df.iterrows():
+                         vader, bert, textblob, _ = analyze_text(row['text'])
+                         results.append((vader, bert, textblob))
+
+                     df['VADER'] = [r[0] for r in results]
+                     df['BERT'] = [r[1] for r in results]
+                     df['TextBlob'] = [r[2] for r in results]
+                     df['Average'] = df[['VADER', 'BERT', 'TextBlob']].mean(axis=1)
+
+                 # Display results
+                 st.subheader(f"📊 Overall Sentiment for '{keyword}'")
+
+                 # Metrics
+                 avg_sentiment = df['Average'].mean()
+                 pos_pct = len(df[df['Average'] > 0.1]) / len(df) * 100
+                 neg_pct = len(df[df['Average'] < -0.1]) / len(df) * 100
+
+                 cols = st.columns(3)
+                 cols[0].metric("Average Sentiment", f"{avg_sentiment:.2f}",
+                                "Positive" if avg_sentiment > 0 else "Negative" if avg_sentiment < 0 else "Neutral")
+                 cols[1].metric("Positive Content", f"{pos_pct:.1f}%")
+                 cols[2].metric("Negative Content", f"{neg_pct:.1f}%")
+
+                 # Word cloud
+                 st.subheader("📝 Word Cloud")
+                 combined_text = " ".join(df['text'])
+                 wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
+                 st.image(wordcloud_img, use_column_width=True)
+
+                 # Filter recent data (last 14 days)
+                 df['date'] = pd.to_datetime(df['date'])
+                 cutoff_date = datetime.now() - timedelta(days=14)
+                 df_recent = df[df['date'] >= cutoff_date].sort_values('date')
+
+                 if not df_recent.empty:
+                     # Sentiment trends
+                     st.subheader("📅 Sentiment Trends (Last 14 Days)")
+                     plot_sentiment_trends(df_recent, keyword)
+
+                     # Sentiment distribution
+                     st.subheader("📊 Sentiment Distribution")
+                     plot_sentiment_distribution(df_recent, keyword)
+
+                     # Raw data (if enabled)
+                     if show_raw_data:
+                         st.subheader("📋 Raw Data")
+                         st.dataframe(df_recent[['date', 'source', 'text', 'Average']], use_container_width=True)
                  else:
+                     st.info("No recent data found (within last 14 days).")

+ if __name__ == "__main__":
+     main()
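A final note on the credentials: both the old and new versions embed the Reddit client secret and the YouTube API key as string literals, and those literals are visible in this public diff, so rotating them would be prudent. Since the commit already adds import os, setup_api_clients could read the values from Streamlit's secrets store or from environment variables instead. A minimal sketch (the secret names below are illustrative, not part of this commit):

    # Sketch of credential loading via st.secrets / environment variables.
    # Secret names (REDDIT_CLIENT_ID, etc.) are illustrative, not from this commit.
    import os

    import praw
    import streamlit as st
    from googleapiclient.discovery import build

    def get_secret(name):
        """Try Streamlit's secrets store first, then fall back to environment variables."""
        try:
            return st.secrets[name]
        except (KeyError, FileNotFoundError):
            return os.environ.get(name)

    reddit = praw.Reddit(
        client_id=get_secret("REDDIT_CLIENT_ID"),
        client_secret=get_secret("REDDIT_CLIENT_SECRET"),
        user_agent=get_secret("REDDIT_USER_AGENT") or "SentimentSync/1.0",
    )
    youtube = build("youtube", "v3", developerKey=get_secret("YOUTUBE_API_KEY"))

On Hugging Face Spaces, repository secrets are exposed to the app as environment variables, which the fallback above covers.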