Soundaryasos committed
Commit 78be88f · verified · 1 Parent(s): 96222b0

Update app.py

Files changed (1)
  1. app.py +307 -399

app.py CHANGED
@@ -1,493 +1,401 @@
  import streamlit as st
- import pandas as pd
  import numpy as np
  from datetime import datetime, timedelta
- import nltk
- from nltk.sentiment import SentimentIntensityAnalyzer
- from textblob import TextBlob
- from transformers import pipeline
  from wordcloud import WordCloud
  import base64
  from io import BytesIO
- import plotly.express as px
  import praw
  from googleapiclient.discovery import build
- from sklearn.linear_model import Ridge
  import os
- import warnings
-
- # Suppress the ScriptRunContext warning
- warnings.filterwarnings("ignore", message="missing ScriptRunContext")

  # --------------------------
- # Initial Setup
  # --------------------------

- # Configure page
  st.set_page_config(
-     page_title="SentimentSync Pro",
-     page_icon="📈",
-     layout="wide",
-     initial_sidebar_state="expanded"
  )

  # --------------------------
- # Configuration
  # --------------------------

- class Config:
-     # API Keys - Replace with your actual keys or use environment variables
-     YOUTUBE_API_KEY = os.getenv("YT_API_KEY", "AIzaSyDcUAkcoPvkTwN_tksmiW0dVPI5Bse7qos")
-     REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID", "your_reddit_client_id")
-     REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET", "your_reddit_secret")
-     REDDIT_USER_AGENT = "SentimentAnalysisBot/1.0"
-
-     # NLTK data path
-     NLTK_DATA_PATH = os.path.join(os.path.expanduser("~"), "nltk_data")
-
-     # Sentiment thresholds
-     POSITIVE_THRESHOLD = 0.1
-     NEGATIVE_THRESHOLD = -0.1

  # --------------------------
- # Initialize Resources
  # --------------------------

- def initialize_resources():
-     """Initialize all required resources with proper error handling"""
      try:
-         # Setup NLTK data
-         os.makedirs(Config.NLTK_DATA_PATH, exist_ok=True)
-         nltk.data.path.append(Config.NLTK_DATA_PATH)
-
-         required_nltk = ['punkt', 'stopwords', 'vader_lexicon']
-         for package in required_nltk:
-             try:
-                 nltk.data.find(f'tokenizers/{package}')
-             except LookupError:
-                 nltk.download(package, download_dir=Config.NLTK_DATA_PATH)
-     except Exception as e:
-         st.error(f"NLTK initialization failed: {str(e)}")
-         return False
-
-     # Initialize sentiment analyzers
-     try:
-         st.session_state.vader = SentimentIntensityAnalyzer()
-         st.session_state.bert = pipeline(
              "sentiment-analysis",
              model="nlptown/bert-base-multilingual-uncased-sentiment"
          )
      except Exception as e:
-         st.error(f"Model initialization failed: {str(e)}")
-         return False
-
-     # Initialize API clients
      try:
-         st.session_state.reddit = praw.Reddit(
-             client_id=Config.REDDIT_CLIENT_ID,
-             client_secret=Config.REDDIT_CLIENT_SECRET,
-             user_agent=Config.REDDIT_USER_AGENT
          )
      except Exception as e:
-         st.error(f"Reddit client initialization failed: {str(e)}")
-         st.session_state.reddit = None
-
-     try:
-         if Config.YOUTUBE_API_KEY.startswith("your_"):
-             st.session_state.youtube = None
-         else:
-             st.session_state.youtube = build(
-                 'youtube',
-                 'v3',
-                 developerKey=Config.YOUTUBE_API_KEY,
-                 cache_discovery=False
-             )
-     except Exception as e:
-         st.error(f"YouTube client initialization failed: {str(e)}")
-         st.session_state.youtube = None
-
-     return True

  # --------------------------
- # Core Functions
  # --------------------------

- def analyze_sentiment(text):
-     """Analyze text using multiple sentiment models"""
-     results = {
-         'vader': 0,
-         'bert': 0,
-         'textblob': 0,
-         'bert_label': 'Error',
-         'bert_score': 0
      }
-
      try:
-         # VADER
-         results['vader'] = st.session_state.vader.polarity_scores(text)['compound']
-
-         # BERT (with truncation for long texts)
-         bert_result = st.session_state.bert(text[:512])[0]
-         results['bert_label'] = bert_result['label']
-         results['bert_score'] = bert_result['score']
-
-         # Convert BERT label to numeric score
-         label_map = {
-             '1 star': -1,
-             '2 stars': -0.5,
-             '3 stars': 0,
-             '4 stars': 0.5,
-             '5 stars': 1
-         }
-         results['bert'] = label_map.get(bert_result['label'], 0)
-
-         # TextBlob
-         results['textblob'] = TextBlob(text).sentiment.polarity
-
      except Exception as e:
-         st.error(f"Sentiment analysis error: {str(e)}")
-
-     return results

- def fetch_youtube_data(keyword, max_results=25):
-     """Fetch YouTube data with enhanced error handling"""
-     if st.session_state.youtube is None:
-         st.warning("YouTube API not configured")
-         return pd.DataFrame()
-
      try:
-         # Search for videos
-         search_response = st.session_state.youtube.search().list(
-             q=keyword,
-             part="snippet",
-             maxResults=max_results,
-             type="video",
-             order="relevance",
-             safeSearch="moderate"
-         ).execute()

-         # Get video details
-         video_ids = [item['id']['videoId'] for item in search_response['items']]
-         videos_response = st.session_state.youtube.videos().list(
-             part="snippet,statistics",
-             id=",".join(video_ids)
-         ).execute()

-         # Process results
          data = []
-         for item in videos_response['items']:
-             snippet = item['snippet']
-             stats = item.get('statistics', {})
-
              data.append({
-                 'source': 'YouTube',
-                 'date': datetime.strptime(snippet['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'),
-                 'title': snippet['title'],
-                 'text': f"{snippet['title']}\n{snippet['description']}",
-                 'url': f"https://youtu.be/{item['id']}",
-                 'views': int(stats.get('viewCount', 0)),
-                 'likes': int(stats.get('likeCount', 0)),
-                 'comments': int(stats.get('commentCount', 0)),
-                 'thumbnail': snippet['thumbnails']['default']['url']
              })
-
          return pd.DataFrame(data)
-
      except Exception as e:
-         st.error(f"Error fetching YouTube data: {str(e)}")
          return pd.DataFrame()

- def fetch_reddit_data(keyword, limit=50):
-     """Fetch Reddit posts with error handling"""
-     if st.session_state.reddit is None:
-         st.warning("Reddit API not configured")
-         return pd.DataFrame()
-
      try:
-         posts = st.session_state.reddit.subreddit("all").search(
-             query=keyword,
-             limit=limit,
-             time_filter="month"
          )

          data = []
-         for post in posts:
              data.append({
-                 'source': 'Reddit',
-                 'date': datetime.fromtimestamp(post.created_utc),
-                 'title': post.title,
-                 'text': f"{post.title}\n\n{post.selftext}",
-                 'url': f"https://reddit.com{post.permalink}",
-                 'upvotes': post.score,
-                 'comments': post.num_comments,
-                 'thumbnail': post.thumbnail if post.thumbnail not in ['self', 'default'] else None
              })
-
          return pd.DataFrame(data)
-
      except Exception as e:
-         st.error(f"Error fetching Reddit data: {str(e)}")
          return pd.DataFrame()

  # --------------------------
  # Visualization Functions
  # --------------------------

- def create_wordcloud(text):
-     """Generate a word cloud with proper error handling"""
      try:
-         wc = WordCloud(
-             width=800,
-             height=400,
-             background_color='white',
-             stopwords=set(nltk.corpus.stopwords.words('english')),
-             collocations=False
-         ).generate(text)
-
-         img = BytesIO()
-         wc.to_image().save(img, format='PNG')
-         return base64.b64encode(img.getvalue()).decode()
      except Exception as e:
-         st.error(f"Word cloud error: {str(e)}")
-         return None

- def plot_sentiment_timeline(df):
-     """Interactive timeline plot of sentiment"""
      try:
-         fig = px.line(
-             df,
-             x='date',
-             y='average_sentiment',
-             color='source',
-             title='Sentiment Over Time',
-             labels={'average_sentiment': 'Sentiment Score', 'date': 'Date'},
-             hover_data=['title', 'source', 'url'],
-             template='plotly_white'
          )
-         fig.update_traces(mode='markers+lines')
-         fig.update_layout(hovermode='x unified')
          st.plotly_chart(fig, use_container_width=True)
      except Exception as e:
-         st.error(f"Plotting error: {str(e)}")

  # --------------------------
- # UI Components
  # --------------------------

- def sidebar_controls():
-     """Render sidebar controls"""
      with st.sidebar:
-         st.title("🔧 Controls")
-
          analysis_mode = st.radio(
              "Analysis Mode",
-             ["Text Input", "Live Data"],
-             index=0,
-             key='analysis_mode'
          )

-         if st.session_state.analysis_mode == "Text Input":
-             st.session_state.user_text = st.text_area(
-                 "Enter your text:",
                  height=200,
-                 placeholder="Type or paste text here..."
              )
          else:
-             st.session_state.search_keyword = st.text_input(
-                 "Search keyword:",
-                 placeholder="e.g., Tesla, AI, etc."
-             )
-
-             col1, col2 = st.columns(2)
-             st.session_state.use_reddit = col1.checkbox("Reddit", True)
-             st.session_state.use_youtube = col2.checkbox("YouTube", True)
-
-             st.session_state.max_results = st.slider(
-                 "Max results per source:",
-                 10, 100, 25
              )

          st.markdown("---")
-         if st.button("Analyze", type="primary"):
-             st.session_state.analyze_clicked = True
-         if st.button("Reset"):
-             st.session_state.clear()
-             st.rerun()
-
- # --------------------------
- # Main App
- # --------------------------
-
- def main():
-     if not initialize_resources():
-         st.error("Critical initialization failed. Check error messages above.")
-         return
-
-     st.title("📊 SentimentSync Pro")
-     st.caption("Advanced sentiment analysis across multiple platforms")
-
-     sidebar_controls()
-
-     if not hasattr(st.session_state, 'analyze_clicked') or not st.session_state.analyze_clicked:
-         st.info("Configure your analysis using the sidebar controls")
-         return
-
-     # Perform analysis based on selected mode
-     if st.session_state.analysis_mode == "Text Input":
-         analyze_text_input()
-     else:
-         analyze_live_data()
-
- def analyze_text_input():
-     """Analyze manually entered text"""
-     if not st.session_state.user_text or len(st.session_state.user_text.strip()) < 10:
-         st.warning("Please enter at least 10 characters of text")
-         return

-     with st.spinner("Analyzing text..."):
-         # Overall sentiment
-         sentiment = analyze_sentiment(st.session_state.user_text)
-
-         # Display results
-         col1, col2, col3 = st.columns(3)
-         col1.metric("VADER Score", f"{sentiment['vader']:.2f}",
-                     delta_color="inverse" if sentiment['vader'] < 0 else "normal")
-         col2.metric("BERT Sentiment", sentiment['bert_label'], f"{sentiment['bert_score']:.2f}")
-         col3.metric("TextBlob Score", f"{sentiment['textblob']:.2f}")
-
-         # Word cloud
-         st.subheader("Word Cloud")
-         wc_img = create_wordcloud(st.session_state.user_text)
-         if wc_img:
-             st.image(f"data:image/png;base64,{wc_img}", use_container_width=True)
-
-         # Sentence-level analysis
-         try:
-             sentences = nltk.sent_tokenize(st.session_state.user_text)
-             if len(sentences) > 1:
-                 st.subheader("Sentence Breakdown")

-                 sent_data = []
-                 for i, sent in enumerate(sentences):
-                     sent_sentiment = analyze_sentiment(sent)
-                     sent_data.append({
-                         'Sentence': sent[:150] + ("..." if len(sent) > 150 else ""),
-                         'VADER': sent_sentiment['vader'],
-                         'BERT': sent_sentiment['bert'],
-                         'TextBlob': sent_sentiment['textblob'],
-                         'Average': np.mean([
-                             sent_sentiment['vader'],
-                             sent_sentiment['bert'],
-                             sent_sentiment['textblob']
-                         ])
-                     })

-                 sent_df = pd.DataFrame(sent_data)

-                 # Fixed dataframe display with proper parenthesis closure
-                 styled_df = sent_df.style.background_gradient(
-                     cmap='RdYlGn',
-                     subset=['VADER', 'BERT', 'TextBlob', 'Average'],
-                     vmin=-1,
-                     vmax=1
-                 )
-                 st.dataframe(
-                     styled_df,
-                     use_container_width=True,
-                     height=min(400, 35 * len(sent_df))
-                 )

-         except Exception as e:
-             st.error(f"Sentence analysis error: {str(e)}")
-
- def analyze_live_data():
-     """Analyze live data from APIs"""
-     if not st.session_state.search_keyword:
-         st.warning("Please enter a search keyword")
-         return
-
-     if not st.session_state.use_reddit and not st.session_state.use_youtube:
-         st.warning("Please select at least one data source")
-         return
-
-     with st.spinner(f"Fetching data for '{st.session_state.search_keyword}'..."):
-         # Fetch data
-         dfs = []
-
-         if st.session_state.use_reddit:
-             reddit_df = fetch_reddit_data(
-                 st.session_state.search_keyword,
-                 st.session_state.max_results
-             )
-             if not reddit_df.empty:
-                 dfs.append(reddit_df)
-
-         if st.session_state.use_youtube:
-             youtube_df = fetch_youtube_data(
-                 st.session_state.search_keyword,
-                 st.session_state.max_results
-             )
-             if not youtube_df.empty:
-                 dfs.append(youtube_df)
-
-         if not dfs:
-             st.error("No data found. Try different keywords or sources.")
-             return
-
-         df = pd.concat(dfs, ignore_index=True)
-
-         # Analyze sentiment
-         with st.spinner("Analyzing sentiment..."):
-             sentiment_results = []
-             for text in df['text']:
-                 res = analyze_sentiment(text)
-                 sentiment_results.append({
-                     'vader': res['vader'],
-                     'bert': res['bert'],
-                     'textblob': res['textblob'],
-                     'average_sentiment': np.mean([res['vader'], res['bert'], res['textblob']])
-                 })
-
-             sentiment_df = pd.DataFrame(sentiment_results)
-             df = pd.concat([df, sentiment_df], axis=1)

-         # Filter recent data
-         df = df[df['date'] >= (datetime.now() - timedelta(days=60))]
-         df = df.sort_values('date')
-
-         # Calculate moving average
-         df['rolling_sentiment'] = df['average_sentiment'].rolling(
-             window=7,
-             min_periods=1
-         ).mean()
-
-         # Display results
-         st.subheader(f"Results for: '{st.session_state.search_keyword}'")
-
-         # Overall metrics
-         avg_sentiment = df['average_sentiment'].mean()
-         pos_pct = (df['average_sentiment'] > Config.POSITIVE_THRESHOLD).mean() * 100
-         neg_pct = (df['average_sentiment'] < Config.NEGATIVE_THRESHOLD).mean() * 100
-
-         col1, col2, col3 = st.columns(3)
-         col1.metric("Average Sentiment", f"{avg_sentiment:.2f}")
-         col2.metric("Positive Content", f"{pos_pct:.1f}%")
-         col3.metric("Negative Content", f"{neg_pct:.1f}%")
-
-         # Word cloud
-         st.subheader("Word Cloud")
-         combined_text = " ".join(df['text'])
-         wc_img = create_wordcloud(combined_text)
-         if wc_img:
-             st.image(f"data:image/png;base64,{wc_img}", use_container_width=True)
-
-         # Timeline visualization
-         st.subheader("Sentiment Timeline")
-         plot_sentiment_timeline(df)
-
-         # Raw data
-         with st.expander("View Raw Data"):
-             st.dataframe(df, use_container_width=True)

  if __name__ == "__main__":
      main()
  import streamlit as st
+ from transformers import pipeline
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
  import numpy as np
+ import pandas as pd
  from datetime import datetime, timedelta
+ import plotly.express as px
+ from sklearn.linear_model import Ridge
  from wordcloud import WordCloud
  import base64
  from io import BytesIO
+ import nltk
+ from textblob import TextBlob
  import praw
  from googleapiclient.discovery import build
  import os

  # --------------------------
+ # Initial Setup & Configuration
  # --------------------------

+ # Set page config
  st.set_page_config(
+     page_title="🌟 SentimentSync: Live Sentiment Analysis Dashboard",
+     page_icon="📊",
+     layout="wide"
  )

  # --------------------------
+ # NLTK Data Download
  # --------------------------

+ def download_nltk_data():
+     try:
+         nltk_data_dir = os.path.join(os.path.expanduser("~"), "nltk_data")
+         if not os.path.exists(nltk_data_dir):
+             os.makedirs(nltk_data_dir)
+
+         nltk.download('punkt', download_dir=nltk_data_dir)
+         nltk.download('stopwords', download_dir=nltk_data_dir)
+         nltk.download('punkt_tab', download_dir=nltk_data_dir)
+         nltk.data.path.append(nltk_data_dir)
+     except Exception as e:
+         st.error(f"Error downloading NLTK data: {str(e)}")
+         return False
+     return True
+
+ if not download_nltk_data():
+     st.warning("Some NLTK features may not work properly without the required data files.")
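+ # Note on the downloads above: 'punkt' is the classic sentence-tokenizer data, while
+ # recent NLTK releases look for 'punkt_tab' instead, so fetching both keeps
+ # nltk.sent_tokenize working across NLTK versions; 'stopwords' feeds the word cloud.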
 
  # --------------------------
+ # Model Initialization
  # --------------------------

+ @st.cache_resource
+ def load_models():
      try:
+         # Initialize sentiment models
+         bert_sentiment = pipeline(
              "sentiment-analysis",
              model="nlptown/bert-base-multilingual-uncased-sentiment"
          )
+         vader_analyzer = SentimentIntensityAnalyzer()
+         return bert_sentiment, vader_analyzer
      except Exception as e:
+         st.error(f"Error loading models: {str(e)}")
+         return None, None
+
+ bert_sentiment, vader_analyzer = load_models()
+
+ if bert_sentiment is None or vader_analyzer is None:
+     st.stop()
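+ # @st.cache_resource memoizes load_models() across Streamlit reruns, so the BERT
+ # pipeline is downloaded and instantiated once per process rather than on every
+ # widget interaction; st.stop() halts the script cleanly if loading failed.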
+
+ # --------------------------
+ # API Clients Setup
+ # --------------------------
+
+ @st.cache_resource
+ def setup_api_clients():
      try:
+         # Reddit API setup
+         reddit = praw.Reddit(
+             client_id="S7pTXhj5JDFGDb3-_zrJEA",
+             client_secret="QP3NYN4lrAKVLrBamzLGrpFywiVg8w",
+             user_agent="SoundaryaR_Bot/1.0"
          )
+
+         youtube = build('youtube', 'v3', developerKey="AIzaSyDcUAkcoPvkTwN_tksmiW0dVPI5Bse7qos")
+
+         return reddit, youtube
      except Exception as e:
+         st.error(f"Error setting up API clients: {str(e)}")
+         return None, None
+
+ reddit, youtube = setup_api_clients()
+
+ if reddit is None or youtube is None:
+     st.stop()
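+ # NOTE: credentials are committed in plain text here; the previous revision read
+ # them with os.getenv(), which keeps secrets out of source control and is the
+ # safer pattern for a public app.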

  # --------------------------
+ # Helper Functions
  # --------------------------

+ def bert_score(result):
+     """Convert BERT label to numerical score"""
+     label_map = {
+         '1 star': -1,
+         '2 stars': -0.5,
+         '3 stars': 0,
+         '4 stars': 0.5,
+         '5 stars': 1
      }
+     return label_map.get(result['label'], 0)
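+ # e.g. bert_score({'label': '4 stars', 'score': 0.83}) -> 0.5; any label outside
+ # the map (such as the 'Error' placeholder used below) falls back to a neutral 0.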
+
+ def analyze_text(text):
+     """Analyze sentiment using multiple models"""
      try:
+         vader_score = vader_analyzer.polarity_scores(text)['compound']
+         bert_result = bert_sentiment(text[:512])[0]  # Truncate to avoid token limits
+         bert_num = bert_score(bert_result)
+         textblob_score = TextBlob(text).sentiment.polarity
+         return vader_score, bert_num, textblob_score, bert_result
      except Exception as e:
+         st.error(f"Error analyzing text: {str(e)}")
+         return 0, 0, 0, {'label': 'Error', 'score': 0}
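+ # VADER's compound score and TextBlob's polarity both lie in [-1, 1], and the
+ # star-rating map in bert_score() is scaled to the same interval, so the three
+ # values can be averaged directly into a single sentiment score later on.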
 
+ def generate_wordcloud(text):
+     """Generate word cloud image"""
      try:
+         wordcloud = WordCloud(
+             width=800,
+             height=400,
+             background_color='white',
+             stopwords=nltk.corpus.stopwords.words('english')
+         ).generate(text)

+         img = BytesIO()
+         wordcloud.to_image().save(img, format='PNG')
+         return base64.b64encode(img.getvalue()).decode()
+     except Exception as e:
+         st.error(f"Error generating word cloud: {str(e)}")
+         return ""
+
+ # --------------------------
+ # Data Fetching Functions
+ # --------------------------
+
+ @st.cache_data(ttl=3600)  # Cache for 1 hour
+ def fetch_reddit_data(keyword, limit=50):
+     """Fetch Reddit posts containing the keyword"""
+     try:
+         subreddit = reddit.subreddit("all")
+         posts = subreddit.search(keyword, limit=limit)

          data = []
+         for post in posts:
              data.append({
+                 'date': datetime.fromtimestamp(post.created_utc),
+                 'text': f"{post.title}\n{post.selftext}",
+                 'source': 'Reddit',
+                 'url': f"https://reddit.com{post.permalink}"
              })
          return pd.DataFrame(data)
      except Exception as e:
+         st.error(f"Error fetching Reddit data: {str(e)}")
          return pd.DataFrame()
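+ # st.cache_data(ttl=3600) memoizes results keyed by the function arguments, so
+ # repeat searches for the same keyword within an hour are served from cache
+ # instead of re-hitting the APIs; the same decorator is applied below.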

+ @st.cache_data(ttl=3600)  # Cache for 1 hour
+ def fetch_youtube_data(keyword, limit=25):
+     """Fetch YouTube videos containing the keyword"""
      try:
+         request = youtube.search().list(
+             q=keyword,
+             part="snippet",
+             maxResults=limit,
+             type="video",
+             order="relevance"
          )
+         response = request.execute()

          data = []
+         for item in response['items']:
              data.append({
+                 'date': datetime.strptime(item['snippet']['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'),
+                 'text': f"{item['snippet']['title']}\n{item['snippet']['description']}",
+                 'source': 'YouTube',
+                 'url': f"https://youtube.com/watch?v={item['id']['videoId']}"
              })
          return pd.DataFrame(data)
      except Exception as e:
+         st.error(f"Error fetching YouTube data: {str(e)}")
          return pd.DataFrame()

  # --------------------------
  # Visualization Functions
  # --------------------------

+ def plot_sentiment_trends(df, keyword):
+     """Plot sentiment trends over time"""
      try:
+         fig = px.line(
+             df,
+             x='date',
+             y=["VADER", "BERT", "TextBlob", "Average"],
+             title=f'Sentiment Over Time for "{keyword}"',
+             labels={'value': 'Sentiment Score', 'date': 'Date'},
+             color_discrete_map={
+                 "VADER": "#636EFA",
+                 "BERT": "#EF553B",
+                 "TextBlob": "#00CC96",
+                 "Average": "#AB63FA"
+             }
+         )
+         fig.update_layout(hovermode="x unified")
+         st.plotly_chart(fig, use_container_width=True)
      except Exception as e:
+         st.error(f"Error plotting sentiment trends: {str(e)}")
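+ # Passing a list of column names to y= uses plotly.express's wide-form input:
+ # each listed column becomes its own trace, which is what lets the four models
+ # share one time axis in a single chart.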
 

+ def plot_sentiment_distribution(df, keyword):
+     """Plot sentiment distribution"""
      try:
+         dist_values = [
+             sum(df['Average'] > 0.1),   # Positive
+             sum(df['Average'] < -0.1),  # Negative
+             sum((df['Average'] >= -0.1) & (df['Average'] <= 0.1))  # Neutral
+         ]
+
+         fig = px.pie(
+             values=dist_values,
+             names=['Positive', 'Negative', 'Neutral'],
+             title=f'Sentiment Distribution for "{keyword}"',
+             color=['Positive', 'Negative', 'Neutral'],
+             color_discrete_map={
+                 'Positive': '#00CC96',
+                 'Negative': '#EF553B',
+                 'Neutral': '#636EFA'
+             },
+             hole=0.3
          )
          st.plotly_chart(fig, use_container_width=True)
      except Exception as e:
+         st.error(f"Error plotting sentiment distribution: {str(e)}")
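+ # The ±0.1 cutoffs mirror the POSITIVE_THRESHOLD / NEGATIVE_THRESHOLD constants
+ # from the previous revision: averages within [-0.1, 0.1] count as neutral.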

  # --------------------------
+ # Main App Interface
  # --------------------------

+ def main():
+     st.title("🌟 SentimentSync: Live Sentiment Analysis Dashboard")
+
+     # Sidebar controls
      with st.sidebar:
+         st.header("🔍 Analysis Controls")
          analysis_mode = st.radio(
              "Analysis Mode",
+             ["Manual Text", "Live Data (Reddit & YouTube)"],
+             index=0
          )

+         if analysis_mode == "Manual Text":
+             user_input = st.text_area(
+                 "Enter text for sentiment analysis",
                  height=200,
+                 placeholder="Type or paste your text here..."
              )
+             analyze_btn = st.button("Analyze Text")
          else:
+             keyword = st.text_input(
+                 "Enter keyword for live data",
+                 placeholder="e.g., Tesla, Bitcoin, etc."
              )
+             analyze_btn = st.button("Fetch & Analyze Data")

          st.markdown("---")
+         st.markdown("### Settings")
+         show_raw_data = st.checkbox("Show raw data", value=False)
+         st.markdown("---")
+         st.button("🔄 Reset Analysis")
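+         # The reset button's return value is unused: clicking any button triggers
+         # a Streamlit rerun in which analyze_btn is False again, so the analysis
+         # output below disappears.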

+     # Main content area
+     if analyze_btn:
+         with st.spinner("Analyzing..."):
+             if analysis_mode == "Manual Text":
+                 if not user_input or not any(c.isalpha() for c in user_input):
+                     st.warning("Please enter valid text for analysis")
+                     return

+                 # Analyze the text
+                 vader_score, bert_num, textblob_score, bert_result = analyze_text(user_input)

+                 # Display results
+                 st.subheader("📊 Sentiment Analysis Results")
+                 cols = st.columns(3)
+                 cols[0].metric("VADER Score", f"{vader_score:.2f}",
+                                "Positive" if vader_score > 0 else "Negative" if vader_score < 0 else "Neutral")
+                 cols[1].metric("BERT Sentiment", bert_result['label'], f"Confidence: {bert_result['score']:.2f}")
+                 cols[2].metric("TextBlob Polarity", f"{textblob_score:.2f}",
+                                "Positive" if textblob_score > 0 else "Negative" if textblob_score < 0 else "Neutral")

+                 # Word cloud
+                 st.subheader("📝 Word Cloud")
+                 wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
+                 st.image(wordcloud_img, use_container_width=True)

+                 # Sentence-level analysis
+                 try:
+                     sentences = nltk.sent_tokenize(user_input)
+                     if len(sentences) > 1:
+                         st.subheader("🔍 Sentence-level Analysis")
+                         dates = [datetime.now() - timedelta(minutes=len(sentences)-i) for i in range(len(sentences))]
+                         sentence_data = [analyze_text(s) for s in sentences]
+
+                         df = pd.DataFrame({
+                             "Sentence": sentences,
+                             "date": dates,  # synthetic timestamps so plot_sentiment_trends has a 'date' x-axis
+                             "VADER": [d[0] for d in sentence_data],
+                             "BERT": [d[1] for d in sentence_data],
+                             "TextBlob": [d[2] for d in sentence_data]
+                         })
+                         df["Average"] = df[["VADER", "BERT", "TextBlob"]].mean(axis=1)
+
+                         st.dataframe(df.style.background_gradient(
+                             cmap='RdYlGn',
+                             subset=["VADER", "BERT", "TextBlob", "Average"],
+                             vmin=-1, vmax=1
+                         ), use_container_width=True)
+
+                         plot_sentiment_trends(df, "Your Text")
+                 except Exception as e:
+                     st.error(f"Error in sentence analysis: {str(e)}")

+             else:  # Live Data mode
+                 if not keyword:
+                     st.warning("Please enter a keyword to search")
+                     return
+
+                 # Fetch data
+                 with st.spinner(f"Fetching data for '{keyword}'..."):
+                     reddit_df = fetch_reddit_data(keyword)
+                     youtube_df = fetch_youtube_data(keyword)
+
+                 if reddit_df.empty and youtube_df.empty:
+                     st.error("No data found. Try a different keyword.")
+                     return
+
+                 df = pd.concat([reddit_df, youtube_df], ignore_index=True)
+
+                 # Analyze sentiment for each item
+                 with st.spinner("Analyzing sentiment..."):
+                     results = []
+                     for _, row in df.iterrows():
+                         vader, bert, textblob, _ = analyze_text(row['text'])
+                         results.append((vader, bert, textblob))
+
+                     df['VADER'] = [r[0] for r in results]
+                     df['BERT'] = [r[1] for r in results]
+                     df['TextBlob'] = [r[2] for r in results]
+                     df['Average'] = df[['VADER', 'BERT', 'TextBlob']].mean(axis=1)
+
+                 # Display results
+                 st.subheader(f"📊 Overall Sentiment for '{keyword}'")
+
+                 # Metrics
+                 avg_sentiment = df['Average'].mean()
+                 pos_pct = len(df[df['Average'] > 0.1]) / len(df) * 100
+                 neg_pct = len(df[df['Average'] < -0.1]) / len(df) * 100
+
+                 cols = st.columns(3)
+                 cols[0].metric("Average Sentiment", f"{avg_sentiment:.2f}",
+                                "Positive" if avg_sentiment > 0 else "Negative" if avg_sentiment < 0 else "Neutral")
+                 cols[1].metric("Positive Content", f"{pos_pct:.1f}%")
+                 cols[2].metric("Negative Content", f"{neg_pct:.1f}%")
+
+                 # Word cloud
+                 st.subheader("📝 Word Cloud")
+                 combined_text = " ".join(df['text'])
+                 wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
+                 st.image(wordcloud_img, use_container_width=True)
+
+                 # Filter recent data (last 14 days)
+                 df['date'] = pd.to_datetime(df['date'])
+                 cutoff_date = datetime.now() - timedelta(days=14)
+                 df_recent = df[df['date'] >= cutoff_date].sort_values('date')
+
+                 if not df_recent.empty:
+                     # Sentiment trends
+                     st.subheader("📅 Sentiment Trends (Last 14 Days)")
+                     plot_sentiment_trends(df_recent, keyword)
+
+                     # Sentiment distribution
+                     st.subheader("📊 Sentiment Distribution")
+                     plot_sentiment_distribution(df_recent, keyword)
+
+                     # Raw data (if enabled)
+                     if show_raw_data:
+                         st.subheader("📋 Raw Data")
+                         st.dataframe(df_recent[['date', 'source', 'text', 'Average']], use_container_width=True)
+                 else:
+                     st.info("No recent data found (within last 14 days).")
  if __name__ == "__main__":
      main()