Soundaryasos committed
Commit a0b1041 · verified · 1 Parent(s): e8b61e5

Update app.py

Files changed (1)
  1. app.py +401 -310
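
The substantive change in this commit is configuration hygiene: the Reddit and YouTube credentials hardcoded in the old app.py are replaced by a Config class that reads them from the environment. A minimal sketch of supplying those variables locally before `streamlit run app.py` — the variable names come from the new Config class; the values are placeholders, not working keys:

    # set_credentials sketch — a hypothetical helper, not part of this commit
    import os

    os.environ["YT_API_KEY"] = "<your-youtube-data-api-v3-key>"
    os.environ["REDDIT_CLIENT_ID"] = "<your-reddit-client-id>"
    os.environ["REDDIT_CLIENT_SECRET"] = "<your-reddit-client-secret>"

    # With these set, Config.YOUTUBE_API_KEY no longer starts with "your_",
    # so initialize_resources() builds the YouTube client instead of leaving
    # st.session_state.youtube as None, and praw.Reddit() gets real credentials.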
app.py CHANGED
@@ -1,402 +1,493 @@
--- app.py (old version: context and removed lines)
  import streamlit as st
- from transformers import pipeline
- from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
- import numpy as np
  import pandas as pd
  from datetime import datetime, timedelta
  import plotly.express as px
- from sklearn.linear_model import Ridge
  from wordcloud import WordCloud
- import base64
- from io import BytesIO
- import nltk
  from textblob import TextBlob
- import praw
  from googleapiclient.discovery import build
- import os
  
  # --------------------------
- # Initial Setup & Configuration
  # --------------------------
  
- # Set page config
  st.set_page_config(
-     page_title="🌟 SentimentSync: Live Sentiment Analysis Dashboard",
-     page_icon="📊",
-     layout="wide"
  )
  
  # --------------------------
- # NLTK Data Download
  # --------------------------
  
- def download_nltk_data():
-     try:
-         nltk_data_dir = os.path.join(os.path.expanduser("~"), "nltk_data")
-         if not os.path.exists(nltk_data_dir):
-             os.makedirs(nltk_data_dir)
- 
-         nltk.download('punkt', download_dir=nltk_data_dir)
-         nltk.download('stopwords', download_dir=nltk_data_dir)
-         nltk.download('punkt_tab', download_dir=nltk_data_dir)
-         nltk.data.path.append(nltk_data_dir)
-     except Exception as e:
-         st.error(f"Error downloading NLTK data: {str(e)}")
-         return False
-     return True
- 
- if not download_nltk_data():
-     st.warning("Some NLTK features may not work properly without the required data files.")
  
  # --------------------------
- # Model Initialization
  # --------------------------
  
- @st.cache_resource
- def load_models():
      try:
-         # Initialize sentiment models
-         bert_sentiment = pipeline(
              "sentiment-analysis",
              model="nlptown/bert-base-multilingual-uncased-sentiment"
          )
-         vader_analyzer = SentimentIntensityAnalyzer()
-         return bert_sentiment, vader_analyzer
      except Exception as e:
-         st.error(f"Error loading models: {str(e)}")
-         return None, None
- 
- bert_sentiment, vader_analyzer = load_models()
- 
- if bert_sentiment is None or vader_analyzer is None:
-     st.stop()
- 
- # --------------------------
- # API Clients Setup
- # --------------------------
- 
- @st.cache_resource
- def setup_api_clients():
      try:
-         # Reddit API setup
-         reddit = praw.Reddit(
-             client_id="S7pTXhj5JDFGDb3-_zrJEA",
-             client_secret="QP3NYN4lrAKVLrBamzLGrpFywiVg8w",
-             user_agent="SoundaryaR_Bot/1.0"
          )
- 
-         # YouTube API setup
-         youtube = build('youtube', 'v3', developerKey="AIzaSyAChqXPaiNE9hKhApkgjgonzdgiCCOo")
- 
-         return reddit, youtube
      except Exception as e:
-         st.error(f"Error setting up API clients: {str(e)}")
-         return None, None
- 
- reddit, youtube = setup_api_clients()
  
- if reddit is None or youtube is None:
      st.stop()
  
  # --------------------------
- # Helper Functions
  # --------------------------
  
- def bert_score(result):
-     """Convert BERT label to numerical score"""
-     label_map = {
-         '1 star': -1,
-         '2 stars': -0.5,
-         '3 stars': 0,
-         '4 stars': 0.5,
-         '5 stars': 1
      }
-     return label_map.get(result['label'], 0)
- 
- def analyze_text(text):
-     """Analyze sentiment using multiple models"""
-     try:
-         vader_score = vader_analyzer.polarity_scores(text)['compound']
-         bert_result = bert_sentiment(text[:512])[0]  # Truncate to avoid token limits
-         bert_num = bert_score(bert_result)
-         textblob_score = TextBlob(text).sentiment.polarity
-         return vader_score, bert_num, textblob_score, bert_result
-     except Exception as e:
-         st.error(f"Error analyzing text: {str(e)}")
-         return 0, 0, 0, {'label': 'Error', 'score': 0}
- 
- def generate_wordcloud(text):
-     """Generate word cloud image"""
      try:
-         wordcloud = WordCloud(
-             width=800,
-             height=400,
-             background_color='white',
-             stopwords=nltk.corpus.stopwords.words('english')
-         ).generate(text)
  
-         img = BytesIO()
-         wordcloud.to_image().save(img, format='PNG')
-         return base64.b64encode(img.getvalue()).decode()
      except Exception as e:
-         st.error(f"Error generating word cloud: {str(e)}")
-         return ""
- 
- # --------------------------
- # Data Fetching Functions
- # --------------------------
  
- @st.cache_data(ttl=3600)  # Cache for 1 hour
- def fetch_reddit_data(keyword, limit=50):
-     """Fetch Reddit posts containing the keyword"""
      try:
-         subreddit = reddit.subreddit("all")
-         posts = subreddit.search(keyword, limit=limit)
  
          data = []
-         for post in posts:
              data.append({
-                 'date': datetime.fromtimestamp(post.created_utc),
-                 'text': f"{post.title}\n{post.selftext}",
-                 'source': 'Reddit',
-                 'url': f"https://reddit.com{post.permalink}"
              })
          return pd.DataFrame(data)
      except Exception as e:
-         st.error(f"Error fetching Reddit data: {str(e)}")
          return pd.DataFrame()
  
- @st.cache_data(ttl=3600)  # Cache for 1 hour
- def fetch_youtube_data(keyword, limit=25):
-     """Fetch YouTube videos containing the keyword"""
      try:
-         request = youtube.search().list(
-             q=keyword,
-             part="snippet",
-             maxResults=limit,
-             type="video",
-             order="relevance"
          )
-         response = request.execute()
  
          data = []
-         for item in response['items']:
              data.append({
-                 'date': datetime.strptime(item['snippet']['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'),
-                 'text': f"{item['snippet']['title']}\n{item['snippet']['description']}",
-                 'source': 'YouTube',
-                 'url': f"https://youtube.com/watch?v={item['id']['videoId']}"
              })
          return pd.DataFrame(data)
      except Exception as e:
-         st.error(f"Error fetching YouTube data: {str(e)}")
          return pd.DataFrame()
  
  # --------------------------
  # Visualization Functions
  # --------------------------
  
- def plot_sentiment_trends(df, keyword):
-     """Plot sentiment trends over time"""
      try:
-         fig = px.line(
-             df,
-             x='date',
-             y=["VADER", "BERT", "TextBlob", "Average"],
-             title=f'Sentiment Over Time for "{keyword}"',
-             labels={'value': 'Sentiment Score', 'date': 'Date'},
-             color_discrete_map={
-                 "VADER": "#636EFA",
-                 "BERT": "#EF553B",
-                 "TextBlob": "#00CC96",
-                 "Average": "#AB63FA"
-             }
-         )
-         fig.update_layout(hovermode="x unified")
-         st.plotly_chart(fig, use_container_width=True)
      except Exception as e:
-         st.error(f"Error plotting sentiment trends: {str(e)}")
  
- def plot_sentiment_distribution(df, keyword):
-     """Plot sentiment distribution"""
      try:
-         dist_values = [
-             sum(df['Average'] > 0.1),   # Positive
-             sum(df['Average'] < -0.1),  # Negative
-             sum((df['Average'] >= -0.1) & (df['Average'] <= 0.1))  # Neutral
-         ]
- 
-         fig = px.pie(
-             values=dist_values,
-             names=['Positive', 'Negative', 'Neutral'],
-             title=f'Sentiment Distribution for "{keyword}"',
-             color=['Positive', 'Negative', 'Neutral'],
-             color_discrete_map={
-                 'Positive': '#00CC96',
-                 'Negative': '#EF553B',
-                 'Neutral': '#636EFA'
-             },
-             hole=0.3
          )
          st.plotly_chart(fig, use_container_width=True)
      except Exception as e:
-         st.error(f"Error plotting sentiment distribution: {str(e)}")
  
  # --------------------------
- # Main App Interface
  # --------------------------
  
- def main():
-     st.title("🌟 SentimentSync: Live Sentiment Analysis Dashboard")
- 
-     # Sidebar controls
      with st.sidebar:
-         st.header("🔍 Analysis Controls")
          analysis_mode = st.radio(
              "Analysis Mode",
-             ["Manual Text", "Live Data (Reddit & YouTube)"],
-             index=0
          )
  
-         if analysis_mode == "Manual Text":
-             user_input = st.text_area(
-                 "Enter text for sentiment analysis",
                  height=200,
-                 placeholder="Type or paste your text here..."
              )
-             analyze_btn = st.button("Analyze Text")
          else:
-             keyword = st.text_input(
-                 "Enter keyword for live data",
-                 placeholder="e.g., Tesla, Bitcoin, etc."
              )
-             analyze_btn = st.button("Fetch & Analyze Data")
  
          st.markdown("---")
-         st.markdown("### Settings")
-         show_raw_data = st.checkbox("Show raw data", value=False)
-         st.markdown("---")
-         st.button("🔄 Reset Analysis")
  
-     # Main content area
-     if analyze_btn:
-         with st.spinner("Analyzing..."):
-             if analysis_mode == "Manual Text":
-                 if not user_input or not any(c.isalpha() for c in user_input):
-                     st.warning("Please enter valid text for analysis")
-                     return
- 
-                 # Analyze the text
-                 vader_score, bert_num, textblob_score, bert_result = analyze_text(user_input)
- 
-                 # Display results
-                 st.subheader("📊 Sentiment Analysis Results")
-                 cols = st.columns(3)
-                 cols[0].metric("VADER Score", f"{vader_score:.2f}",
-                                "Positive" if vader_score > 0 else "Negative" if vader_score < 0 else "Neutral")
-                 cols[1].metric("BERT Sentiment", bert_result['label'], f"Confidence: {bert_result['score']:.2f}")
-                 cols[2].metric("TextBlob Polarity", f"{textblob_score:.2f}",
-                                "Positive" if textblob_score > 0 else "Negative" if textblob_score < 0 else "Neutral")
  
-                 # Word cloud
-                 st.subheader("📝 Word Cloud")
-                 wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
-                 st.image(wordcloud_img, use_column_width=True)
  
-                 # Sentence-level analysis
-                 try:
-                     sentences = nltk.sent_tokenize(user_input)
-                     if len(sentences) > 1:
-                         st.subheader("🔍 Sentence-level Analysis")
-                         dates = [datetime.now() - timedelta(minutes=len(sentences)-i) for i in range(len(sentences))]
-                         sentence_data = [analyze_text(s) for s in sentences]
- 
-                         df = pd.DataFrame({
-                             "Sentence": sentences,
-                             "VADER": [d[0] for d in sentence_data],
-                             "BERT": [d[1] for d in sentence_data],
-                             "TextBlob": [d[2] for d in sentence_data]
-                         })
-                         df["Average"] = df[["VADER", "BERT", "TextBlob"]].mean(axis=1)
- 
-                         st.dataframe(df.style.background_gradient(
-                             cmap='RdYlGn',
-                             subset=["VADER", "BERT", "TextBlob", "Average"],
-                             vmin=-1, vmax=1
-                         ), use_container_width=True)
- 
-                         plot_sentiment_trends(df, "Your Text")
-                 except Exception as e:
-                     st.error(f"Error in sentence analysis: {str(e)}")
  
-             else:  # Live Data mode
-                 if not keyword:
-                     st.warning("Please enter a keyword to search")
-                     return
- 
-                 # Fetch data
-                 with st.spinner(f"Fetching data for '{keyword}'..."):
-                     reddit_df = fetch_reddit_data(keyword)
-                     youtube_df = fetch_youtube_data(keyword)
- 
-                 if reddit_df.empty and youtube_df.empty:
-                     st.error("No data found. Try a different keyword.")
-                     return
- 
-                 df = pd.concat([reddit_df, youtube_df], ignore_index=True)
- 
-                 # Analyze sentiment for each item
-                 with st.spinner("Analyzing sentiment..."):
-                     results = []
-                     for _, row in df.iterrows():
-                         vader, bert, textblob, _ = analyze_text(row['text'])
-                         results.append((vader, bert, textblob))
- 
-                     df['VADER'] = [r[0] for r in results]
-                     df['BERT'] = [r[1] for r in results]
-                     df['TextBlob'] = [r[2] for r in results]
-                     df['Average'] = df[['VADER', 'BERT', 'TextBlob']].mean(axis=1)
- 
-                 # Display results
-                 st.subheader(f"📊 Overall Sentiment for '{keyword}'")
- 
-                 # Metrics
-                 avg_sentiment = df['Average'].mean()
-                 pos_pct = len(df[df['Average'] > 0.1]) / len(df) * 100
-                 neg_pct = len(df[df['Average'] < -0.1]) / len(df) * 100
- 
-                 cols = st.columns(3)
-                 cols[0].metric("Average Sentiment", f"{avg_sentiment:.2f}",
-                                "Positive" if avg_sentiment > 0 else "Negative" if avg_sentiment < 0 else "Neutral")
-                 cols[1].metric("Positive Content", f"{pos_pct:.1f}%")
-                 cols[2].metric("Negative Content", f"{neg_pct:.1f}%")
- 
-                 # Word cloud
-                 st.subheader("📝 Word Cloud")
-                 combined_text = " ".join(df['text'])
-                 wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
-                 st.image(wordcloud_img, use_column_width=True)
- 
-                 # Filter recent data (last 14 days)
-                 df['date'] = pd.to_datetime(df['date'])
-                 cutoff_date = datetime.now() - timedelta(days=14)
-                 df_recent = df[df['date'] >= cutoff_date].sort_values('date')
- 
-                 if not df_recent.empty:
-                     # Sentiment trends
-                     st.subheader("📅 Sentiment Trends (Last 14 Days)")
-                     plot_sentiment_trends(df_recent, keyword)
- 
-                     # Sentiment distribution
-                     st.subheader("📊 Sentiment Distribution")
-                     plot_sentiment_distribution(df_recent, keyword)
- 
-                     # Raw data (if enabled)
-                     if show_raw_data:
-                         st.subheader("📋 Raw Data")
-                         st.dataframe(df_recent[['date', 'source', 'text', 'Average']], use_container_width=True)
-                 else:
-                     st.info("No recent data found (within last 14 days).")
  
  if __name__ == "__main__":
      main()
 
+++ app.py (new version: context and added lines)
  import streamlit as st
  import pandas as pd
+ import numpy as np
  from datetime import datetime, timedelta
+ import nltk
+ import os
+ from io import BytesIO
+ import base64
  import plotly.express as px
  from wordcloud import WordCloud
  from textblob import TextBlob
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+ from transformers import pipeline
  from googleapiclient.discovery import build
+ from googleapiclient.errors import HttpError
+ import praw
+ from sklearn.linear_model import Ridge
  
  # --------------------------
+ # Initial Setup
  # --------------------------
  
+ # Configure page
  st.set_page_config(
+     page_title="SentimentSync Pro",
+     page_icon="📈",
+     layout="wide",
+     initial_sidebar_state="expanded"
  )
  
  # --------------------------
+ # Configuration
  # --------------------------
  
+ class Config:
+     # Replace these with your actual API keys or use environment variables
+     YOUTUBE_API_KEY = os.getenv("YT_API_KEY", "your_youtube_api_key_here")
+     REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID", "your_reddit_client_id")
+     REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET", "your_reddit_secret")
+     REDDIT_USER_AGENT = "SentimentAnalysisBot/1.0"
+ 
+     # NLTK data path
+     NLTK_DATA_PATH = os.path.join(os.path.expanduser("~"), "nltk_data")
+ 
+     # Sentiment thresholds
+     POSITIVE_THRESHOLD = 0.1
+     NEGATIVE_THRESHOLD = -0.1
  
  # --------------------------
+ # Initialize Resources
  # --------------------------
  
+ def initialize_resources():
+     """Initialize all required resources with proper error handling"""
+ 
+     # Setup NLTK
+     try:
+         os.makedirs(Config.NLTK_DATA_PATH, exist_ok=True)
+         nltk.data.path.append(Config.NLTK_DATA_PATH)
+ 
+         # Each NLTK resource lives under its own category directory;
+         # punkt_tab is required by sent_tokenize on newer NLTK releases
+         required_nltk = {
+             'punkt': 'tokenizers/punkt',
+             'punkt_tab': 'tokenizers/punkt_tab',
+             'stopwords': 'corpora/stopwords',
+             'vader_lexicon': 'sentiment/vader_lexicon'
+         }
+         for package, resource_path in required_nltk.items():
+             try:
+                 nltk.data.find(resource_path)
+             except LookupError:
+                 nltk.download(package, download_dir=Config.NLTK_DATA_PATH)
+     except Exception as e:
+         st.error(f"NLTK initialization failed: {str(e)}")
+         return False
+ 
+     # Initialize sentiment analyzers
      try:
+         st.session_state.vader = SentimentIntensityAnalyzer()
+         st.session_state.bert = pipeline(
              "sentiment-analysis",
              model="nlptown/bert-base-multilingual-uncased-sentiment"
          )
      except Exception as e:
+         st.error(f"Model initialization failed: {str(e)}")
+         return False
+ 
+     # Initialize API clients
      try:
+         st.session_state.reddit = praw.Reddit(
+             client_id=Config.REDDIT_CLIENT_ID,
+             client_secret=Config.REDDIT_CLIENT_SECRET,
+             user_agent=Config.REDDIT_USER_AGENT
          )
      except Exception as e:
+         st.error(f"Reddit client initialization failed: {str(e)}")
+         st.session_state.reddit = None
+ 
+     try:
+         if Config.YOUTUBE_API_KEY.startswith("your_"):
+             st.session_state.youtube = None
+         else:
+             st.session_state.youtube = build(
+                 'youtube',
+                 'v3',
+                 developerKey=Config.YOUTUBE_API_KEY,
+                 cache_discovery=False
+             )
+     except Exception as e:
+         st.error(f"YouTube client initialization failed: {str(e)}")
+         st.session_state.youtube = None
+ 
+     return True
  
+ # Initialize only once per session; reloading the BERT pipeline on every
+ # Streamlit rerun would make the app unusably slow
+ if 'bert' not in st.session_state and not initialize_resources():
+     st.error("Critical initialization failed. Check error messages above.")
      st.stop()
  
  # --------------------------
+ # Core Functions
  # --------------------------
  
+ def analyze_sentiment(text):
+     """Analyze text using multiple sentiment models"""
+     results = {
+         'vader': 0,
+         'bert': 0,
+         'textblob': 0,
+         'bert_label': 'Error',
+         'bert_score': 0
      }
+ 
      try:
+         # VADER
+         results['vader'] = st.session_state.vader.polarity_scores(text)['compound']
+ 
+         # BERT (with truncation for long texts)
+         bert_result = st.session_state.bert(text[:512])[0]
+         results['bert_label'] = bert_result['label']
+         results['bert_score'] = bert_result['score']
+ 
+         # Convert BERT label to numeric score
+         label_map = {
+             '1 star': -1,
+             '2 stars': -0.5,
+             '3 stars': 0,
+             '4 stars': 0.5,
+             '5 stars': 1
+         }
+         results['bert'] = label_map.get(bert_result['label'], 0)
+ 
+         # TextBlob
+         results['textblob'] = TextBlob(text).sentiment.polarity
  
      except Exception as e:
+         st.error(f"Sentiment analysis error: {str(e)}")
+ 
+     return results
  
+ def fetch_youtube_data(keyword, max_results=25):
+     """Fetch YouTube data with enhanced error handling"""
+     if st.session_state.youtube is None:
+         st.warning("YouTube API not configured")
+         return pd.DataFrame()
+ 
      try:
+         # Search for videos
+         search_response = st.session_state.youtube.search().list(
+             q=keyword,
+             part="snippet",
+             maxResults=max_results,
+             type="video",
+             order="relevance",
+             safeSearch="moderate"
+         ).execute()
  
+         # Get video details
+         video_ids = [item['id']['videoId'] for item in search_response['items']]
+         videos_response = st.session_state.youtube.videos().list(
+             part="snippet,statistics",
+             id=",".join(video_ids)
+         ).execute()
+ 
+         # Process results
          data = []
+         for item in videos_response['items']:
+             snippet = item['snippet']
+             stats = item.get('statistics', {})
+ 
              data.append({
+                 'source': 'YouTube',
+                 'date': datetime.strptime(snippet['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'),
+                 'title': snippet['title'],
+                 'text': f"{snippet['title']}\n{snippet['description']}",
+                 'url': f"https://youtu.be/{item['id']}",
+                 'views': int(stats.get('viewCount', 0)),
+                 'likes': int(stats.get('likeCount', 0)),
+                 'comments': int(stats.get('commentCount', 0)),
+                 'thumbnail': snippet['thumbnails']['default']['url']
              })
+ 
          return pd.DataFrame(data)
+ 
+     except HttpError as e:
+         if e.resp.status == 403:
+             st.error("YouTube API quota exceeded. Try again later.")
+         else:
+             st.error(f"YouTube API error: {str(e)}")
+         return pd.DataFrame()
+ 
      except Exception as e:
+         st.error(f"Error fetching YouTube data: {str(e)}")
          return pd.DataFrame()
  
+ def fetch_reddit_data(keyword, limit=50):
+     """Fetch Reddit posts with error handling"""
+     if st.session_state.reddit is None:
+         st.warning("Reddit API not configured")
+         return pd.DataFrame()
+ 
      try:
+         posts = st.session_state.reddit.subreddit("all").search(
+             query=keyword,
+             limit=limit,
+             time_filter="month"
          )
  
          data = []
+         for post in posts:
              data.append({
+                 'source': 'Reddit',
+                 'date': datetime.fromtimestamp(post.created_utc),
+                 'title': post.title,
+                 'text': f"{post.title}\n\n{post.selftext}",
+                 'url': f"https://reddit.com{post.permalink}",
+                 'upvotes': post.score,
+                 'comments': post.num_comments,
+                 'thumbnail': post.thumbnail if post.thumbnail not in ['self', 'default'] else None
              })
+ 
          return pd.DataFrame(data)
+ 
      except Exception as e:
+         st.error(f"Error fetching Reddit data: {str(e)}")
          return pd.DataFrame()
  
  # --------------------------
  # Visualization Functions
  # --------------------------
  
+ def create_wordcloud(text):
+     """Generate a word cloud with proper error handling"""
      try:
+         wc = WordCloud(
+             width=800,
+             height=400,
+             background_color='white',
+             stopwords=set(nltk.corpus.stopwords.words('english')),
+             collocations=False
+         ).generate(text)
+ 
+         img = BytesIO()
+         wc.to_image().save(img, format='PNG')
+         return base64.b64encode(img.getvalue()).decode()
      except Exception as e:
+         st.error(f"Word cloud error: {str(e)}")
+         return None
  
+ def plot_sentiment_timeline(df):
+     """Interactive timeline plot of sentiment"""
      try:
+         fig = px.line(
+             df,
+             x='date',
+             y='average_sentiment',
+             color='source',
+             title='Sentiment Over Time',
+             labels={'average_sentiment': 'Sentiment Score', 'date': 'Date'},
+             hover_data=['title', 'source', 'url'],
+             template='plotly_white'
          )
+         fig.update_traces(mode='markers+lines')
+         fig.update_layout(hovermode='x unified')
          st.plotly_chart(fig, use_container_width=True)
      except Exception as e:
+         st.error(f"Plotting error: {str(e)}")
  
  # --------------------------
+ # UI Components
  # --------------------------
  
+ def sidebar_controls():
+     """Render sidebar controls"""
      with st.sidebar:
+         st.title("🔧 Controls")
+ 
          analysis_mode = st.radio(
              "Analysis Mode",
+             ["Text Input", "Live Data"],
+             index=0,
+             key='analysis_mode'
          )
  
+         if st.session_state.analysis_mode == "Text Input":
+             st.session_state.user_text = st.text_area(
+                 "Enter your text:",
                  height=200,
+                 placeholder="Type or paste text here..."
              )
          else:
+             st.session_state.search_keyword = st.text_input(
+                 "Search keyword:",
+                 placeholder="e.g., Tesla, AI, etc."
+             )
+ 
+             col1, col2 = st.columns(2)
+             st.session_state.use_reddit = col1.checkbox("Reddit", True)
+             st.session_state.use_youtube = col2.checkbox("YouTube", True)
+ 
+             st.session_state.max_results = st.slider(
+                 "Max results per source:",
+                 10, 100, 25
              )
  
          st.markdown("---")
+         if st.button("Analyze", type="primary"):
+             st.session_state.analyze_clicked = True
+         if st.button("Reset"):
+             st.session_state.clear()
+             st.rerun()
+ 
+ # --------------------------
+ # Main App
+ # --------------------------
+ 
+ def main():
+     st.title("📊 SentimentSync Pro")
+     st.caption("Advanced sentiment analysis across multiple platforms")
  
+     sidebar_controls()
+ 
+     if not st.session_state.get('analyze_clicked'):
+         st.info("Configure your analysis using the sidebar controls")
+         return
+ 
+     # Perform analysis based on selected mode
+     if st.session_state.analysis_mode == "Text Input":
+         analyze_text_input()
+     else:
+         analyze_live_data()
+ 
+ def analyze_text_input():
+     """Analyze manually entered text"""
+     if not st.session_state.user_text or len(st.session_state.user_text.strip()) < 10:
+         st.warning("Please enter at least 10 characters of text")
+         return
+ 
+     with st.spinner("Analyzing text..."):
+         # Overall sentiment
+         sentiment = analyze_sentiment(st.session_state.user_text)
+ 
+         # Display results
+         col1, col2, col3 = st.columns(3)
+         col1.metric("VADER Score", f"{sentiment['vader']:.2f}",
+                     delta_color="inverse" if sentiment['vader'] < 0 else "normal")
+         col2.metric("BERT Sentiment", sentiment['bert_label'], f"{sentiment['bert_score']:.2f}")
+         col3.metric("TextBlob Score", f"{sentiment['textblob']:.2f}")
+ 
+         # Word cloud
+         st.subheader("Word Cloud")
+         wc_img = create_wordcloud(st.session_state.user_text)
+         if wc_img:
+             st.image(f"data:image/png;base64,{wc_img}", use_container_width=True)
+ 
+         # Sentence-level analysis
+         try:
+             sentences = nltk.sent_tokenize(st.session_state.user_text)
+             if len(sentences) > 1:
+                 st.subheader("Sentence Breakdown")
  
+                 sent_data = []
+                 for sent in sentences:
+                     sent_sentiment = analyze_sentiment(sent)
+                     sent_data.append({
+                         'Sentence': sent[:150] + ("..." if len(sent) > 150 else ""),
+                         'VADER': sent_sentiment['vader'],
+                         'BERT': sent_sentiment['bert'],
+                         'TextBlob': sent_sentiment['textblob'],
+                         'Average': np.mean([
+                             sent_sentiment['vader'],
+                             sent_sentiment['bert'],
+                             sent_sentiment['textblob']
+                         ])
+                     })
  
+                 sent_df = pd.DataFrame(sent_data)
+                 st.dataframe(
+                     sent_df.style.background_gradient(
+                         cmap='RdYlGn',
+                         subset=['VADER', 'BERT', 'TextBlob', 'Average'],
+                         vmin=-1,
+                         vmax=1
+                     ),
+                     use_container_width=True,
+                     height=min(400, 35 * len(sent_df))
+                 )
+         except Exception as e:
+             st.error(f"Sentence analysis error: {str(e)}")
+ 
+ def analyze_live_data():
404
+ """Analyze live data from APIs"""
405
+ if not st.session_state.search_keyword:
406
+ st.warning("Please enter a search keyword")
407
+ return
408
+
409
+ if not st.session_state.use_reddit and not st.session_state.use_youtube:
410
+ st.warning("Please select at least one data source")
411
+ return
412
+
413
+ with st.spinner(f"Fetching data for '{st.session_state.search_keyword}'..."):
414
+ # Fetch data
415
+ dfs = []
416
+
417
+ if st.session_state.use_reddit:
418
+ reddit_df = fetch_reddit_data(
419
+ st.session_state.search_keyword,
420
+ st.session_state.max_results
421
+ )
422
+ if not reddit_df.empty:
423
+ dfs.append(reddit_df)
424
+
425
+ if st.session_state.use_youtube:
426
+ youtube_df = fetch_youtube_data(
427
+ st.session_state.search_keyword,
428
+ st.session_state.max_results
429
+ )
430
+ if not youtube_df.empty:
431
+ dfs.append(youtube_df)
432
+
433
+ if not dfs:
434
+ st.error("No data found. Try different keywords or sources.")
435
+ return
436
+
437
+ df = pd.concat(dfs, ignore_index=True)
438
+
439
+ # Analyze sentiment
440
+ with st.spinner("Analyzing sentiment..."):
441
+ sentiment_results = []
442
+ for text in df['text']:
443
+ res = analyze_sentiment(text)
444
+ sentiment_results.append({
445
+ 'vader': res['vader'],
446
+ 'bert': res['bert'],
447
+ 'textblob': res['textblob'],
448
+ 'average_sentiment': np.mean([res['vader'], res['bert'], res['textblob']])
449
+ })
450
 
451
+ sentiment_df = pd.DataFrame(sentiment_results)
452
+ df = pd.concat([df, sentiment_df], axis=1)
453
+
454
+ # Filter recent data
455
+ df = df[df['date'] >= (datetime.now() - timedelta(days=60))]
456
+ df = df.sort_values('date')
457
+
458
+ # Calculate moving average
459
+ df['rolling_sentiment'] = df['average_sentiment'].rolling(
460
+ window=7,
461
+ min_periods=1
462
+ ).mean()
463
+
464
+ # Display results
465
+ st.subheader(f"Results for: '{st.session_state.search_keyword}'")
466
+
467
+ # Overall metrics
468
+ avg_sentiment = df['average_sentiment'].mean()
469
+ pos_pct = (df['average_sentiment'] > Config.POSITIVE_THRESHOLD).mean() * 100
470
+ neg_pct = (df['average_sentiment'] < Config.NEGATIVE_THRESHOLD).mean() * 100
471
+
472
+ col1, col2, col3 = st.columns(3)
473
+ col1.metric("Average Sentiment", f"{avg_sentiment:.2f}")
474
+ col2.metric("Positive Content", f"{pos_pct:.1f}%")
475
+ col3.metric("Negative Content", f"{neg_pct:.1f}%")
476
+
477
+ # Word cloud
478
+ st.subheader("Word Cloud")
479
+ combined_text = " ".join(df['text'])
480
+ wc_img = create_wordcloud(combined_text)
481
+ if wc_img:
482
+ st.image(f"data:image/png;base64,{wc_img}", use_container_width=True)
483
+
484
+ # Timeline visualization
485
+ st.subheader("Sentiment Timeline")
486
+ plot_sentiment_timeline(df)
487
+
488
+ # Raw data
489
+ with st.expander("View Raw Data"):
490
+ st.dataframe(df, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
 
492
  if __name__ == "__main__":
493
  main()
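
Both versions score text the same way: VADER's compound score and TextBlob's polarity already fall in [-1, 1], the BERT star label is mapped onto that same scale, and the per-text score is the mean of the three. A standalone sketch of the scheme, with the BERT prediction stubbed by a sample label rather than loading the pipeline:

    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    from textblob import TextBlob

    # Star-label mapping used by both versions of app.py
    label_map = {'1 star': -1, '2 stars': -0.5, '3 stars': 0, '4 stars': 0.5, '5 stars': 1}

    text = "The new update is great, but battery life got noticeably worse."
    vader = SentimentIntensityAnalyzer().polarity_scores(text)['compound']  # in [-1, 1]
    textblob = TextBlob(text).sentiment.polarity                            # in [-1, 1]
    bert = label_map.get('4 stars', 0)  # stand-in for pipeline(text[:512])[0]['label']

    average = (vader + bert + textblob) / 3
    print(f"VADER={vader:.2f}  BERT={bert:.2f}  TextBlob={textblob:.2f}  average={average:.2f}")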