Soundaryasos committed on
Commit
6ac8f81
·
verified ·
1 Parent(s): b436287

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -180
app.py CHANGED
@@ -1,185 +1,105 @@
1
  import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- from datetime import datetime, timedelta
6
- from sklearn.preprocessing import MinMaxScaler
7
- from sklearn.linear_model import LogisticRegression
8
- from sklearn.ensemble import RandomForestRegressor
9
- from sklearn.model_selection import train_test_split
10
- from sklearn.metrics import mean_squared_error
11
  from transformers import pipeline
12
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
13
- import shap
14
- import praw
15
- from googleapiclient.discovery import build
16
- import warnings
17
- warnings.filterwarnings('ignore')
18
-
19
- np.random.seed(42)
20
-
21
- st.set_page_config(page_title="Sentiment Pulse", layout="wide")
22
- st.markdown("<h1 style='text-align: center; color: #7B68EE;'>Sentiment Pulse: Multi-Platform Analysis</h1>", unsafe_allow_html=True)
23
-
24
- # API credentials — WARNING: live secrets were committed in plain text here.
- # These keys are public now and must be revoked/rotated; load credentials
- # from environment variables or a secrets manager instead.
- REDDIT_CLIENT_ID = "<REDACTED>"
- REDDIT_CLIENT_SECRET = "<REDACTED>"
- REDDIT_USER_AGENT = "SoundaryaR_Bot/1.0"
- YOUTUBE_API_KEY = "<REDACTED>"
29
-
30
- reddit = praw.Reddit(client_id=REDDIT_CLIENT_ID, client_secret=REDDIT_CLIENT_SECRET, user_agent=REDDIT_USER_AGENT)
31
- youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
32
- bert_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
33
  vader_analyzer = SentimentIntensityAnalyzer()
34
 
35
- @st.cache_data
36
- def load_twitter_data():
37
- try:
38
- df = pd.read_csv("twitter_dataset.csv", encoding='latin-1',
39
- names=['sentiment', 'id', 'date', 'query', 'user', 'text'])
40
- df['date'] = pd.to_datetime(df['date'], errors='coerce')
41
- df['sentiment'] = df['sentiment'].map({0: 'negative', 4: 'positive'})
42
- return df.sample(10000, random_state=42)
43
- except FileNotFoundError:
44
- st.error("twitter_dataset.csv not found. Please ensure the file is in the working directory.")
45
- return pd.DataFrame()
46
-
47
- def fetch_reddit_data(keyword):
48
- try:
49
- subreddit = reddit.subreddit("all")
50
- posts = subreddit.search(keyword, limit=100)
51
- return pd.DataFrame([{'date': datetime.fromtimestamp(post.created_utc), 'text': post.title + " " + post.selftext} for post in posts])
52
- except Exception as e:
53
- st.error(f"Error fetching Reddit data: {e}")
54
- return pd.DataFrame()
55
-
56
- def fetch_youtube_data(keyword):
57
- try:
58
- request = youtube.search().list(q=keyword, part="snippet", maxResults=50, type="video")
59
- response = request.execute()
60
- return pd.DataFrame([{
61
- 'date': datetime.strptime(item['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ"),
62
- 'text': item['snippet']['title'] + " " + item['snippet']['description']
63
- } for item in response['items']])
64
- except Exception as e:
65
- st.error(f"Error fetching YouTube data: {e}")
66
- return pd.DataFrame()
67
-
68
- def get_bert_sentiment(text):
69
- try:
70
- result = bert_classifier(text[:512])[0]
71
- return 1 if result['label'] == 'POSITIVE' else 0, result['score']
72
- except:
73
- return 0, 0.5
74
-
75
- def get_vader_sentiment(text):
76
- score = vader_analyzer.polarity_scores(text)['compound']
77
- return 1 if score > 0 else 0, score
78
-
79
- def combined_sentiment(text):
80
- bert_label, bert_score = get_bert_sentiment(text)
81
- vader_label, vader_score = get_vader_sentiment(text)
82
- avg_score = (bert_score + abs(vader_score)) / 2
83
- return 1 if avg_score > 0.5 else 0, avg_score
84
-
85
- st.sidebar.title("Keyword Search")
86
- keyword = st.sidebar.text_input("Enter a keyword (e.g., 'happy')", value="happy")
87
-
88
- twitter_df = load_twitter_data()
89
- twitter_filtered = twitter_df[twitter_df['text'].str.contains(keyword, case=False, na=False)]
90
- reddit_df = fetch_reddit_data(keyword)
91
- youtube_df = fetch_youtube_data(keyword)
92
-
93
- platforms = {'Twitter': twitter_filtered, 'Reddit': reddit_df, 'YouTube': youtube_df}
94
- valid_platforms = {k: v for k, v in platforms.items() if not v.empty}
95
-
96
- if not valid_platforms:
97
- st.error(f"Error: '{keyword}' is not a valid keyword. No matching data found across Twitter, Reddit, or YouTube.")
98
- else:
99
- for platform, df in valid_platforms.items():
100
- st.subheader(f"{platform} Analysis for '{keyword}'")
101
- st.write(f"{platform} Data Preview:", df.head())
102
-
103
- with st.spinner(f"Analyzing {platform} sentiments..."):
104
- df['bert_sentiment'], df['bert_score'] = zip(*df['text'].apply(get_bert_sentiment))
105
- df['vader_sentiment'], df['vader_score'] = zip(*df['text'].apply(get_vader_sentiment))
106
- df['combined_sentiment'], df['combined_score'] = zip(*df['text'].apply(combined_sentiment))
107
-
108
- st.write(df[['text', 'combined_sentiment', 'combined_score']].head())
109
-
110
- daily_sentiment = df.groupby(df['date'].dt.date)['combined_score'].mean().reset_index()
111
- daily_sentiment['date'] = pd.to_datetime(daily_sentiment['date'])
112
- daily_sentiment['tweet_count'] = df.groupby(df['date'].dt.date).size().values
113
-
114
- if len(daily_sentiment) < 2:
115
- st.warning(f"Not enough {platform} data for prediction.")
116
- fig, ax = plt.subplots()
117
- ax.plot(daily_sentiment['date'], daily_sentiment['combined_score'], label='Historical')
118
- ax.legend()
119
- st.pyplot(fig)
120
- continue
121
-
122
- scaler = MinMaxScaler()
123
- daily_sentiment['scaled_score'] = scaler.fit_transform(daily_sentiment[['combined_score']])
124
-
125
- # Prepare features: use lagged sentiment scores and tweet counts
126
- X = pd.DataFrame({
127
- 'lag1_score': daily_sentiment['scaled_score'].shift(1),
128
- 'tweet_count': daily_sentiment['tweet_count']
129
- }).dropna()
130
- y = daily_sentiment['scaled_score'][1:] # Align with lagged features
131
-
132
- if len(X) < 5: # Minimum data for meaningful split
133
- st.warning(f"Not enough {platform} data points for prediction after lagging.")
134
- fig, ax = plt.subplots()
135
- ax.plot(daily_sentiment['date'], daily_sentiment['combined_score'], label='Historical')
136
- ax.legend()
137
- st.pyplot(fig)
138
- continue
139
-
140
- # Split data for validation
141
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
142
-
143
- # Train Logistic Regression (using regression mode with continuous output)
144
- lr_model = LogisticRegression(max_iter=1000)
145
- lr_model.fit(X_train, (y_train > 0.5).astype(int)) # Binary classification for validation
146
- lr_pred_train = lr_model.predict_proba(X_train)[:, 1]
147
- lr_mse = mean_squared_error(y_train, lr_pred_train)
148
-
149
- # Train Random Forest
150
- rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
151
- rf_model.fit(X_train, y_train)
152
- rf_pred_train = rf_model.predict(X_train)
153
- rf_mse = mean_squared_error(y_train, rf_pred_train)
154
-
155
- # Weighted ensemble based on inverse MSE
156
- total_mse = lr_mse + rf_mse
157
- lr_weight = (1 - lr_mse / total_mse) if total_mse > 0 else 0.5
158
- rf_weight = (1 - rf_mse / total_mse) if total_mse > 0 else 0.5
159
-
160
- # Predict 30 days into the future
161
- last_data = X.iloc[-1:].copy()
162
- predictions = []
163
- future_dates = [daily_sentiment['date'].iloc[-1] + timedelta(days=i) for i in range(1, 31)]
164
-
165
- for _ in range(30):
166
- lr_pred = lr_model.predict_proba(last_data)[:, 1][0]
167
- rf_pred = rf_model.predict(last_data)[0]
168
- ensemble_pred = lr_weight * lr_pred + rf_weight * rf_pred
169
- predictions.append(ensemble_pred)
170
- last_data['lag1_score'] = ensemble_pred # Update lag for next prediction
171
-
172
- predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
173
-
174
- st.subheader(f"{platform} 30-Day Prediction (Ensemble: LR + RF)")
175
- fig, ax = plt.subplots()
176
- ax.plot(daily_sentiment['date'], daily_sentiment['combined_score'], 'g-', label='Historical')
177
- ax.plot(future_dates, predictions, 'b--', label=f'Predicted (LR: {lr_weight:.2f}, RF: {rf_weight:.2f})')
178
- ax.legend()
179
- st.pyplot(fig)
180
-
181
- st.subheader(f"{platform} Random Forest SHAP")
182
- explainer = shap.TreeExplainer(rf_model)
183
- shap_values = explainer.shap_values(X)
184
- shap.summary_plot(shap_values, X, show=False)
185
- st.pyplot(plt.gcf())
 
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
2
  from transformers import pipeline
3
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
4
+ import numpy as np
5
+ import pandas as pd
6
+ from datetime import datetime, timedelta
7
+ import plotly.express as px
8
+ from sklearn.linear_model import LinearRegression
9
+ from wordcloud import WordCloud
10
+ import base64
11
+ from io import BytesIO
12
+ import nltk
13
+ from textblob import TextBlob
14
+
15
# Fetch the 'punkt' tokenizer data once at startup; TextBlob relies on
# NLTK resources for its text processing.
nltk.download('punkt')

# Initialize sentiment models.
# Multilingual BERT fine-tuned for sentiment (outputs 1-5 star labels).
_BERT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
bert_sentiment = pipeline("sentiment-analysis", model=_BERT_MODEL_NAME)

# Rule-based VADER analyzer (compound score in [-1, 1]).
vader_analyzer = SentimentIntensityAnalyzer()
20
 
21
# Generate sample past sentiment data: no real history is persisted, so we
# synthesize a 14-day window ending today with random scores in [-1, 1].
dates = [datetime.today() - timedelta(days=i) for i in range(14)]
sentiment_scores = np.random.uniform(-1, 1, len(dates))
df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})

# Fit a simple linear trend: day index -> sentiment score.
X = np.arange(len(df)).reshape(-1, 1)
y = df["Sentiment Score"]
model = LinearRegression()
model.fit(X, y)

# Extrapolate that trend over the next 7 days.
future_dates = [datetime.today() + timedelta(days=offset) for offset in range(1, 8)]
X_future = np.arange(len(df), len(df) + 7).reshape(-1, 1)
predictions = model.predict(X_future)

future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
39
def generate_wordcloud(text):
    """Render *text* as an 800x400 word cloud and return it base64-encoded (PNG)."""
    cloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    buffer = BytesIO()
    cloud.to_image().save(buffer, format='PNG')
    encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode()
45
+
46
# Streamlit app setup: main page title plus a sidebar that collects the
# text to analyze.
st.title("🌟 Advanced Sentiment Analysis Dashboard")

sidebar = st.sidebar
sidebar.header("🔍 Sentiment Analysis Controls")
user_input = sidebar.text_area("Enter text for sentiment analysis")
52
+
53
def display_sentiment_analysis(vader_score, bert_result, textblob_score, text=None):
    """Render the sentiment results for one piece of text.

    Parameters
    ----------
    vader_score : float
        VADER compound score in [-1, 1].
    bert_result : dict
        One entry from the transformers pipeline, e.g. {'label': ..., 'score': ...}.
    textblob_score : float
        TextBlob polarity in [-1, 1].
    text : str, optional
        The analyzed text, used to build the word cloud. Defaults to the
        module-level ``user_input`` for backward compatibility (the original
        implementation read the global directly).
    """
    if text is None:
        text = user_input

    st.subheader("📊 Sentiment Analysis Results:")
    st.write(f"**VADER Sentiment Score**: {vader_score:.2f}")
    st.write(f"**BERT Sentiment**: {bert_result['label']} ({bert_result['score']:.2f})")
    st.write(f"**TextBlob Sentiment Polarity**: {textblob_score:.2f}")

    # Crude split of the compound score into positive/negative/neutral mass.
    # NOTE(review): 'Negative' is <= 0 here, so its bar renders below zero —
    # kept as-is to preserve the existing chart behavior.
    sentiment_data = {'Positive': max(0, vader_score), 'Negative': min(0, vader_score), 'Neutral': 1 - abs(vader_score)}
    sentiment_df = pd.DataFrame(list(sentiment_data.items()), columns=["Sentiment", "Score"])
    st.bar_chart(sentiment_df.set_index("Sentiment"))

    # Word cloud of the analyzed text, embedded as a base64 data URI.
    wordcloud_img = f'data:image/png;base64,{generate_wordcloud(text)}'
    st.image(wordcloud_img, use_column_width=True)
66
+
67
# Run the three analyzers when the user clicks the sidebar button.
if st.sidebar.button("Analyze Sentiment"):
    if not user_input:
        st.warning("⚠️ Please enter some text for analysis.")
    else:
        with st.spinner("Analyzing text..."):
            vader_score = vader_analyzer.polarity_scores(user_input)['compound']
            bert_result = bert_sentiment(user_input)[0]
            textblob_score = TextBlob(user_input).sentiment.polarity
            display_sentiment_analysis(vader_score, bert_result, textblob_score)
76
+
77
# Past sentiment trends
st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
history_fig = px.line(df, x='Date', y='Sentiment Score', title='Sentiment Over Time', markers=True, line_shape='spline')
st.plotly_chart(history_fig)

# Future sentiment predictions
st.subheader("🔮 Sentiment Prediction for Next 7 Days")
forecast_fig = px.line(future_df, x='Date', y='Predicted Sentiment', title='Predicted Sentiment Trend', markers=True, line_shape='spline')
st.plotly_chart(forecast_fig)

# Sentiment distribution pie chart: count of positive vs non-positive days.
st.subheader("📊 Sentiment Distribution")
positive_days = (df['Sentiment Score'] > 0).sum()
negative_days = (df['Sentiment Score'] <= 0).sum()
distribution_fig = px.pie(values=[positive_days, negative_days], names=['Positive', 'Negative'], title='Sentiment Distribution', hole=0.3)
st.plotly_chart(distribution_fig)

# Sentiment scatter plot
st.subheader("🔎 Sentiment Scatter Plot (Last 14 Days)")
scatter_fig = px.scatter(df, x='Date', y='Sentiment Score', title='Sentiment Over Time')
st.plotly_chart(scatter_fig)

# Rolling average sentiment (first 6 rows are NaN until the window fills).
st.subheader("📈 Rolling Average of Sentiment (7-Day Window)")
df['Rolling Avg Sentiment'] = df['Sentiment Score'].rolling(window=7).mean()
rolling_fig = px.line(df, x='Date', y='Rolling Avg Sentiment', title="7-Day Rolling Average Sentiment")
st.plotly_chart(rolling_fig)
102
+
103
# Reset button: trigger a full script rerun to clear the current analysis.
if st.sidebar.button('🔄 Reset Analysis'):
    # st.experimental_rerun() was deprecated and later removed from Streamlit;
    # st.rerun() is the supported replacement. Fall back for older installs.
    if hasattr(st, "rerun"):
        st.rerun()
    else:
        st.experimental_rerun()