Soundaryasos committed on
Commit
0b45956
·
verified ·
1 Parent(s): 079e67d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -15
app.py CHANGED
@@ -16,42 +16,60 @@ from googleapiclient.discovery import build
16
  import warnings
17
  warnings.filterwarnings('ignore')
18
 
 
19
  np.random.seed(42)
20
  tf.random.set_seed(42)
21
 
 
22
  st.set_page_config(page_title="Sentiment Pulse", layout="wide")
23
  st.markdown("<h1 style='text-align: center; color: #7B68EE;'>Sentiment Pulse: Multi-Platform Analysis</h1>", unsafe_allow_html=True)
24
 
 
25
  REDDIT_CLIENT_ID = "S7pTXhj5JDFGDb3-_zrJEA"
26
  REDDIT_CLIENT_SECRET = "QP3NYN4lrAKVLrBamzLGrpFywiVg8w"
27
  REDDIT_USER_AGENT = "SoundaryaR_Bot/1.0"
28
  YOUTUBE_API_KEY = "AIzaSyAChqXPaiNE9hKhApkgjgonzdgiCCOo"
29
 
 
30
  reddit = praw.Reddit(client_id=REDDIT_CLIENT_ID, client_secret=REDDIT_CLIENT_SECRET, user_agent=REDDIT_USER_AGENT)
31
  youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
 
 
32
  bert_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
33
  vader_analyzer = SentimentIntensityAnalyzer()
34
 
35
  @st.cache_data
36
  def load_twitter_data():
37
- df = pd.read_csv("twitter_dataset.csv", encoding='latin-1',
38
- names=['sentiment', 'id', 'date', 'query', 'user', 'text'])
39
- df['date'] = pd.to_datetime(df['date'])
40
- df['sentiment'] = df['sentiment'].map({0: 'negative', 4: 'positive'})
41
- return df.sample(10000)
 
 
 
 
42
 
43
  def fetch_reddit_data(keyword):
44
- subreddit = reddit.subreddit("all")
45
- posts = subreddit.search(keyword, limit=100)
46
- return pd.DataFrame([{'date': datetime.fromtimestamp(post.created_utc), 'text': post.title + " " + post.selftext} for post in posts])
 
 
 
 
47
 
48
  def fetch_youtube_data(keyword):
49
- request = youtube.search().list(q=keyword, part="snippet", maxResults=50, type="video")
50
- response = request.execute()
51
- return pd.DataFrame([{
52
- 'date': datetime.strptime(item['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ"),
53
- 'text': item['snippet']['title'] + " " + item['snippet']['description']
54
- } for item in response['items']])
 
 
 
 
55
 
56
  def get_bert_sentiment(text):
57
  try:
@@ -70,9 +88,11 @@ def combined_sentiment(text):
70
  avg_score = (bert_score + abs(vader_score)) / 2
71
  return 1 if avg_score > 0.5 else 0, avg_score
72
 
 
73
  st.sidebar.title("Keyword Search")
74
  keyword = st.sidebar.text_input("Enter a keyword (e.g., 'happy')", value="happy")
75
 
 
76
  twitter_df = load_twitter_data()
77
  twitter_filtered = twitter_df[twitter_df['text'].str.contains(keyword, case=False, na=False)]
78
  reddit_df = fetch_reddit_data(keyword)
@@ -103,6 +123,7 @@ else:
103
  st.warning(f"Not enough {platform} data for prediction.")
104
  fig, ax = plt.subplots()
105
  ax.plot(daily_sentiment['date'], daily_sentiment['combined_score'], label='Historical')
 
106
  st.pyplot(fig)
107
  continue
108
 
@@ -159,4 +180,4 @@ else:
159
  explainer = shap.Explainer(lr_model, X_lr)
160
  shap_values = explainer(X_lr)
161
  shap.plots.beeswarm(shap_values, show=False)
162
- st.pyplot(plt.gcf())
 
16
  import warnings
17
  warnings.filterwarnings('ignore')
18
 
19
+ # Set random seeds for reproducibility
20
  np.random.seed(42)
21
  tf.random.set_seed(42)
22
 
23
+ # Streamlit page configuration
24
  st.set_page_config(page_title="Sentiment Pulse", layout="wide")
25
  st.markdown("<h1 style='text-align: center; color: #7B68EE;'>Sentiment Pulse: Multi-Platform Analysis</h1>", unsafe_allow_html=True)
26
 
27
+ # API credentials (replace with your own if needed)
+ # SECURITY NOTE(review): these credentials are hardcoded and now live in public
+ # git history — revoke/rotate them and load from st.secrets or environment
+ # variables instead of committing them.
28
  REDDIT_CLIENT_ID = "S7pTXhj5JDFGDb3-_zrJEA"
29
  REDDIT_CLIENT_SECRET = "QP3NYN4lrAKVLrBamzLGrpFywiVg8w"
30
  REDDIT_USER_AGENT = "SoundaryaR_Bot/1.0"
31
  YOUTUBE_API_KEY = "AIzaSyAChqXPaiNE9hKhApkgjgonzdgiCCOo"
32

33
+ # Initialize APIs
+ # NOTE(review): these clients are built at import time; any auth failure will
+ # crash the whole app at startup rather than inside the fetch functions.
34
  reddit = praw.Reddit(client_id=REDDIT_CLIENT_ID, client_secret=REDDIT_CLIENT_SECRET, user_agent=REDDIT_USER_AGENT)
35
  youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
36
+
37
+ # Load sentiment analysis models
38
  bert_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
39
  vader_analyzer = SentimentIntensityAnalyzer()
40
 
41
@st.cache_data
def load_twitter_data():
    """Load the Sentiment140-style Twitter CSV and return a 10k-row sample.

    Returns:
        DataFrame with columns ['sentiment', 'id', 'date', 'query', 'user',
        'text']. 'sentiment' is mapped from the Sentiment140 encoding
        (0 -> 'negative', 4 -> 'positive'); unmapped codes become NaN.
        If the CSV is missing, a Streamlit error is shown and an EMPTY frame
        with the same columns is returned, so downstream
        df['text'].str.contains(...) filters don't raise KeyError.
    """
    columns = ['sentiment', 'id', 'date', 'query', 'user', 'text']
    try:
        df = pd.read_csv("twitter_dataset.csv", encoding='latin-1',
                         names=columns)
        # errors='coerce' turns unparseable dates into NaT instead of raising.
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        df['sentiment'] = df['sentiment'].map({0: 'negative', 4: 'positive'})
        # Guard against datasets smaller than the requested sample size —
        # df.sample(10000) raises ValueError when len(df) < 10000.
        return df.sample(min(10000, len(df)), random_state=42)
    except FileNotFoundError:
        st.error("twitter_dataset.csv not found. Please ensure the file is in the working directory.")
        return pd.DataFrame(columns=columns)
52
 
53
def fetch_reddit_data(keyword):
    """Search r/all for `keyword` and return up to 100 matching posts.

    Args:
        keyword: free-text search string passed to Reddit's search API.

    Returns:
        DataFrame with columns ['date', 'text'] where 'date' is the post
        creation time (converted from the UTC epoch timestamp — NOTE(review):
        fromtimestamp yields local time; confirm UTC is intended) and 'text'
        is title + selftext. On any API failure, or when the search returns
        nothing, an empty frame WITH the same columns is returned so
        downstream code doesn't KeyError.
    """
    try:
        posts = reddit.subreddit("all").search(keyword, limit=100)
        rows = [{'date': datetime.fromtimestamp(post.created_utc),
                 'text': post.title + " " + post.selftext}
                for post in posts]
        # Passing columns= explicitly keeps the schema even when rows == [].
        return pd.DataFrame(rows, columns=['date', 'text'])
    except Exception as e:
        st.error(f"Error fetching Reddit data: {e}")
        return pd.DataFrame(columns=['date', 'text'])
61
 
62
def fetch_youtube_data(keyword):
    """Search YouTube for `keyword` and return up to 50 video results.

    Args:
        keyword: free-text query passed to the YouTube Data API search.list.

    Returns:
        DataFrame with columns ['date', 'text'] where 'date' is parsed from
        the RFC 3339 'publishedAt' field and 'text' is title + description.
        On any API failure, or when the search returns no items, an empty
        frame WITH the same columns is returned so downstream code doesn't
        KeyError.
    """
    try:
        request = youtube.search().list(q=keyword, part="snippet", maxResults=50, type="video")
        response = request.execute()
        rows = []
        for item in response.get('items', []):
            snippet = item['snippet']
            rows.append({
                'date': datetime.strptime(snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ"),
                # 'description' (and rarely 'title') can be missing/empty in
                # search results — default to "" instead of raising KeyError.
                'text': snippet.get('title', '') + " " + snippet.get('description', ''),
            })
        # Passing columns= explicitly keeps the schema even when rows == [].
        return pd.DataFrame(rows, columns=['date', 'text'])
    except Exception as e:
        st.error(f"Error fetching YouTube data: {e}")
        return pd.DataFrame(columns=['date', 'text'])
73
 
74
  def get_bert_sentiment(text):
75
  try:
 
88
  avg_score = (bert_score + abs(vader_score)) / 2
89
  return 1 if avg_score > 0.5 else 0, avg_score
90
 
91
+ # Sidebar for keyword input
92
  st.sidebar.title("Keyword Search")
93
  keyword = st.sidebar.text_input("Enter a keyword (e.g., 'happy')", value="happy")
94
 
95
+ # Load and filter data
96
  twitter_df = load_twitter_data()
97
  twitter_filtered = twitter_df[twitter_df['text'].str.contains(keyword, case=False, na=False)]
98
  reddit_df = fetch_reddit_data(keyword)
 
123
  st.warning(f"Not enough {platform} data for prediction.")
124
  fig, ax = plt.subplots()
125
  ax.plot(daily_sentiment['date'], daily_sentiment['combined_score'], label='Historical')
126
+ ax.legend()
127
  st.pyplot(fig)
128
  continue
129
 
 
180
  explainer = shap.Explainer(lr_model, X_lr)
181
  shap_values = explainer(X_lr)
182
  shap.plots.beeswarm(shap_values, show=False)
183
+ st.pyplot(plt.gcf())