poemsforaphrodite committed
Commit 303bb41 · verified · Parent: ff34b27

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +106 -43
src/streamlit_app.py CHANGED
@@ -2,10 +2,11 @@ import os
 import streamlit as st
 import pandas as pd
 import json
-from datetime import datetime
+from datetime import datetime, timedelta
 import plotly.express as px
 import numpy as np
 from collections import Counter
+import pytz
 
 # Try to import Google Generative AI, but handle it gracefully if not installed
 try:
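
Note: pytz is the new runtime dependency backing the IST conversion introduced later in this diff. (timedelta also joins the import, though no use of it appears in the hunks below.) On Python 3.9+ the stdlib zoneinfo module could serve the same purpose without an extra package — a minimal sketch of that alternative, not what this commit uses:

    from datetime import datetime
    from zoneinfo import ZoneInfo  # stdlib since Python 3.9

    # Attach UTC to a known instant, then convert to IST (UTC+05:30)
    utc_dt = datetime(2025, 1, 15, 12, 0, 0, tzinfo=ZoneInfo("UTC"))
    ist_dt = utc_dt.astimezone(ZoneInfo("Asia/Kolkata"))
    print(ist_dt)  # 2025-01-15 17:30:00+05:30
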
@@ -53,7 +54,7 @@ def get_gemini_summary(tweets_data, context=""):
 
     all_tweets = "\n\n".join(tweets_text)
 
-    # Create a prompt for Gemini
+    # Create a prompt for Gemini with enhanced analysis requirements
     prompt = f"""
     {context}
 
@@ -61,14 +62,24 @@ def get_gemini_summary(tweets_data, context=""):
 
     {all_tweets}
 
-    Please provide a comprehensive summary of these tweets, including:
+    Please provide a comprehensive analysis of these tweets, including:
+
     1. Main themes and topics discussed
     2. Overall sentiment
     3. Key insights or patterns
     4. Most engaging content
-    5. Any recommendations based on the analysis
 
-    Format the summary in a clear, structured way with bullet points where appropriate.
+    Additionally, please provide these specific analyses:
+
+    5. Political/Brand Affiliation Analysis: Analyze which party or brand the reply tweeters belong to. Identify if there are instances where people from the same party/brand are tweeting negatively about their own party/brand.
+
+    6. Top 10 Positive Tweets: List the most positive tweets with their tweet numbers and brief explanation.
+
+    7. Top 10 Negative Tweets: List the most negative tweets with their tweet numbers and brief explanation.
+
+    8. Top 10 Recommendations: Provide specific suggestions and recommendations to help the party or brand improve their messaging, engagement, or content strategy based on the tweet analysis.
+
+    Format the analysis in a clear, structured way with bullet points where appropriate and clear section headings.
     """
 
     # Generate summary using Gemini
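
Note: this hunk only rewrites the prompt text; the actual model call sits outside the diff. For orientation, a typical google-generativeai invocation looks roughly like the sketch below — the model name, key lookup, and variable names are assumptions, not part of this commit:

    import os
    import google.generativeai as genai

    genai.configure(api_key=os.environ["GEMINI_API_KEY"])  # key source assumed
    model = genai.GenerativeModel("gemini-1.5-flash")       # model name assumed
    response = model.generate_content(prompt)               # prompt as built above
    summary = response.text
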
@@ -85,34 +96,37 @@ def run_apify_comment_analysis(input):
     since_date = input["since"]
     max_items = input["max_items"]
 
+    # Get current date for the "until" parameter
+    current_date = datetime.now().strftime("%Y-%m-%d")
+
     # Use fixed date format as specified in the example
     run_input = {
         "@": id,
-        "filter:blue_verified": False,
-        "filter:consumer_video": False,
-        "filter:has_engagement": True,
-        "filter:hashtags": False,
-        "filter:images": False,
-        "filter:links": False,
-        "filter:media": False,
-        "filter:mentions": False,
-        "filter:native_video": False,
-        "filter:nativeretweets": False,
-        "filter:news": False,
-        "filter:pro_video": False,
-        "filter:quote": False,
-        "filter:replies": False,
-        "filter:safe": False,
-        "filter:spaces": False,
-        "filter:twimg": False,
-        "filter:verified": False,
-        "filter:videos": False,
-        "filter:vine": False,
-        "include:nativeretweets": False,
-        "lang": "en",
+        "filter:blue_verified": False,
+        "filter:consumer_video": False,
+        "filter:has_engagement": True,
+        "filter:hashtags": False,
+        "filter:images": False,
+        "filter:links": False,
+        "filter:media": False,
+        "filter:mentions": False,
+        "filter:native_video": False,
+        "filter:nativeretweets": False,
+        "filter:news": False,
+        "filter:pro_video": False,
+        "filter:quote": False,
+        "filter:replies": False,
+        "filter:safe": False,
+        "filter:spaces": False,
+        "filter:twimg": False,
+        "filter:verified": False,
+        "filter:videos": False,
+        "filter:vine": False,
+        "include:nativeretweets": False,
+        "lang": "en",
         "since": since_date + "_23:59:59_UTC",
-        "to": id,
-        "until": "2025-12-31_23:59:59_UTC",
+        "to": id,
+        "until": current_date + "_23:59:59_UTC",
         "maxItems": max_items,
         "queryType": "Latest",
         "min_retweets": 0,
@@ -130,6 +144,10 @@ def run_apify_comment_analysis(input):
 
     # Fetch data from the run's dataset
     data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
+
+    # Ensure we don't exceed max_items
+    if len(data) > max_items:
+        data = data[:max_items]
 
     return data, run["defaultDatasetId"]
 
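Note: the cap is applied after list(...) has already materialized the whole dataset. Assuming iterate_items() yields items lazily, the same cap could avoid over-fetching by stopping the iterator early — a sketch using client and run from the surrounding function:

    from itertools import islice

    # Take at most max_items items, then stop consuming the iterator
    data = list(islice(client.dataset(run["defaultDatasetId"]).iterate_items(), max_items))
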
@@ -139,6 +157,9 @@ def run_apify_account_analysis(input):
     since_date = input["since"]
     max_items = input["max_items"]
 
+    # Get current date for the "until" parameter
+    current_date = datetime.now().strftime("%Y-%m-%d")
+
     # Use the exact format provided by the user
     run_input = {
         "filter:blue_verified": False,
@@ -168,7 +189,7 @@ def run_apify_account_analysis(input):
         "min_faves": 500,
         "queryType": "Latest",
         "since": since_date + "_23:59:59_UTC",
-        "until": "2025-12-31_23:59:59_UTC",
+        "until": current_date + "_23:59:59_UTC",
         "min_retweets": 0,
         "min_replies": 0,
         "-min_retweets": 0,
@@ -180,9 +201,13 @@ def run_apify_account_analysis(input):
     with st.spinner("Fetching tweets from Twitter..."):
         # Run the Actor and wait for it to finish
         run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)
-
+
     # Fetch data from the run's dataset
     data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
+
+    # Ensure we don't exceed max_items
+    if len(data) > max_items:
+        data = data[:max_items]
 
     return data, run["defaultDatasetId"]
 
@@ -206,6 +231,23 @@ def extract_mentions(text):
     mentions = [word[1:] for word in words if word.startswith('@')]
     return mentions
 
+# Function to convert UTC time to Indian Standard Time (IST)
+def convert_to_ist(utc_datetime):
+    if not utc_datetime:
+        return None
+
+    # Create timezone objects
+    utc_tz = pytz.timezone('UTC')
+    ist_tz = pytz.timezone('Asia/Kolkata')
+
+    # If datetime is naive, make it timezone-aware with UTC
+    if utc_datetime.tzinfo is None:
+        utc_datetime = utc_tz.localize(utc_datetime)
+
+    # Convert to IST
+    ist_datetime = utc_datetime.astimezone(ist_tz)
+    return ist_datetime
+
 # Function to process tweet data and create dataframe
 def process_tweet_data(data):
     processed_data = []
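
Note: a quick standalone check of the new helper against a Twitter-style createdAt string (the timestamp is illustrative):

    import pytz
    from datetime import datetime

    date_str = "Wed Jan 15 12:00:00 +0000 2025"
    date_obj = datetime.strptime(date_str, "%a %b %d %H:%M:%S %z %Y")
    ist = convert_to_ist(date_obj)  # helper defined in the hunk above
    print(ist.strftime("%Y-%m-%d %H:%M"))  # 2025-01-15 17:30, since IST is UTC+05:30
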
@@ -219,11 +261,15 @@ def process_tweet_data(data):
         try:
             # Try to parse the Twitter date format
             date_obj = datetime.strptime(date_str, "%a %b %d %H:%M:%S %z %Y")
-            formatted_date = date_obj.strftime("%Y-%m-%d %H:%M:%S")
-            date_only = date_obj.strftime("%Y-%m-%d")
-            time_only = date_obj.strftime("%H:%M")
-            hour = date_obj.hour
-            day_of_week = date_obj.strftime("%A")
+
+            # Convert to IST
+            ist_date_obj = convert_to_ist(date_obj)
+
+            formatted_date = ist_date_obj.strftime("%Y-%m-%d %H:%M:%S")
+            date_only = ist_date_obj.strftime("%Y-%m-%d")
+            time_only = ist_date_obj.strftime("%H:%M")
+            hour = ist_date_obj.hour
+            day_of_week = ist_date_obj.strftime("%A")
         except:
             formatted_date = date_str
             date_only = ""
@@ -255,6 +301,9 @@ def process_tweet_data(data):
         # Calculate tweet length
         tweet_length = len(text) if text else 0
 
+        # Get bookmarks count if available
+        bookmarks = item.get("bookmarkCount", 0)
+
         processed_item = {
             "Date": formatted_date,
             "Date_Only": date_only,
@@ -269,6 +318,7 @@ def process_tweet_data(data):
             "Likes": item.get("likeCount", 0),
             "Retweets": item.get("retweetCount", 0),
             "Replies": item.get("replyCount", 0),
+            "Bookmarks": bookmarks,
             "Views": item.get("viewCount", 0),
             "URL": item.get("url", ""),
             "Is_Reply": item.get("isReply", False),
@@ -304,15 +354,16 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
     total_likes = df["Likes"].sum()
     total_retweets = df["Retweets"].sum()
     total_replies = df["Replies"].sum()
+    total_bookmarks = df["Bookmarks"].sum()
     total_views = df["Views"].sum()
 
     # Engagement metrics
-    total_engagement = total_likes + total_retweets + total_replies
+    total_engagement = total_likes + total_retweets + total_replies + total_bookmarks
    avg_engagement_per_tweet = total_engagement / total_tweets if total_tweets > 0 else 0
    engagement_rate = (total_engagement / total_views * 100) if total_views > 0 else 0
 
     # Find most engaging tweet
-    df["Engagement"] = df["Likes"] + df["Retweets"] + df["Replies"]
+    df["Engagement"] = df["Likes"] + df["Retweets"] + df["Replies"] + df["Bookmarks"]
     most_engaging_tweet = df.loc[df["Engagement"].idxmax()] if not df.empty else None
 
     # Tweet type breakdown
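
Note: bookmarks now count toward engagement, which shifts both the per-tweet average and the view-based engagement rate. The new arithmetic on toy data:

    import pandas as pd

    df = pd.DataFrame({"Likes": [10, 3], "Retweets": [2, 1], "Replies": [1, 0],
                       "Bookmarks": [4, 2], "Views": [500, 100]})
    df["Engagement"] = df["Likes"] + df["Retweets"] + df["Replies"] + df["Bookmarks"]
    total_engagement = int(df["Engagement"].sum())                 # 23
    engagement_rate = total_engagement / df["Views"].sum() * 100   # ~3.83%
    most_engaging = df.loc[df["Engagement"].idxmax()]              # row 0
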
@@ -362,8 +413,8 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
         st.metric("Total Retweets", f"{total_retweets:,}")
         st.metric("Total Replies", f"{total_replies:,}")
     with col3:
+        st.metric("Total Bookmarks", f"{total_bookmarks:,}")
         st.metric("Total Views", f"{total_views:,}")
-        st.metric("Total Engagement", f"{total_engagement:,}")
 
     # Engagement metrics
     st.subheader("⚡ Engagement Analysis")
@@ -426,7 +477,7 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
         st.write(most_engaging_tweet['Text'])
 
         # Display metrics in a row
-        cols = st.columns(4)
+        cols = st.columns(5)
         with cols[0]:
             st.write(f"💬 {most_engaging_tweet['Replies']}")
         with cols[1]:
@@ -434,6 +485,8 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
         with cols[2]:
             st.write(f"❤️ {most_engaging_tweet['Likes']}")
         with cols[3]:
+            st.write(f"🔖 {most_engaging_tweet['Bookmarks']}")
+        with cols[4]:
             st.write(f"👁️ {most_engaging_tweet['Views']}")
 
         # Link to original tweet
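
Note: the metric row widens from four to five columns to fit the bookmark count. A minimal Streamlit sketch of the layout change (values are placeholders):

    import streamlit as st

    cols = st.columns(5)  # previously st.columns(4)
    for col, label in zip(cols, ["💬 12", "🔁 4", "❤️ 30", "🔖 7", "👁️ 1200"]):
        with col:
            st.write(label)
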
@@ -453,7 +506,7 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
     # Tweets by hour of day
     if not df_by_hour.empty:
         fig_by_hour = px.bar(df_by_hour, x="Hour", y="Count",
-                             title="Tweets by Hour of Day",
+                             title="Tweets by Hour of Day (Indian Time)",
                              labels={"Hour": "Hour (24h format)", "Count": "Number of Tweets"})
         st.plotly_chart(fig_by_hour, use_container_width=True)
 
@@ -503,7 +556,7 @@ def display_tweet_list(df):
         st.write(row['Text'])
 
         # Display metrics in a row
-        cols = st.columns(4)
+        cols = st.columns(5)
         with cols[0]:
             st.write(f"💬 {row['Replies']}")
         with cols[1]:
@@ -511,6 +564,8 @@ def display_tweet_list(df):
         with cols[2]:
             st.write(f"❤️ {row['Likes']}")
         with cols[3]:
+            st.write(f"🔖 {row['Bookmarks']}")
+        with cols[4]:
             st.write(f"👁️ {row['Views']}")
 
         # Indicate if tweet has media without showing it
@@ -676,12 +731,20 @@ try:
     with open("requirements.txt", "r") as f:
         requirements = f.read()
 
+    updated_requirements = False
+
     if "google-generativeai" not in requirements:
         with open("requirements.txt", "a") as f:
             f.write("\ngoogle-generativeai>=0.3.0\n")
+        updated_requirements = True
+
+    if "pytz" not in requirements:
+        with open("requirements.txt", "a") as f:
+            f.write("\npytz\n")
+        updated_requirements = True
 except:
     pass
 
 # Footer with attribution
 st.divider()
-st.caption("Powered by Apify Twitter Scraper API • Created with Streamlit • AI Summaries by Google Gemini")
+st.caption("Powered by Apify Twitter Scraper API • Created with Streamlit • AI Summaries by Google Gemini • Times in Indian Standard Time (IST)")
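
Note: the substring checks keep the appends idempotent across reruns (e.g. "pytz" won't be re-added once present, though a substring match is coarse). The same pattern, generalized to a list of packages — the function name and package list here are illustrative, not part of the commit:

    def ensure_requirements(path="requirements.txt",
                            packages=("google-generativeai>=0.3.0", "pytz")):
        try:
            with open(path, "r") as f:
                requirements = f.read()
            for pkg in packages:
                name = pkg.split(">=")[0]  # compare on the bare package name
                if name not in requirements:
                    with open(path, "a") as f:
                        f.write("\n" + pkg + "\n")
        except OSError:
            pass  # best-effort, mirroring the original's bare except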
 