poemsforaphrodite committed on
Commit bbfa3ce · verified · 1 Parent(s): fd8c6cf

Update src/streamlit_app.py

Files changed (1):
  1. src/streamlit_app.py +614 -254
src/streamlit_app.py CHANGED
@@ -120,44 +120,70 @@ def get_gemini_summary(tweets_data, context=""):
    except Exception as e:
        return f"Error generating summary: {str(e)}"

+# Function to extract account details from API response
+def extract_account_details(author_data):
+    """Extract comprehensive account details from author data"""
+    # If no data provided (None), return empty dict
+    if author_data is None:
+        return {}
+
+    # Create account details with defaults for all fields
+    account_details = {
+        "user_id": author_data.get("id", ""),
+        "name": author_data.get("name", ""),
+        "username": author_data.get("userName", ""),
+        "bio": author_data.get("description", author_data.get("biography", "")),
+        "location": author_data.get("location", ""),
+        "website": author_data.get("url", ""),
+        "followers_count": author_data.get("followersCount", author_data.get("followers_count", author_data.get("followers", 0))),
+        "following_count": author_data.get("followingCount", author_data.get("following_count", author_data.get("following", 0))),
+        "tweet_count": author_data.get("statusesCount", author_data.get("tweet_count", 0)),
+        "listed_count": author_data.get("listedCount", author_data.get("listed_count", 0)),
+        "verified": author_data.get("verified", author_data.get("isVerified", author_data.get("isBlueVerified", False))),
+        "protected": author_data.get("protected", False),
+        "profile_image_url": author_data.get("profileImageUrl", author_data.get("profile_image_url", "")),
+        "profile_banner_url": author_data.get("profileBannerUrl", author_data.get("profile_banner_url", "")),
+        "created_at": author_data.get("createdAt", author_data.get("created_at", "")),
+        "favourites_count": author_data.get("favouritesCount", author_data.get("favourites_count", 0)),
+        "media_count": author_data.get("mediaCount", author_data.get("media_count", 0))
+    }
+
+    return account_details
+
def run_apify_comment_analysis(input):
    # Prepare the Actor input with exact format for Comment Analysis
    id = input["id"]
    since_date = input["since"]
-    max_items = input["max_items"]
-
-    # Get current date for the "until" parameter
-    current_date = datetime.now().strftime("%Y-%m-%d")
+    until_date = input.get("until", datetime.now().strftime("%Y-%m-%d"))  # NEW: Add until date

-    # Use fixed date format as specified in the example
+    # Use fixed date format as specified in the example - FIXED QUERY PARAMETERS
    run_input = {
        "@": id,
-        "filter:blue_verified": False,
-        "filter:consumer_video": False,
-        "filter:has_engagement": True,
-        "filter:hashtags": False,
-        "filter:images": False,
-        "filter:links": False,
-        "filter:media": False,
-        "filter:mentions": False,
-        "filter:native_video": False,
-        "filter:nativeretweets": False,
-        "filter:news": False,
-        "filter:pro_video": False,
-        "filter:quote": False,
-        "filter:replies": False,
-        "filter:safe": False,
-        "filter:spaces": False,
-        "filter:twimg": False,
-        "filter:verified": False,
-        "filter:videos": False,
-        "filter:vine": False,
-        "include:nativeretweets": False,
-        "lang": "en",
-        "since": since_date + "_23:59:59_UTC",
-        "to": id,
-        "until": current_date + "_23:59:59_UTC",
-        "maxItems": max_items,
+        "filter:blue_verified": False,
+        "filter:consumer_video": False,
+        "filter:has_engagement": False,  # Always False to get more comments
+        "filter:hashtags": False,
+        "filter:images": False,
+        "filter:links": False,
+        "filter:media": False,
+        "filter:mentions": False,
+        "filter:native_video": False,
+        "filter:nativeretweets": False,
+        "filter:news": False,
+        "filter:pro_video": False,
+        "filter:quote": False,
+        "filter:replies": False,  # Keep false to get actual comments
+        "filter:safe": False,
+        "filter:spaces": False,
+        "filter:twimg": False,
+        "filter:verified": False,
+        "filter:videos": False,
+        "filter:vine": False,
+        "include:nativeretweets": False,
+        "lang": "en",
+        "since": since_date + "_00:00:00_UTC",
+        "to": id,
+        "until": until_date + "_23:59:59_UTC",
        "queryType": "Latest",
        "min_retweets": 0,
        "min_faves": 0,
@@ -168,33 +194,29 @@ def run_apify_comment_analysis(input):
    }

    # Show loading state
-    with st.spinner("Fetching comments from Twitter..."):
+    with st.spinner(f"Fetching comments from {since_date} to {until_date}..."):
        # Run the Actor and wait for it to finish
        run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)

-        # Fetch data from the run's dataset
+        # Fetch ALL data from the run's dataset (no maxItems limit)
        data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
-
-        # Ensure we don't exceed max_items
-        if len(data) > max_items:
-            data = data[:max_items]

    return data, run["defaultDatasetId"]

-def run_apify_account_analysis(input, disable_engagement_filters=False):
+def run_apify_account_analysis(input, disable_engagement_filters=True):
    # Prepare the Actor input with exact format for Account Analysis
    username = input["username"]
    since_date = input["since"]
-    max_items = input["max_items"]
-
-    # Get current date for the "until" parameter
-    current_date = datetime.now().strftime("%Y-%m-%d")
+    until_date = input.get("until", datetime.now().strftime("%Y-%m-%d"))  # NEW: Add until date
+    min_faves = input.get("min_faves", 0)  # NEW: Configurable engagement
+    min_retweets = input.get("min_retweets", 0)  # NEW: Configurable engagement
+    min_replies = input.get("min_replies", 0)  # NEW: Configurable engagement

-    # Use the exact format provided by the user
+    # Use the exact format provided by the user - IMPROVED QUERY PARAMETERS
    run_input = {
        "filter:blue_verified": False,
        "filter:consumer_video": False,
-        "filter:has_engagement": True,
+        "filter:has_engagement": False,  # Always False for maximum tweet capture
        "filter:hashtags": False,
        "filter:images": False,
        "filter:links": False,
@@ -215,33 +237,24 @@ def run_apify_account_analysis(input, disable_engagement_filters=False):
        "from": username,
        "include:nativeretweets": False,
        "lang": "en",
-        "maxItems": max_items,
        "queryType": "Latest",
-        "since": since_date + "_23:59:59_UTC",
-        "until": current_date + "_23:59:59_UTC",
+        "since": since_date + "_00:00:00_UTC",
+        "until": until_date + "_23:59:59_UTC",
+        "min_faves": min_faves,  # NEW: User configurable, default 0
+        "min_retweets": min_retweets,  # NEW: User configurable, default 0
+        "min_replies": min_replies,  # NEW: User configurable, default 0
+        "-min_retweets": 0,
+        "-min_faves": 0,
+        "-min_replies": 0
    }

-    if not disable_engagement_filters:
-        run_input.update({
-            "min_faves": 500,
-            "min_retweets": 0,
-            "min_replies": 0,
-            "-min_retweets": 0,
-            "-min_faves": 0,
-            "-min_replies": 0
-        })
-
    # Show loading state
-    with st.spinner("Fetching tweets from Twitter..."):
+    with st.spinner(f"Fetching tweets from {since_date} to {until_date}..."):
        # Run the Actor and wait for it to finish
        run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)

-        # Fetch data from the run's dataset
+        # Fetch ALL data from the run's dataset (no maxItems limit)
        data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
-
-        # Ensure we don't exceed max_items
-        if len(data) > max_items:
-            data = data[:max_items]

    return data, run["defaultDatasetId"]
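Review note: with maxItems gone from both fetchers, the input dict now carries only the date window plus optional engagement floors. A hedged sketch of the new call contract (account name and values illustrative):

    results, dataset_id = run_apify_account_analysis({
        "username": "example_user",   # hypothetical account
        "since": "2025-01-01",
        "until": "2025-02-01",        # optional; defaults to today's date
        "min_faves": 0,               # optional engagement floors, default 0
        "min_retweets": 0,
        "min_replies": 0,
    })

The new extract_account_details helper added above normalizes the differing key styles the Apify actors return via chained .get() fallbacks; for a hypothetical author payload:

    author = {"id": "12345", "userName": "example_user", "followers": 42, "isBlueVerified": True}
    details = extract_account_details(author)
    assert details["username"] == "example_user"
    assert details["followers_count"] == 42   # followersCount -> followers_count -> followers
    assert details["verified"] is True        # verified -> isVerified -> isBlueVerified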
@@ -282,13 +295,14 @@ def convert_to_ist(utc_datetime):
    ist_datetime = utc_datetime.astimezone(ist_tz)
    return ist_datetime

-# Function to process tweet data and create dataframe
-def process_tweet_data(data):
+# Function to process tweet data and create dataframe - ENHANCED FOR ACCOUNT DETAILS
+def process_tweet_data(data, extract_account_info=False):
    processed_data = []
    all_hashtags = []
    all_mentions = []
    mock_data_detected = False
    mock_data_signature = "From KaitoEasyAPI, a reminder:Our API pricing is based on the volume of data returned."
+    account_details = {}

    for item in data:
        text = item.get("text", "")
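Review note: the new extract_account_info keyword defaults to False, so existing call sites that pass only the raw data keep their old behavior:

    df, metrics, mock_detected = process_tweet_data(data)                             # unchanged behavior
    df, metrics, mock_detected = process_tweet_data(data, extract_account_info=True)  # opt in to account details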
@@ -321,6 +335,10 @@ def process_tweet_data(data):
        # Get author info
        author = item.get("author", {})

+        # ENHANCED: Extract account details if requested
+        if extract_account_info and not account_details and author:
+            account_details = extract_account_details(author)
+
        # Check if media exists
        has_media = False
        if "extendedEntities" in item and "media" in item["extendedEntities"]:
@@ -380,6 +398,7 @@ def process_tweet_data(data):
    metrics = {
        "hashtags": all_hashtags,
        "mentions": all_mentions,
+        "account_details": account_details  # ADDED: Include account details
    }

    return df, metrics, mock_data_detected
@@ -388,6 +407,17 @@ def process_tweet_data(data):
def display_compact_analysis(df, metrics, username, dataset_id):
    st.subheader(f"@{username}")

+    # ENHANCED: Display account details if available
+    account_details = metrics.get("account_details", {})
+    if account_details:
+        st.markdown("##### 👤 Account Info")
+        if account_details.get("followers_count"):
+            st.metric("Followers", f"{account_details['followers_count']:,}")
+        if account_details.get("following_count"):
+            st.metric("Following", f"{account_details['following_count']:,}")
+        if account_details.get("verified"):
+            st.success("✅ Verified")
+
    # Calculate metrics for analysis
    total_tweets = len(df)
    total_likes = df["Likes"].sum()
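Review note: the truthiness guards (if account_details.get("followers_count"):) hide the metric when the count is 0 as well as when the key is missing; an explicit None check would keep a genuine zero visible (sketch):

    followers = account_details.get("followers_count")
    if followers is not None:  # a real 0 still renders
        st.metric("Followers", f"{followers:,}")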
@@ -461,9 +491,12 @@ def display_compact_analysis(df, metrics, username, dataset_id):

# Function to analyze and display the tweet data
def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
+    raw_data = None
    if not isinstance(data, pd.DataFrame):  # If raw data is passed
-        # Process the data into a dataframe
-        df, metrics, _ = process_tweet_data(data)  # We don't need mock_data_detected here
+        # Store raw data for sentiment analysis
+        raw_data = data
+        # Process the data into a dataframe - ENHANCED: Extract account details
+        df, metrics, _ = process_tweet_data(data, extract_account_info=True)
    else:  # If DataFrame is already processed (e.g. after retry)
        df = data
        # Recalculate metrics if df might have changed (e.g. if mock data was removed before this call)
@@ -474,7 +507,7 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
                all_hashtags_retry.extend(row["Hashtags"].split(", "))
            if pd.notna(row.get("Mentions")) and row["Mentions"]:
                all_mentions_retry.extend(row["Mentions"].split(", "))
-        metrics = {"hashtags": all_hashtags_retry, "mentions": all_mentions_retry}
+        metrics = {"hashtags": all_hashtags_retry, "mentions": all_mentions_retry, "account_details": {}}

    if not df.empty:
        # Calculate additional metrics for analysis
@@ -529,6 +562,29 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
        left_col, right_col = st.columns([1, 1])

        with left_col:
+            # ENHANCED: Display account details if available
+            account_details = metrics.get("account_details", {})
+            if account_details:
+                st.subheader("👤 Account Information")
+                acc_col1, acc_col2, acc_col3 = st.columns(3)
+                with acc_col1:
+                    if account_details.get("followers_count"):
+                        st.metric("Followers", f"{account_details['followers_count']:,}")
+                    if account_details.get("following_count"):
+                        st.metric("Following", f"{account_details['following_count']:,}")
+                with acc_col2:
+                    if account_details.get("tweet_count"):
+                        st.metric("Total Tweets (All Time)", f"{account_details['tweet_count']:,}")
+                    if account_details.get("listed_count"):
+                        st.metric("Listed Count", f"{account_details['listed_count']:,}")
+                with acc_col3:
+                    if account_details.get("verified"):
+                        st.success("✅ Verified Account")
+                    if account_details.get("bio"):
+                        st.write(f"**Bio:** {account_details['bio']}")
+
+                st.divider()
+
            st.subheader("📈 Key Metrics")

            # Basic stats
@@ -735,8 +791,8 @@ def display_tweet_list_compact(df):
        # Small divider
        st.write("---")

-# Function to store processed tweets into MongoDB (upsert by tweet ID)
-def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
+# Function to store processed tweets into MongoDB (upsert by tweet ID) - ENHANCED FOR RAW DATA
+def store_to_mongodb(df, analysis_type="Account", ai_summary=None, raw_data=None, account_details=None):
    if df.empty:
        return
    if not MONGODB_AVAILABLE:
@@ -748,19 +804,20 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
        user_tweets = df[df['Username'] == username]

        # Calculate aggregated metrics (convert to native Python types for MongoDB)
+        # Handle missing columns gracefully
        total_tweets = int(len(user_tweets))
-        total_likes = int(user_tweets["Likes"].sum())
-        total_retweets = int(user_tweets["Retweets"].sum())
-        total_replies = int(user_tweets["Replies"].sum())
-        total_bookmarks = int(user_tweets["Bookmarks"].sum())
-        total_views = int(user_tweets["Views"].sum())
+        total_likes = int(user_tweets.get("Likes", pd.Series([0])).sum()) if "Likes" in user_tweets.columns else 0
+        total_retweets = int(user_tweets.get("Retweets", pd.Series([0])).sum()) if "Retweets" in user_tweets.columns else 0
+        total_replies = int(user_tweets.get("Replies", pd.Series([0])).sum()) if "Replies" in user_tweets.columns else 0
+        total_bookmarks = int(user_tweets.get("Bookmarks", pd.Series([0])).sum()) if "Bookmarks" in user_tweets.columns else 0
+        total_views = int(user_tweets.get("Views", pd.Series([0])).sum()) if "Views" in user_tweets.columns else 0
        total_engagement = total_likes + total_retweets + total_replies + total_bookmarks
        avg_engagement = float(total_engagement / total_tweets) if total_tweets > 0 else 0.0

        # Get all tweets as a list
        tweets_list = user_tweets.to_dict("records")

-        # Create account document
+        # ENHANCED: Create account document with raw data and account details
        account_doc = {
            "username": username,
            "analysis_type": analysis_type,
@@ -774,7 +831,9 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
            "total_engagement": total_engagement,
            "avg_engagement_per_tweet": avg_engagement,
            "tweets": tweets_list,
-            "ai_summary": ai_summary
+            "ai_summary": ai_summary,
+            "raw_tweets": raw_data if raw_data else [],  # ADDED: Store raw data for sentiment analysis
+            "account_details": account_details if account_details else {}  # ADDED: Store account details
        }

        # Upsert by username - one document per account
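Review note: in the new aggregation lines, the `if "Likes" in user_tweets.columns` guard already covers the missing-column case, so the `.get(..., pd.Series([0]))` fallback inside it can never fire; an equivalent, simpler helper would be (sketch, not part of this commit):

    def safe_sum(frame, column):
        # Sum a numeric column, treating a missing column as 0.
        return int(frame[column].sum()) if column in frame.columns else 0

    total_likes = safe_sum(user_tweets, "Likes")

The upsert referenced by the context comment above sits outside this hunk; assuming a pymongo collection, it presumably looks roughly like:

    collection.update_one(
        {"username": username},   # match key: one document per account
        {"$set": account_doc},    # overwrite with the fresh snapshot
        upsert=True,              # insert if the account is new
    )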
@@ -786,15 +845,18 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):

# --- Scheduler utilities ---

-def fetch_and_store(username, since, max_items):
+def fetch_and_store(username, since, until):
    """Helper to fetch tweets for a username and store them in MongoDB."""
    try:
        results, _ = run_apify_account_analysis({
            "username": username,
            "since": since,
-            "max_items": max_items
-        }, disable_engagement_filters=True)
-        df, _, _ = process_tweet_data(results)
+            "until": until,
+            "min_faves": 0,
+            "min_retweets": 0,
+            "min_replies": 0
+        })
+        df, metrics, _ = process_tweet_data(results, extract_account_info=True)

        # Generate AI summary if available
        ai_summary = None
@@ -805,14 +867,16 @@ def fetch_and_store(username, since, max_items):
        except Exception as e:
            print(f"AI summary generation failed for @{username}: {e}")

-        store_to_mongodb(df, "Account", ai_summary)
+        # ENHANCED: Store with raw data and account details
+        account_details = metrics.get("account_details", {})
+        store_to_mongodb(df, "Account", ai_summary, raw_data=results, account_details=account_details)
    except Exception as e:
        print(f"Scheduler error fetching @{username}: {e}")


-def schedule_fetch(usernames, since, max_items):
+def schedule_fetch(usernames, since, until):
    for user in usernames:
-        fetch_and_store(user, since, max_items)
+        fetch_and_store(user, since, until)


def _run_schedule_loop():
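Review note: _run_schedule_loop itself is untouched by this commit; the conventional body for the schedule library, which the daemon thread started in the Scheduler tab relies on, is (sketch, assuming the usual polling pattern):

    import time

    def _run_schedule_loop():
        # Poll the schedule registry forever; meant to run on a daemon thread.
        while True:
            schedule.run_pending()
            time.sleep(60)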
@@ -869,8 +933,14 @@ if 'id' not in st.session_state:
    st.session_state.id = ""
if 'since' not in st.session_state:
    st.session_state.since = "2025-01-01"
-if 'max_items' not in st.session_state:
-    st.session_state.max_items = 200
+if 'until' not in st.session_state:
+    st.session_state.until = datetime.now().strftime("%Y-%m-%d")
+if 'min_faves' not in st.session_state:
+    st.session_state.min_faves = 0
+if 'min_retweets' not in st.session_state:
+    st.session_state.min_retweets = 0
+if 'min_replies' not in st.session_state:
+    st.session_state.min_replies = 0
if 'results' not in st.session_state:
    st.session_state.results = None
if 'dataset_id' not in st.session_state:
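Review note: the growing block of one-key-at-a-time guards could be collapsed with the dict-like setdefault that st.session_state exposes, which only writes when the key is absent (equivalent sketch):

    for key, default in {"until": datetime.now().strftime("%Y-%m-%d"),
                         "min_faves": 0, "min_retweets": 0, "min_replies": 0}.items():
        st.session_state.setdefault(key, default)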
@@ -885,11 +955,11 @@ if 'username2' not in st.session_state:
    st.session_state.username2 = ""
if 'compare_since' not in st.session_state:
    st.session_state.compare_since = "2025-01-01"
-if 'compare_max_items' not in st.session_state:
-    st.session_state.compare_max_items = 200
+if 'compare_until' not in st.session_state:
+    st.session_state.compare_until = datetime.now().strftime("%Y-%m-%d")

# Create tabs
-tabs = st.tabs(["📊 Account Analysis", "💬 Comment Analysis", "🆚 Compare", "⏰ Scheduler"])
+tabs = st.tabs(["📊 Account Analysis", "💬 Comment Analysis", "🆚 Compare", "👥 Followers", "⏰ Scheduler"])

# Account Analysis tab
with tabs[0]:
@@ -899,29 +969,56 @@ with tabs[0]:
    st.write("Analyze tweets from a specific Twitter account")

    # Input fields in a cleaner layout
-    col1, col2, col3 = st.columns([3, 2, 1])
+    col1, col2, col3 = st.columns([3, 2, 2])
    with col1:
        st.session_state.username = st.text_input("Enter Twitter username (without @)",
                                                  value=st.session_state.username,
                                                  key="account_username",
                                                  placeholder="e.g. elonmusk")
    with col2:
-        st.session_state.since = st.date_input("Since date",
+        st.session_state.since = st.date_input("Start date",
                                               value=datetime.strptime(st.session_state.since, "%Y-%m-%d")
                                               if isinstance(st.session_state.since, str)
                                               else st.session_state.since,
                                               key="account_since")
    with col3:
-        st.session_state.max_items = st.number_input("Max tweets",
-                                                     min_value=10,
-                                                     max_value=1000,
-                                                     value=st.session_state.max_items,
-                                                     step=10,
-                                                     key="account_max_items")
+        st.session_state.until = st.date_input("End date",
+                                               value=datetime.strptime(st.session_state.until, "%Y-%m-%d")
+                                               if isinstance(st.session_state.until, str)
+                                               else st.session_state.until,
+                                               key="account_until")
+
+    # Optional engagement filters
+    with st.expander("⚙️ Advanced Filters (Optional)", expanded=False):
+        st.info("All filters are set to 0 by default to capture maximum tweets. Increase values to filter for more engaging content.")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.session_state.min_faves = st.number_input("Minimum Likes",
+                                                         min_value=0,
+                                                         max_value=10000,
+                                                         value=st.session_state.min_faves,
+                                                         step=10,
+                                                         key="account_min_faves")
+        with col2:
+            st.session_state.min_retweets = st.number_input("Minimum Retweets",
+                                                            min_value=0,
+                                                            max_value=1000,
+                                                            value=st.session_state.min_retweets,
+                                                            step=5,
+                                                            key="account_min_retweets")
+        with col3:
+            st.session_state.min_replies = st.number_input("Minimum Replies",
+                                                           min_value=0,
+                                                           max_value=1000,
+                                                           value=st.session_state.min_replies,
+                                                           step=5,
+                                                           key="account_min_replies")

-    # Convert date to string format
+    # Convert dates to string format
    if not isinstance(st.session_state.since, str):
        st.session_state.since = st.session_state.since.strftime("%Y-%m-%d")
+    if not isinstance(st.session_state.until, str):
+        st.session_state.until = st.session_state.until.strftime("%Y-%m-%d")

    # Run button
    run_button = st.button("🔍 Analyze Account Tweets", key="run_account", use_container_width=True)
@@ -929,33 +1026,32 @@ with tabs[0]:
    # Run analysis when button is clicked
    if run_button:
        if st.session_state.username:
-            # Initial attempt
-            st.session_state.results, st.session_state.dataset_id = run_apify_account_analysis({
-                "username": st.session_state.username,
-                "since": st.session_state.since,
-                "max_items": st.session_state.max_items
-            })
-
-            # Process initial results to check for mock data
-            processed_df, _, mock_data_detected = process_tweet_data(st.session_state.results)
-
-            if mock_data_detected:
-                st.warning("Mock data detected in the initial response, indicating no specific tweets found with current filters. Retrying without engagement filters...")
-                # Retry without engagement filters
+            # Validate date range
+            if st.session_state.since > st.session_state.until:
+                st.error("Start date must be before end date.")
+            else:
                st.session_state.results, st.session_state.dataset_id = run_apify_account_analysis({
-                    "username": st.session_state.username,
+                    "username": st.session_state.username,
                    "since": st.session_state.since,
-                    "max_items": st.session_state.max_items
-                }, disable_engagement_filters=True)
-                # Re-process the data after retry
-                processed_df, _, _ = process_tweet_data(st.session_state.results)  # mock_data_detected should be False now or data is empty
-
-            if not processed_df.empty:
-                st.success(f"Analysis complete! Found {len(processed_df)} tweets.")
-                st.balloons()
-                analyze_and_display_data(processed_df, st.session_state.dataset_id, "Account")
-            else:
-                st.warning("No results found even after adjustments. Try a different query or date range.")
+                    "until": st.session_state.until,
+                    "min_faves": st.session_state.min_faves,
+                    "min_retweets": st.session_state.min_retweets,
+                    "min_replies": st.session_state.min_replies
+                })
+
+                # Process results to check for mock data
+                processed_df, _, mock_data_detected = process_tweet_data(st.session_state.results, extract_account_info=True)
+
+                if mock_data_detected:
+                    st.warning("Mock data detected in the response, indicating limited results. This may be due to strict filters or no tweets in the date range.")
+
+                if not processed_df.empty:
+                    date_range = f"{st.session_state.since} to {st.session_state.until}"
+                    st.success(f"Analysis complete! Found {len(processed_df)} tweets from {date_range}.")
+                    st.balloons()
+                    analyze_and_display_data(processed_df, st.session_state.dataset_id, "Account")
+                else:
+                    st.warning("No results found. Try a different date range or reduce the engagement filters.")
        else:
            st.error("Please enter a Twitter username")
@@ -966,24 +1062,23 @@ with tabs[1]:
    st.write("Analyze comments directed at a specific Twitter account")

    # Input fields in a cleaner layout
-    col1, col2, col3 = st.columns([3, 2, 1])
+    col1, col2, col3 = st.columns([3, 2, 2])
    with col1:
        tweet_id = st.text_input("Enter Twitter ID",
                                 key="comment_id",
                                 placeholder="e.g. YSJaganTrends")
    with col2:
-        comment_since = st.date_input("Since date",
+        comment_since = st.date_input("Start date",
                                      value=datetime.strptime(st.session_state.since, "%Y-%m-%d")
                                      if isinstance(st.session_state.since, str)
                                      else st.session_state.since,
                                      key="comment_since")
    with col3:
-        comment_max_items = st.number_input("Max comments",
-                                            min_value=10,
-                                            max_value=1000,
-                                            value=st.session_state.max_items,
-                                            step=10,
-                                            key="comment_max_items")
+        comment_until = st.date_input("End date",
+                                      value=datetime.strptime(st.session_state.until, "%Y-%m-%d")
+                                      if isinstance(st.session_state.until, str)
+                                      else st.session_state.until,
+                                      key="comment_until")

    # Run button
    comment_button = st.button("🔍 Analyze Comments", key="run_comment", use_container_width=True)
@@ -991,24 +1086,29 @@ with tabs[1]:
    # Run analysis when button is clicked
    if comment_button:
        if tweet_id:
-            raw_results, dataset_id = run_apify_comment_analysis({
-                "id": tweet_id,
-                "since": comment_since.strftime("%Y-%m-%d"),
-                "max_items": comment_max_items
-            })
-
-            # Process data to remove mock tweets and get the actual count
-            processed_df, _, mock_data_detected = process_tweet_data(raw_results)
-
-            if not processed_df.empty:
-                st.success(f"Analysis complete! Found {len(processed_df)} actual comments.")
-                st.balloons()
-                # Display the results using the processed DataFrame
-                analyze_and_display_data(processed_df, dataset_id, "Comment")
-            elif mock_data_detected and processed_df.empty:
-                st.warning("Mock data was returned by the API, indicating no specific comments were found for your query. Please try adjusting your parameters.")
-            else:  # No mock data, but still empty (or raw_results was empty)
-                st.warning("No results found. Try a different query or date range.")
+            # Validate date range
+            if comment_since > comment_until:
+                st.error("Start date must be before end date.")
+            else:
+                raw_results, dataset_id = run_apify_comment_analysis({
+                    "id": tweet_id,
+                    "since": comment_since.strftime("%Y-%m-%d"),
+                    "until": comment_until.strftime("%Y-%m-%d")
+                })
+
+                # Process data to remove mock tweets and get the actual count
+                processed_df, _, mock_data_detected = process_tweet_data(raw_results)
+
+                if not processed_df.empty:
+                    date_range = f"{comment_since.strftime('%Y-%m-%d')} to {comment_until.strftime('%Y-%m-%d')}"
+                    st.success(f"Analysis complete! Found {len(processed_df)} actual comments from {date_range}.")
+                    st.balloons()
+                    # Display the results using the processed DataFrame
+                    analyze_and_display_data(processed_df, dataset_id, "Comment")
+                elif mock_data_detected and processed_df.empty:
+                    st.warning("Mock data was returned by the API, indicating no specific comments were found for your query. Please try adjusting your date range.")
+                else:  # No mock data, but still empty (or raw_results was empty)
+                    st.warning("No results found. Try a different query or date range.")
        else:
            st.error("Please enter a Twitter ID")
@@ -1036,74 +1136,75 @@ with tabs[2]:
    )

    # Shared settings
-    col1, col2 = st.columns([2, 1])
+    col1, col2 = st.columns([1, 1])
    with col1:
        # Use a different key for the date input to avoid conflicts
        compare_since_date = st.date_input(
-            "Since date",
+            "Start date",
            value=datetime.strptime(st.session_state.compare_since, "%Y-%m-%d"),
            key="compare_since_dateinput"
        )
        st.session_state.compare_since = compare_since_date.strftime("%Y-%m-%d")
    with col2:
-        st.session_state.compare_max_items = st.number_input(
-            "Max tweets per account",
-            min_value=10,
-            max_value=1000,
-            value=st.session_state.compare_max_items,
-            step=10,
-            key="compare_max_items_num"
+        compare_until_date = st.date_input(
+            "End date",
+            value=datetime.strptime(st.session_state.compare_until, "%Y-%m-%d"),
+            key="compare_until_dateinput"
        )
+        st.session_state.compare_until = compare_until_date.strftime("%Y-%m-%d")

    compare_button = st.button("⚖️ Compare Accounts", key="run_compare", use_container_width=True)

    if compare_button:
        if st.session_state.username1 and st.session_state.username2:
-
-            def fetch_and_process_user_data(username, since, max_items):
-                with st.spinner(f"Fetching tweets for @{username}..."):
-                    results, dataset_id = run_apify_account_analysis({
-                        "username": username,
-                        "since": since,
-                        "max_items": max_items
-                    })
-                    processed_df, metrics, mock_data = process_tweet_data(results)
-
-                    if mock_data:
-                        st.warning(f"Mock data for @{username}, retrying without engagement filters...")
+            # Validate date range
+            if st.session_state.compare_since > st.session_state.compare_until:
+                st.error("Start date must be before end date.")
+            else:
+                def fetch_and_process_user_data(username, since, until):
+                    date_range = f"{since} to {until}"
+                    with st.spinner(f"Fetching tweets for @{username} from {date_range}..."):
                        results, dataset_id = run_apify_account_analysis({
-                            "username": username,
+                            "username": username,
                            "since": since,
-                            "max_items": max_items
-                        }, disable_engagement_filters=True)
-                        processed_df, metrics, _ = process_tweet_data(results)
-
-                    if not processed_df.empty:
-                        st.success(f"Found {len(processed_df)} tweets for @{username}.")
-                    else:
-                        st.warning(f"No results for @{username}.")
+                            "until": until,
+                            "min_faves": 0,
+                            "min_retweets": 0,
+                            "min_replies": 0
+                        })
+                        processed_df, metrics, mock_data = process_tweet_data(results, extract_account_info=True)
+
+                        if mock_data:
+                            st.warning(f"Mock data detected for @{username}, indicating limited results in the date range.")
+
+                        if not processed_df.empty:
+                            account_details = metrics.get("account_details", {})
+                            followers_info = f" | {account_details.get('followers_count', 'N/A')} followers" if account_details.get('followers_count') else ""
+                            st.success(f"Found {len(processed_df)} tweets for @{username} from {date_range}{followers_info}.")
+                        else:
+                            st.warning(f"No results for @{username} in the specified date range.")

-                return processed_df, metrics, dataset_id
+                    return processed_df, metrics, dataset_id

-            col1, col2 = st.columns(2)
-
-            with col1:
-                df1, metrics1, dsid1 = fetch_and_process_user_data(
-                    st.session_state.username1,
-                    st.session_state.compare_since,
-                    st.session_state.compare_max_items
-                )
-                if not df1.empty:
-                    display_compact_analysis(df1, metrics1, st.session_state.username1, dsid1)
+                col1, col2 = st.columns(2)
+
+                with col1:
+                    df1, metrics1, dsid1 = fetch_and_process_user_data(
+                        st.session_state.username1,
+                        st.session_state.compare_since,
+                        st.session_state.compare_until
+                    )
+                    if not df1.empty:
+                        display_compact_analysis(df1, metrics1, st.session_state.username1, dsid1)

-            with col2:
-                df2, metrics2, dsid2 = fetch_and_process_user_data(
-                    st.session_state.username2,
-                    st.session_state.compare_since,
-                    st.session_state.compare_max_items
-                )
-                if not df2.empty:
-                    display_compact_analysis(df2, metrics2, st.session_state.username2, dsid2)
+                with col2:
+                    df2, metrics2, dsid2 = fetch_and_process_user_data(
+                        st.session_state.username2,
+                        st.session_state.compare_since,
+                        st.session_state.compare_until
+                    )
+                    if not df2.empty:
+                        display_compact_analysis(df2, metrics2, st.session_state.username2, dsid2)

        # Display tweets side by side after the analysis
        if not df1.empty or not df2.empty:
@@ -1129,8 +1230,118 @@ with tabs[2]:
        else:
            st.error("Please enter both Twitter usernames to compare.")

-# Scheduler tab
+# Followers tab
with tabs[3]:
+    st.header("👥 Followers & Following Analysis")
+    st.write("Analyze followers and following lists for any Twitter account")
+
+    # Input fields
+    col1, col2, col3 = st.columns([3, 2, 2])
+    with col1:
+        followers_username = st.text_input(
+            "Enter Twitter username (without @)",
+            key="followers_username",
+            placeholder="e.g. JanaSenaParty"
+        )
+    with col2:
+        relationship_type = st.selectbox(
+            "Analysis Type",
+            ["followers", "following"],
+            key="relationship_type"
+        )
+    with col3:
+        max_users = st.number_input(
+            "Max Users to Fetch",
+            min_value=10,
+            max_value=1000,
+            value=100,
+            step=10,
+            key="max_followers"
+        )
+
+    # Analyze button
+    followers_button = st.button("👥 Analyze Followers/Following", key="run_followers", use_container_width=True)
+
+    if followers_button:
+        if followers_username:
+            try:
+                # Fetch followers/following data
+                data, dataset_id = run_apify_followers_analysis({
+                    "username": followers_username,
+                    "relationship_type": relationship_type,
+                    "max_items": max_users
+                })
+
+                if data:
+                    # Process the data
+                    df = process_followers_data(data, relationship_type)
+
+                    if not df.empty:
+                        st.success(f"Found {len(df)} {relationship_type} for @{followers_username}")
+                        st.balloons()
+
+                        # Display statistics
+                        col1, col2, col3, col4 = st.columns(4)
+                        with col1:
+                            st.metric("Total Users", len(df))
+                        with col2:
+                            verified_count = df['Verified'].sum()
+                            st.metric("Verified Users", verified_count)
+                        with col3:
+                            avg_followers = df['Followers'].mean()
+                            st.metric("Avg Followers", f"{avg_followers:,.0f}")
+                        with col4:
+                            avg_following = df['Following'].mean()
+                            st.metric("Avg Following", f"{avg_following:,.0f}")
+
+                        # Show top users by followers
+                        st.subheader(f"🔍 Top {relationship_type.title()} by Followers")
+                        top_users = df.nlargest(10, 'Followers')[['Username', 'Name', 'Followers', 'Following', 'Verified']]
+                        st.dataframe(top_users, use_container_width=True)
+
+                        # Download CSV
+                        csv = df.to_csv(index=False).encode('utf-8')
+                        st.download_button(
+                            f"📥 Download {relationship_type.title()} CSV",
+                            csv,
+                            f"{followers_username}_{relationship_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
+                            "text/csv",
+                            key=f"download-{relationship_type}",
+                            use_container_width=True
+                        )
+
+                        # Show detailed list
+                        st.subheader(f"📋 All {relationship_type.title()}")
+                        with st.expander(f"View all {len(df)} {relationship_type}"):
+                            for _, user in df.iterrows():
+                                with st.container():
+                                    col1, col2 = st.columns([1, 4])
+                                    with col1:
+                                        if user['Profile_Image']:
+                                            st.image(user['Profile_Image'], width=50)
+                                    with col2:
+                                        verified_badge = " ✅" if user['Verified'] else ""
+                                        st.write(f"**@{user['Username']}** ({user['Name']}){verified_badge}")
+                                        st.caption(f"👥 {user['Followers']:,} followers • {user['Following']:,} following")
+                                        if user['Bio']:
+                                            st.caption(f"📝 {user['Bio'][:100]}..." if len(user['Bio']) > 100 else user['Bio'])
+                                    st.divider()
+                    else:
+                        st.warning(f"No {relationship_type} data found for @{followers_username}")
+                else:
+                    st.warning(f"No {relationship_type} data could be retrieved for @{followers_username}")
+                    st.info("This may be due to:")
+                    st.write("• Account being private/protected")
+                    st.write("• Account having no followers/following")
+                    st.write("• API limitations or temporary issues")
+
+            except Exception as e:
+                st.error(f"Error fetching {relationship_type}: {str(e)}")
+        else:
+            st.error("Please enter a Twitter username")
+
+# Scheduler tab
+with tabs[4]:
    st.header("⏰ Daily Scheduler")
    st.write("Configure daily automatic fetching of tweets and storage to MongoDB.")
@@ -1193,13 +1404,13 @@ with tabs[3]:
    st.subheader("⚙️ Scheduler Configuration")
    usernames_input = st.text_area("Usernames to schedule (one per line)", value="\n".join(existing_users), key="sched_usernames")

-    col1, col2 = st.columns(2)
+    col1, col2, col3 = st.columns(3)
    with col1:
-        sched_since = st.date_input("Since date", value=(datetime.now() - timedelta(days=30)).date(), key="sched_since")
+        sched_since = st.date_input("Start date", value=(datetime.now() - timedelta(days=30)).date(), key="sched_since")
    with col2:
-        sched_max_items = st.number_input("Max tweets per account", min_value=10, max_value=1000, value=200, step=10, key="sched_max_items")
-
-    sched_time = st.time_input("Run at (24h format)", datetime.now().replace(hour=2, minute=0, second=0, microsecond=0).time(), key="sched_time")
+        sched_until = st.date_input("End date", value=datetime.now().date(), key="sched_until")
+    with col3:
+        sched_time = st.time_input("Run at (24h format)", datetime.now().replace(hour=2, minute=0, second=0, microsecond=0).time(), key="sched_time")

    # Buttons row
    col1, col2 = st.columns(2)
@@ -1207,23 +1418,28 @@ with tabs[3]:
        if st.button("▶️ Start Scheduler", key="start_scheduler", use_container_width=True):
            usernames = [u.strip() for u in usernames_input.split("\n") if u.strip()]
            if usernames:
-                # Save/update usernames in DB
-                save_scheduler_usernames(usernames)
-
-                # Clear existing jobs with tag
-                schedule.clear('tweet_jobs')
+                # Validate date range
+                if sched_since > sched_until:
+                    st.error("Start date must be before end date.")
+                else:
+                    # Save/update usernames in DB
+                    save_scheduler_usernames(usernames)
+
+                    # Clear existing jobs with tag
+                    schedule.clear('tweet_jobs')

-                def scheduled_job():
-                    schedule_fetch(usernames, sched_since.strftime("%Y-%m-%d"), sched_max_items)
+                    def scheduled_job():
+                        schedule_fetch(usernames, sched_since.strftime("%Y-%m-%d"), sched_until.strftime("%Y-%m-%d"))

-                schedule.every().day.at(sched_time.strftime("%H:%M")).tag('tweet_jobs').do(scheduled_job)
-                st.success(f"Scheduler started for {len(usernames)} accounts daily at {sched_time.strftime('%H:%M')}.")
+                    schedule.every().day.at(sched_time.strftime("%H:%M")).tag('tweet_jobs').do(scheduled_job)
+                    date_range = f"{sched_since.strftime('%Y-%m-%d')} to {sched_until.strftime('%Y-%m-%d')}"
+                    st.success(f"Scheduler started for {len(usernames)} accounts daily at {sched_time.strftime('%H:%M')} for date range {date_range}.")

-                # Launch scheduler loop thread if not already running
-                if 'scheduler_thread' not in st.session_state:
-                    thread = threading.Thread(target=_run_schedule_loop, daemon=True)
-                    thread.start()
-                    st.session_state.scheduler_thread = thread
+                    # Launch scheduler loop thread if not already running
+                    if 'scheduler_thread' not in st.session_state:
+                        thread = threading.Thread(target=_run_schedule_loop, daemon=True)
+                        thread.start()
+                        st.session_state.scheduler_thread = thread
            else:
                st.error("Please input at least one username.")
@@ -1231,42 +1447,53 @@ with tabs[3]:
        if st.button("🚀 Run Now", key="run_now_btn", use_container_width=True, type="secondary"):
            usernames = [u.strip() for u in usernames_input.split("\n") if u.strip()]
            if usernames:
-                with st.spinner(f"Scraping tweets for {len(usernames)} accounts..."):
-                    try:
-                        total_tweets = 0
-                        for username in usernames:
-                            with st.spinner(f"Scraping @{username}..."):
-                                results, _ = run_apify_account_analysis({
-                                    "username": username,
-                                    "since": sched_since.strftime("%Y-%m-%d"),
-                                    "max_items": sched_max_items
-                                }, disable_engagement_filters=True)
-                                df, _, _ = process_tweet_data(results)
-                                if not df.empty:
-                                    # Generate AI summary
-                                    ai_summary = None
-                                    if GENAI_AVAILABLE and GEMINI_API_KEY:
-                                        with st.spinner(f"Generating AI summary for @{username}..."):
-                                            try:
-                                                context = f"The following are account tweets for Twitter account @{username}"
-                                                ai_summary = get_gemini_summary(df, context)
-                                            except Exception as e:
-                                                st.warning(f"AI summary generation failed for @{username}: {e}")
-
-                                    store_to_mongodb(df, "Account", ai_summary)
-                                    total_tweets += len(df)
-                                    summary_status = " (with AI summary)" if ai_summary else ""
-                                    st.success(f"✅ @{username}: {len(df)} tweets scraped and stored{summary_status}")
-                                else:
-                                    st.warning(f"⚠️ @{username}: No tweets found")
-
-                        if total_tweets > 0:
-                            st.success(f"🎉 Successfully scraped and stored {total_tweets} tweets from {len(usernames)} accounts!")
-                            st.info("Data has been stored in your MongoDB DataCollector database.")
-                        else:
-                            st.warning("No tweets were found for any of the accounts.")
-                    except Exception as e:
-                        st.error(f"❌ Error during scraping: {str(e)}")
+                # Validate date range
+                if sched_since > sched_until:
+                    st.error("Start date must be before end date.")
+                else:
+                    date_range = f"{sched_since.strftime('%Y-%m-%d')} to {sched_until.strftime('%Y-%m-%d')}"
+                    with st.spinner(f"Scraping tweets for {len(usernames)} accounts from {date_range}..."):
+                        try:
+                            total_tweets = 0
+                            for username in usernames:
+                                with st.spinner(f"Scraping @{username} from {date_range}..."):
+                                    results, _ = run_apify_account_analysis({
+                                        "username": username,
+                                        "since": sched_since.strftime("%Y-%m-%d"),
+                                        "until": sched_until.strftime("%Y-%m-%d"),
+                                        "min_faves": 0,
+                                        "min_retweets": 0,
+                                        "min_replies": 0
+                                    })
+                                    df, metrics, _ = process_tweet_data(results, extract_account_info=True)
+                                    if not df.empty:
+                                        # Generate AI summary
+                                        ai_summary = None
+                                        if GENAI_AVAILABLE and GEMINI_API_KEY:
+                                            with st.spinner(f"Generating AI summary for @{username}..."):
+                                                try:
+                                                    context = f"The following are account tweets for Twitter account @{username}"
+                                                    ai_summary = get_gemini_summary(df, context)
+                                                except Exception as e:
+                                                    st.warning(f"AI summary generation failed for @{username}: {e}")
+
+                                        # ENHANCED: Store with raw data and account details
+                                        account_details = metrics.get("account_details", {})
+                                        store_to_mongodb(df, "Account", ai_summary, raw_data=results, account_details=account_details)
+                                        total_tweets += len(df)
+                                        summary_status = " (with AI summary)" if ai_summary else ""
+                                        account_info = f" | Followers: {account_details.get('followers_count', 'N/A')}" if account_details.get('followers_count') else ""
+                                        st.success(f"✅ @{username}: {len(df)} tweets scraped and stored from {date_range}{summary_status}{account_info}")
+                                    else:
+                                        st.warning(f"⚠️ @{username}: No tweets found in the specified date range")
+
+                            if total_tweets > 0:
+                                st.success(f"🎉 Successfully scraped and stored {total_tweets} tweets from {len(usernames)} accounts in date range {date_range}!")
+                                st.info("Data has been stored in your MongoDB DataCollector database.")
+                            else:
+                                st.warning("No tweets were found for any of the accounts in the specified date range.")
+                        except Exception as e:
+                            st.error(f"❌ Error during scraping: {str(e)}")
            else:
                st.error("Please input at least one username.")
@@ -1285,6 +1512,37 @@ with tabs[3]:
            st.success("Scheduler stopped. All scheduled jobs cleared.")
            st.rerun()

+# ENHANCED: Show API limitations and setup instructions
+st.sidebar.title("📋 API Notes & Features")
+st.sidebar.info(
+    """
+    **New Features:**
+
+    ✅ **Date Range Fetching:** All tweets between start and end dates are fetched (no max limit)
+
+    ✅ **Account Analysis:** Comprehensive account details shown in all analysis views
+
+    ✅ **Zero Engagement Filters:** Default engagement filters set to 0 for maximum tweet capture
+
+    ⚙️ **Optional Filters:** Users can set custom engagement thresholds if desired
+
+    **Known Limitations:**
+
+    🚫 **Tweet-level comment replies** are not available due to Twitter API restrictions. Only direct comments to the main account are fetched.
+
+    ⚠️ **Tweet count discrepancies** may occur due to:
+    - Private/protected tweets
+    - Deleted tweets
+    - API rate limiting
+    - Account restrictions
+
+    💡 **Tips for better results:**
+    - Use appropriate date ranges
+    - Keep engagement filters at 0 (default) for maximum capture
+    - Use broader time periods for more comprehensive data
+    """
+)
+
# Show instructions for setting up Gemini
if not GENAI_AVAILABLE or not GEMINI_API_KEY:
    st.sidebar.title("Setup Gemini API")
@@ -1362,6 +1620,108 @@ try:
except:
    pass

+def run_apify_followers_analysis(input):
+    """
+    Fetch followers/following data using Apify actor
+    This is a placeholder for when the followers actor works
+    """
+    username = input["username"]
+    relationship_type = input.get("relationship_type", "followers")  # "followers" or "following"
+    max_items = input.get("max_items", 100)
+
+    # Try the followers actor first
+    try:
+        if relationship_type == "followers":
+            run_input = {
+                "twitterHandles": [username],
+                "maxItems": max_items,
+                "getFollowers": True,
+                "getFollowing": False,
+                "getRetweeters": False,
+                "includeUnavailableUsers": False,
+            }
+        else:  # following
+            run_input = {
+                "twitterHandles": [username],
+                "maxItems": max_items,
+                "getFollowers": False,
+                "getFollowing": True,
+                "getRetweeters": False,
+                "includeUnavailableUsers": False,
+            }
+
+        with st.spinner(f"Fetching {relationship_type} for @{username}..."):
+            # Try the actor you specified
+            run = client.actor("V38PZzpEgOfeeWvZY").call(run_input=run_input)
+            data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
+
+        if data:
+            return data, run["defaultDatasetId"]
+        else:
+            # Fallback: Use alternative followers scraper
+            return run_apify_followers_fallback(input)
+
+    except Exception as e:
+        st.warning(f"Primary followers actor failed: {e}")
+        # Fallback to alternative scraper
+        return run_apify_followers_fallback(input)
+
+def run_apify_followers_fallback(input):
+    """
+    Fallback method using alternative followers scraper
+    """
+    username = input["username"]
+    relationship_type = input.get("relationship_type", "followers")
+    max_items = input.get("max_items", 100)
+
+    try:
+        # Use curious_coder/twitter-scraper as fallback
+        run_input = {
+            "profileUrl": f"https://twitter.com/{username}",
+            "friendshipType": relationship_type,  # "followers" or "following"
+            "count": max_items,
+            "minDelay": 1,
+            "maxDelay": 3
+        }
+
+        with st.spinner(f"Fetching {relationship_type} for @{username} (fallback method)..."):
+            run = client.actor("curious_coder/twitter-scraper").call(run_input=run_input)
+            data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
+            return data, run["defaultDatasetId"]
+
+    except Exception as e:
+        st.error(f"All followers scrapers failed: {e}")
+        return [], None
+
+def process_followers_data(data, relationship_type="followers"):
+    """
+    Process followers/following data into a structured format
+    """
+    processed_data = []
+
+    for item in data:
+        # Handle different data structures from different actors
+        username = item.get('username', item.get('screen_name', item.get('userName', '')))
+        name = item.get('name', item.get('displayName', ''))
+
+        processed_item = {
+            "Username": username,
+            "Name": name,
+            "Bio": item.get('description', item.get('bio', '')),
+            "Location": item.get('location', ''),
+            "Followers": item.get('followers_count', item.get('followersCount', item.get('followers', 0))),
+            "Following": item.get('following_count', item.get('followingCount', item.get('following', 0))),
+            "Tweets": item.get('tweet_count', item.get('statusesCount', item.get('statuses_count', 0))),
+            "Verified": item.get('verified', item.get('isVerified', False)),
+            "Profile_Image": item.get('profile_image_url', item.get('profileImageUrl', '')),
+            "Created_At": item.get('created_at', item.get('createdAt', '')),
+            "URL": item.get('url', f"https://twitter.com/{username}"),
+            "Relationship_Type": relationship_type
+        }
+        processed_data.append(processed_item)
+
+    return pd.DataFrame(processed_data)
+
# Footer with attribution
st.divider()
st.caption("Powered by Apify Twitter Scraper API • Created with Streamlit • AI Summaries by Google Gemini • Times in Indian Standard Time (IST)")
 