Spaces:
Paused
Paused
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +614 -254
src/streamlit_app.py
CHANGED
|
@@ -120,44 +120,70 @@ def get_gemini_summary(tweets_data, context=""):
|
|
| 120 |
except Exception as e:
|
| 121 |
return f"Error generating summary: {str(e)}"
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
def run_apify_comment_analysis(input):
|
| 124 |
# Prepare the Actor input with exact format for Comment Analysis
|
| 125 |
id = input["id"]
|
| 126 |
since_date = input["since"]
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
# Get current date for the "until" parameter
|
| 130 |
-
current_date = datetime.now().strftime("%Y-%m-%d")
|
| 131 |
|
| 132 |
-
# Use fixed date format as specified in the example
|
| 133 |
run_input = {
|
| 134 |
"@": id,
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
"since": since_date + "
|
| 158 |
-
|
| 159 |
-
"until":
|
| 160 |
-
"maxItems": max_items,
|
| 161 |
"queryType": "Latest",
|
| 162 |
"min_retweets": 0,
|
| 163 |
"min_faves": 0,
|
|
@@ -168,33 +194,29 @@ def run_apify_comment_analysis(input):
|
|
| 168 |
}
|
| 169 |
|
| 170 |
# Show loading state
|
| 171 |
-
with st.spinner("Fetching comments from
|
| 172 |
# Run the Actor and wait for it to finish
|
| 173 |
run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)
|
| 174 |
|
| 175 |
-
# Fetch data from the run's dataset
|
| 176 |
data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
|
| 177 |
-
|
| 178 |
-
# Ensure we don't exceed max_items
|
| 179 |
-
if len(data) > max_items:
|
| 180 |
-
data = data[:max_items]
|
| 181 |
|
| 182 |
return data, run["defaultDatasetId"]
|
| 183 |
|
| 184 |
-
def run_apify_account_analysis(input, disable_engagement_filters=
|
| 185 |
# Prepare the Actor input with exact format for Account Analysis
|
| 186 |
username = input["username"]
|
| 187 |
since_date = input["since"]
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
|
| 193 |
-
# Use the exact format provided by the user
|
| 194 |
run_input = {
|
| 195 |
"filter:blue_verified": False,
|
| 196 |
"filter:consumer_video": False,
|
| 197 |
-
"filter:has_engagement":
|
| 198 |
"filter:hashtags": False,
|
| 199 |
"filter:images": False,
|
| 200 |
"filter:links": False,
|
|
@@ -215,33 +237,24 @@ def run_apify_account_analysis(input, disable_engagement_filters=False):
|
|
| 215 |
"from": username,
|
| 216 |
"include:nativeretweets": False,
|
| 217 |
"lang": "en",
|
| 218 |
-
"maxItems": max_items,
|
| 219 |
"queryType": "Latest",
|
| 220 |
-
"since": since_date + "
|
| 221 |
-
"until":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
}
|
| 223 |
|
| 224 |
-
if not disable_engagement_filters:
|
| 225 |
-
run_input.update({
|
| 226 |
-
"min_faves": 500,
|
| 227 |
-
"min_retweets": 0,
|
| 228 |
-
"min_replies": 0,
|
| 229 |
-
"-min_retweets": 0,
|
| 230 |
-
"-min_faves": 0,
|
| 231 |
-
"-min_replies": 0
|
| 232 |
-
})
|
| 233 |
-
|
| 234 |
# Show loading state
|
| 235 |
-
with st.spinner("Fetching tweets from
|
| 236 |
# Run the Actor and wait for it to finish
|
| 237 |
run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)
|
| 238 |
|
| 239 |
-
# Fetch data from the run's dataset
|
| 240 |
data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
|
| 241 |
-
|
| 242 |
-
# Ensure we don't exceed max_items
|
| 243 |
-
if len(data) > max_items:
|
| 244 |
-
data = data[:max_items]
|
| 245 |
|
| 246 |
return data, run["defaultDatasetId"]
|
| 247 |
|
|
@@ -282,13 +295,14 @@ def convert_to_ist(utc_datetime):
|
|
| 282 |
ist_datetime = utc_datetime.astimezone(ist_tz)
|
| 283 |
return ist_datetime
|
| 284 |
|
| 285 |
-
# Function to process tweet data and create dataframe
|
| 286 |
-
def process_tweet_data(data):
|
| 287 |
processed_data = []
|
| 288 |
all_hashtags = []
|
| 289 |
all_mentions = []
|
| 290 |
mock_data_detected = False
|
| 291 |
mock_data_signature = "From KaitoEasyAPI, a reminder:Our API pricing is based on the volume of data returned."
|
|
|
|
| 292 |
|
| 293 |
for item in data:
|
| 294 |
text = item.get("text", "")
|
|
@@ -321,6 +335,10 @@ def process_tweet_data(data):
|
|
| 321 |
# Get author info
|
| 322 |
author = item.get("author", {})
|
| 323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
# Check if media exists
|
| 325 |
has_media = False
|
| 326 |
if "extendedEntities" in item and "media" in item["extendedEntities"]:
|
|
@@ -380,6 +398,7 @@ def process_tweet_data(data):
|
|
| 380 |
metrics = {
|
| 381 |
"hashtags": all_hashtags,
|
| 382 |
"mentions": all_mentions,
|
|
|
|
| 383 |
}
|
| 384 |
|
| 385 |
return df, metrics, mock_data_detected
|
|
@@ -388,6 +407,17 @@ def process_tweet_data(data):
|
|
| 388 |
def display_compact_analysis(df, metrics, username, dataset_id):
|
| 389 |
st.subheader(f"@{username}")
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
# Calculate metrics for analysis
|
| 392 |
total_tweets = len(df)
|
| 393 |
total_likes = df["Likes"].sum()
|
|
@@ -461,9 +491,12 @@ def display_compact_analysis(df, metrics, username, dataset_id):
|
|
| 461 |
|
| 462 |
# Function to analyze and display the tweet data
|
| 463 |
def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
|
|
|
|
| 464 |
if not isinstance(data, pd.DataFrame): # If raw data is passed
|
| 465 |
-
#
|
| 466 |
-
|
|
|
|
|
|
|
| 467 |
else: # If DataFrame is already processed (e.g. after retry)
|
| 468 |
df = data
|
| 469 |
# Recalculate metrics if df might have changed (e.g. if mock data was removed before this call)
|
|
@@ -474,7 +507,7 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
|
|
| 474 |
all_hashtags_retry.extend(row["Hashtags"].split(", "))
|
| 475 |
if pd.notna(row.get("Mentions")) and row["Mentions"]:
|
| 476 |
all_mentions_retry.extend(row["Mentions"].split(", "))
|
| 477 |
-
metrics = {"hashtags": all_hashtags_retry, "mentions": all_mentions_retry}
|
| 478 |
|
| 479 |
if not df.empty:
|
| 480 |
# Calculate additional metrics for analysis
|
|
@@ -529,6 +562,29 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
|
|
| 529 |
left_col, right_col = st.columns([1, 1])
|
| 530 |
|
| 531 |
with left_col:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
st.subheader("π Key Metrics")
|
| 533 |
|
| 534 |
# Basic stats
|
|
@@ -735,8 +791,8 @@ def display_tweet_list_compact(df):
|
|
| 735 |
# Small divider
|
| 736 |
st.write("---")
|
| 737 |
|
| 738 |
-
# Function to store processed tweets into MongoDB (upsert by tweet ID)
|
| 739 |
-
def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
|
| 740 |
if df.empty:
|
| 741 |
return
|
| 742 |
if not MONGODB_AVAILABLE:
|
|
@@ -748,19 +804,20 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
|
|
| 748 |
user_tweets = df[df['Username'] == username]
|
| 749 |
|
| 750 |
# Calculate aggregated metrics (convert to native Python types for MongoDB)
|
|
|
|
| 751 |
total_tweets = int(len(user_tweets))
|
| 752 |
-
total_likes = int(user_tweets
|
| 753 |
-
total_retweets = int(user_tweets
|
| 754 |
-
total_replies = int(user_tweets
|
| 755 |
-
total_bookmarks = int(user_tweets
|
| 756 |
-
total_views = int(user_tweets
|
| 757 |
total_engagement = total_likes + total_retweets + total_replies + total_bookmarks
|
| 758 |
avg_engagement = float(total_engagement / total_tweets) if total_tweets > 0 else 0.0
|
| 759 |
|
| 760 |
# Get all tweets as a list
|
| 761 |
tweets_list = user_tweets.to_dict("records")
|
| 762 |
|
| 763 |
-
# Create account document
|
| 764 |
account_doc = {
|
| 765 |
"username": username,
|
| 766 |
"analysis_type": analysis_type,
|
|
@@ -774,7 +831,9 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
|
|
| 774 |
"total_engagement": total_engagement,
|
| 775 |
"avg_engagement_per_tweet": avg_engagement,
|
| 776 |
"tweets": tweets_list,
|
| 777 |
-
"ai_summary": ai_summary
|
|
|
|
|
|
|
| 778 |
}
|
| 779 |
|
| 780 |
# Upsert by username - one document per account
|
|
@@ -786,15 +845,18 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
|
|
| 786 |
|
| 787 |
# --- Scheduler utilities ---
|
| 788 |
|
| 789 |
-
def fetch_and_store(username, since,
|
| 790 |
"""Helper to fetch tweets for a username and store them in MongoDB."""
|
| 791 |
try:
|
| 792 |
results, _ = run_apify_account_analysis({
|
| 793 |
"username": username,
|
| 794 |
"since": since,
|
| 795 |
-
"
|
| 796 |
-
|
| 797 |
-
|
|
|
|
|
|
|
|
|
|
| 798 |
|
| 799 |
# Generate AI summary if available
|
| 800 |
ai_summary = None
|
|
@@ -805,14 +867,16 @@ def fetch_and_store(username, since, max_items):
|
|
| 805 |
except Exception as e:
|
| 806 |
print(f"AI summary generation failed for @{username}: {e}")
|
| 807 |
|
| 808 |
-
|
|
|
|
|
|
|
| 809 |
except Exception as e:
|
| 810 |
print(f"Scheduler error fetching @{username}: {e}")
|
| 811 |
|
| 812 |
|
| 813 |
-
def schedule_fetch(usernames, since,
|
| 814 |
for user in usernames:
|
| 815 |
-
fetch_and_store(user, since,
|
| 816 |
|
| 817 |
|
| 818 |
def _run_schedule_loop():
|
|
@@ -869,8 +933,14 @@ if 'id' not in st.session_state:
|
|
| 869 |
st.session_state.id = ""
|
| 870 |
if 'since' not in st.session_state:
|
| 871 |
st.session_state.since = "2025-01-01"
|
| 872 |
-
if '
|
| 873 |
-
st.session_state.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
if 'results' not in st.session_state:
|
| 875 |
st.session_state.results = None
|
| 876 |
if 'dataset_id' not in st.session_state:
|
|
@@ -885,11 +955,11 @@ if 'username2' not in st.session_state:
|
|
| 885 |
st.session_state.username2 = ""
|
| 886 |
if 'compare_since' not in st.session_state:
|
| 887 |
st.session_state.compare_since = "2025-01-01"
|
| 888 |
-
if '
|
| 889 |
-
st.session_state.
|
| 890 |
|
| 891 |
# Create tabs
|
| 892 |
-
tabs = st.tabs(["π Account Analysis", "π¬ Comment Analysis", "π Compare", "β° Scheduler"])
|
| 893 |
|
| 894 |
# Account Analysis tab
|
| 895 |
with tabs[0]:
|
|
@@ -899,29 +969,56 @@ with tabs[0]:
|
|
| 899 |
st.write("Analyze tweets from a specific Twitter account")
|
| 900 |
|
| 901 |
# Input fields in a cleaner layout
|
| 902 |
-
col1, col2, col3 = st.columns([3, 2,
|
| 903 |
with col1:
|
| 904 |
st.session_state.username = st.text_input("Enter Twitter username (without @)",
|
| 905 |
value=st.session_state.username,
|
| 906 |
key="account_username",
|
| 907 |
placeholder="e.g. elonmusk")
|
| 908 |
with col2:
|
| 909 |
-
st.session_state.since = st.date_input("
|
| 910 |
value=datetime.strptime(st.session_state.since, "%Y-%m-%d")
|
| 911 |
if isinstance(st.session_state.since, str)
|
| 912 |
else st.session_state.since,
|
| 913 |
key="account_since")
|
| 914 |
with col3:
|
| 915 |
-
st.session_state.
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 921 |
|
| 922 |
-
# Convert
|
| 923 |
if not isinstance(st.session_state.since, str):
|
| 924 |
st.session_state.since = st.session_state.since.strftime("%Y-%m-%d")
|
|
|
|
|
|
|
| 925 |
|
| 926 |
# Run button
|
| 927 |
run_button = st.button("π Analyze Account Tweets", key="run_account", use_container_width=True)
|
|
@@ -929,33 +1026,32 @@ with tabs[0]:
|
|
| 929 |
# Run analysis when button is clicked
|
| 930 |
if run_button:
|
| 931 |
if st.session_state.username:
|
| 932 |
-
#
|
| 933 |
-
st.session_state.
|
| 934 |
-
"
|
| 935 |
-
|
| 936 |
-
"max_items": st.session_state.max_items
|
| 937 |
-
})
|
| 938 |
-
|
| 939 |
-
# Process initial results to check for mock data
|
| 940 |
-
processed_df, _, mock_data_detected = process_tweet_data(st.session_state.results)
|
| 941 |
-
|
| 942 |
-
if mock_data_detected:
|
| 943 |
-
st.warning("Mock data detected in the initial response, indicating no specific tweets found with current filters. Retrying without engagement filters...")
|
| 944 |
-
# Retry without engagement filters
|
| 945 |
st.session_state.results, st.session_state.dataset_id = run_apify_account_analysis({
|
| 946 |
-
"username": st.session_state.username,
|
| 947 |
"since": st.session_state.since,
|
| 948 |
-
"
|
| 949 |
-
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
st.
|
| 956 |
-
|
| 957 |
-
|
| 958 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 959 |
else:
|
| 960 |
st.error("Please enter a Twitter username")
|
| 961 |
|
|
@@ -966,24 +1062,23 @@ with tabs[1]:
|
|
| 966 |
st.write("Analyze comments directed at a specific Twitter account")
|
| 967 |
|
| 968 |
# Input fields in a cleaner layout
|
| 969 |
-
col1, col2, col3 = st.columns([3, 2,
|
| 970 |
with col1:
|
| 971 |
tweet_id = st.text_input("Enter Twitter ID",
|
| 972 |
key="comment_id",
|
| 973 |
placeholder="e.g. YSJaganTrends")
|
| 974 |
with col2:
|
| 975 |
-
comment_since = st.date_input("
|
| 976 |
value=datetime.strptime(st.session_state.since, "%Y-%m-%d")
|
| 977 |
if isinstance(st.session_state.since, str)
|
| 978 |
else st.session_state.since,
|
| 979 |
key="comment_since")
|
| 980 |
with col3:
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
key="comment_max_items")
|
| 987 |
|
| 988 |
# Run button
|
| 989 |
comment_button = st.button("π Analyze Comments", key="run_comment", use_container_width=True)
|
|
@@ -991,24 +1086,29 @@ with tabs[1]:
|
|
| 991 |
# Run analysis when button is clicked
|
| 992 |
if comment_button:
|
| 993 |
if tweet_id:
|
| 994 |
-
|
| 995 |
-
|
| 996 |
-
"
|
| 997 |
-
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
|
| 1001 |
-
|
| 1002 |
-
|
| 1003 |
-
|
| 1004 |
-
|
| 1005 |
-
|
| 1006 |
-
|
| 1007 |
-
|
| 1008 |
-
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1012 |
else:
|
| 1013 |
st.error("Please enter a Twitter ID")
|
| 1014 |
|
|
@@ -1036,74 +1136,75 @@ with tabs[2]:
|
|
| 1036 |
)
|
| 1037 |
|
| 1038 |
# Shared settings
|
| 1039 |
-
col1, col2 = st.columns([
|
| 1040 |
with col1:
|
| 1041 |
# Use a different key for the date input to avoid conflicts
|
| 1042 |
compare_since_date = st.date_input(
|
| 1043 |
-
"
|
| 1044 |
value=datetime.strptime(st.session_state.compare_since, "%Y-%m-%d"),
|
| 1045 |
key="compare_since_dateinput"
|
| 1046 |
)
|
| 1047 |
st.session_state.compare_since = compare_since_date.strftime("%Y-%m-%d")
|
| 1048 |
with col2:
|
| 1049 |
-
|
| 1050 |
-
"
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
value=st.session_state.compare_max_items,
|
| 1054 |
-
step=10,
|
| 1055 |
-
key="compare_max_items_num"
|
| 1056 |
)
|
|
|
|
| 1057 |
|
| 1058 |
compare_button = st.button("βοΈ Compare Accounts", key="run_compare", use_container_width=True)
|
| 1059 |
|
| 1060 |
if compare_button:
|
| 1061 |
if st.session_state.username1 and st.session_state.username2:
|
| 1062 |
-
|
| 1063 |
-
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
})
|
| 1070 |
-
processed_df, metrics, mock_data = process_tweet_data(results)
|
| 1071 |
-
|
| 1072 |
-
if mock_data:
|
| 1073 |
-
st.warning(f"Mock data for @{username}, retrying without engagement filters...")
|
| 1074 |
results, dataset_id = run_apify_account_analysis({
|
| 1075 |
-
"username": username,
|
| 1076 |
"since": since,
|
| 1077 |
-
"
|
| 1078 |
-
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1085 |
|
| 1086 |
-
|
| 1087 |
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
|
| 1094 |
-
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
|
| 1099 |
-
|
| 1100 |
-
|
| 1101 |
-
|
| 1102 |
-
|
| 1103 |
-
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
|
| 1107 |
|
| 1108 |
# Display tweets side by side after the analysis
|
| 1109 |
if not df1.empty or not df2.empty:
|
|
@@ -1129,8 +1230,118 @@ with tabs[2]:
|
|
| 1129 |
else:
|
| 1130 |
st.error("Please enter both Twitter usernames to compare.")
|
| 1131 |
|
| 1132 |
-
#
|
| 1133 |
with tabs[3]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1134 |
st.header("β° Daily Scheduler")
|
| 1135 |
st.write("Configure daily automatic fetching of tweets and storage to MongoDB.")
|
| 1136 |
|
|
@@ -1193,13 +1404,13 @@ with tabs[3]:
|
|
| 1193 |
st.subheader("βοΈ Scheduler Configuration")
|
| 1194 |
usernames_input = st.text_area("Usernames to schedule (one per line)", value="\n".join(existing_users), key="sched_usernames")
|
| 1195 |
|
| 1196 |
-
col1, col2 = st.columns(
|
| 1197 |
with col1:
|
| 1198 |
-
sched_since = st.date_input("
|
| 1199 |
with col2:
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
|
| 1204 |
# Buttons row
|
| 1205 |
col1, col2 = st.columns(2)
|
|
@@ -1207,23 +1418,28 @@ with tabs[3]:
|
|
| 1207 |
if st.button("βΆοΈ Start Scheduler", key="start_scheduler", use_container_width=True):
|
| 1208 |
usernames = [u.strip() for u in usernames_input.split("\n") if u.strip()]
|
| 1209 |
if usernames:
|
| 1210 |
-
#
|
| 1211 |
-
|
| 1212 |
-
|
| 1213 |
-
|
| 1214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1215 |
|
| 1216 |
-
|
| 1217 |
-
|
| 1218 |
|
| 1219 |
-
|
| 1220 |
-
|
|
|
|
| 1221 |
|
| 1222 |
-
|
| 1223 |
-
|
| 1224 |
-
|
| 1225 |
-
|
| 1226 |
-
|
| 1227 |
else:
|
| 1228 |
st.error("Please input at least one username.")
|
| 1229 |
|
|
@@ -1231,42 +1447,53 @@ with tabs[3]:
|
|
| 1231 |
if st.button("π Run Now", key="run_now_btn", use_container_width=True, type="secondary"):
|
| 1232 |
usernames = [u.strip() for u in usernames_input.split("\n") if u.strip()]
|
| 1233 |
if usernames:
|
| 1234 |
-
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
}
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
-
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
|
| 1260 |
-
|
| 1261 |
-
|
| 1262 |
-
|
| 1263 |
-
|
| 1264 |
-
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
|
| 1269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1270 |
else:
|
| 1271 |
st.error("Please input at least one username.")
|
| 1272 |
|
|
@@ -1285,6 +1512,37 @@ with tabs[3]:
|
|
| 1285 |
st.success("Scheduler stopped. All scheduled jobs cleared.")
|
| 1286 |
st.rerun()
|
| 1287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1288 |
# Show instructions for setting up Gemini
|
| 1289 |
if not GENAI_AVAILABLE or not GEMINI_API_KEY:
|
| 1290 |
st.sidebar.title("Setup Gemini API")
|
|
@@ -1362,6 +1620,108 @@ try:
|
|
| 1362 |
except:
|
| 1363 |
pass
|
| 1364 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1365 |
# Footer with attribution
|
| 1366 |
st.divider()
|
| 1367 |
st.caption("Powered by Apify Twitter Scraper API β’ Created with Streamlit β’ AI Summaries by Google Gemini β’ Times in Indian Standard Time (IST)")
|
|
|
|
| 120 |
except Exception as e:
|
| 121 |
return f"Error generating summary: {str(e)}"
|
| 122 |
|
| 123 |
+
# Function to extract account details from API response
|
| 124 |
+
def extract_account_details(author_data):
|
| 125 |
+
"""Extract comprehensive account details from author data"""
|
| 126 |
+
# If no data provided (None), return empty dict
|
| 127 |
+
if author_data is None:
|
| 128 |
+
return {}
|
| 129 |
+
|
| 130 |
+
# Create account details with defaults for all fields
|
| 131 |
+
account_details = {
|
| 132 |
+
"user_id": author_data.get("id", ""),
|
| 133 |
+
"name": author_data.get("name", ""),
|
| 134 |
+
"username": author_data.get("userName", ""),
|
| 135 |
+
"bio": author_data.get("description", author_data.get("biography", "")),
|
| 136 |
+
"location": author_data.get("location", ""),
|
| 137 |
+
"website": author_data.get("url", ""),
|
| 138 |
+
"followers_count": author_data.get("followersCount", author_data.get("followers_count", author_data.get("followers", 0))),
|
| 139 |
+
"following_count": author_data.get("followingCount", author_data.get("following_count", author_data.get("following", 0))),
|
| 140 |
+
"tweet_count": author_data.get("statusesCount", author_data.get("tweet_count", 0)),
|
| 141 |
+
"listed_count": author_data.get("listedCount", author_data.get("listed_count", 0)),
|
| 142 |
+
"verified": author_data.get("verified", author_data.get("isVerified", author_data.get("isBlueVerified", False))),
|
| 143 |
+
"protected": author_data.get("protected", False),
|
| 144 |
+
"profile_image_url": author_data.get("profileImageUrl", author_data.get("profile_image_url", "")),
|
| 145 |
+
"profile_banner_url": author_data.get("profileBannerUrl", author_data.get("profile_banner_url", "")),
|
| 146 |
+
"created_at": author_data.get("createdAt", author_data.get("created_at", "")),
|
| 147 |
+
"favourites_count": author_data.get("favouritesCount", author_data.get("favourites_count", 0)),
|
| 148 |
+
"media_count": author_data.get("mediaCount", author_data.get("media_count", 0))
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
return account_details
|
| 152 |
+
|
| 153 |
def run_apify_comment_analysis(input):
|
| 154 |
# Prepare the Actor input with exact format for Comment Analysis
|
| 155 |
id = input["id"]
|
| 156 |
since_date = input["since"]
|
| 157 |
+
until_date = input.get("until", datetime.now().strftime("%Y-%m-%d")) # NEW: Add until date
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
+
# Use fixed date format as specified in the example - FIXED QUERY PARAMETERS
|
| 160 |
run_input = {
|
| 161 |
"@": id,
|
| 162 |
+
"filter:blue_verified": False,
|
| 163 |
+
"filter:consumer_video": False,
|
| 164 |
+
"filter:has_engagement": False, # Always False to get more comments
|
| 165 |
+
"filter:hashtags": False,
|
| 166 |
+
"filter:images": False,
|
| 167 |
+
"filter:links": False,
|
| 168 |
+
"filter:media": False,
|
| 169 |
+
"filter:mentions": False,
|
| 170 |
+
"filter:native_video": False,
|
| 171 |
+
"filter:nativeretweets": False,
|
| 172 |
+
"filter:news": False,
|
| 173 |
+
"filter:pro_video": False,
|
| 174 |
+
"filter:quote": False,
|
| 175 |
+
"filter:replies": False, # Keep false to get actual comments
|
| 176 |
+
"filter:safe": False,
|
| 177 |
+
"filter:spaces": False,
|
| 178 |
+
"filter:twimg": False,
|
| 179 |
+
"filter:verified": False,
|
| 180 |
+
"filter:videos": False,
|
| 181 |
+
"filter:vine": False,
|
| 182 |
+
"include:nativeretweets": False,
|
| 183 |
+
"lang": "en",
|
| 184 |
+
"since": since_date + "_00:00:00_UTC",
|
| 185 |
+
"to": id,
|
| 186 |
+
"until": until_date + "_23:59:59_UTC",
|
|
|
|
| 187 |
"queryType": "Latest",
|
| 188 |
"min_retweets": 0,
|
| 189 |
"min_faves": 0,
|
|
|
|
| 194 |
}
|
| 195 |
|
| 196 |
# Show loading state
|
| 197 |
+
with st.spinner(f"Fetching comments from {since_date} to {until_date}..."):
|
| 198 |
# Run the Actor and wait for it to finish
|
| 199 |
run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)
|
| 200 |
|
| 201 |
+
# Fetch ALL data from the run's dataset (no maxItems limit)
|
| 202 |
data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
return data, run["defaultDatasetId"]
|
| 205 |
|
| 206 |
+
def run_apify_account_analysis(input, disable_engagement_filters=True):
|
| 207 |
# Prepare the Actor input with exact format for Account Analysis
|
| 208 |
username = input["username"]
|
| 209 |
since_date = input["since"]
|
| 210 |
+
until_date = input.get("until", datetime.now().strftime("%Y-%m-%d")) # NEW: Add until date
|
| 211 |
+
min_faves = input.get("min_faves", 0) # NEW: Configurable engagement
|
| 212 |
+
min_retweets = input.get("min_retweets", 0) # NEW: Configurable engagement
|
| 213 |
+
min_replies = input.get("min_replies", 0) # NEW: Configurable engagement
|
| 214 |
|
| 215 |
+
# Use the exact format provided by the user - IMPROVED QUERY PARAMETERS
|
| 216 |
run_input = {
|
| 217 |
"filter:blue_verified": False,
|
| 218 |
"filter:consumer_video": False,
|
| 219 |
+
"filter:has_engagement": False, # Always False for maximum tweet capture
|
| 220 |
"filter:hashtags": False,
|
| 221 |
"filter:images": False,
|
| 222 |
"filter:links": False,
|
|
|
|
| 237 |
"from": username,
|
| 238 |
"include:nativeretweets": False,
|
| 239 |
"lang": "en",
|
|
|
|
| 240 |
"queryType": "Latest",
|
| 241 |
+
"since": since_date + "_00:00:00_UTC",
|
| 242 |
+
"until": until_date + "_23:59:59_UTC",
|
| 243 |
+
"min_faves": min_faves, # NEW: User configurable, default 0
|
| 244 |
+
"min_retweets": min_retweets, # NEW: User configurable, default 0
|
| 245 |
+
"min_replies": min_replies, # NEW: User configurable, default 0
|
| 246 |
+
"-min_retweets": 0,
|
| 247 |
+
"-min_faves": 0,
|
| 248 |
+
"-min_replies": 0
|
| 249 |
}
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
# Show loading state
|
| 252 |
+
with st.spinner(f"Fetching tweets from {since_date} to {until_date}..."):
|
| 253 |
# Run the Actor and wait for it to finish
|
| 254 |
run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)
|
| 255 |
|
| 256 |
+
# Fetch ALL data from the run's dataset (no maxItems limit)
|
| 257 |
data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
return data, run["defaultDatasetId"]
|
| 260 |
|
|
|
|
| 295 |
ist_datetime = utc_datetime.astimezone(ist_tz)
|
| 296 |
return ist_datetime
|
| 297 |
|
| 298 |
+
# Function to process tweet data and create dataframe - ENHANCED FOR ACCOUNT DETAILS
|
| 299 |
+
def process_tweet_data(data, extract_account_info=False):
|
| 300 |
processed_data = []
|
| 301 |
all_hashtags = []
|
| 302 |
all_mentions = []
|
| 303 |
mock_data_detected = False
|
| 304 |
mock_data_signature = "From KaitoEasyAPI, a reminder:Our API pricing is based on the volume of data returned."
|
| 305 |
+
account_details = {}
|
| 306 |
|
| 307 |
for item in data:
|
| 308 |
text = item.get("text", "")
|
|
|
|
| 335 |
# Get author info
|
| 336 |
author = item.get("author", {})
|
| 337 |
|
| 338 |
+
# ENHANCED: Extract account details if requested
|
| 339 |
+
if extract_account_info and not account_details and author:
|
| 340 |
+
account_details = extract_account_details(author)
|
| 341 |
+
|
| 342 |
# Check if media exists
|
| 343 |
has_media = False
|
| 344 |
if "extendedEntities" in item and "media" in item["extendedEntities"]:
|
|
|
|
| 398 |
metrics = {
|
| 399 |
"hashtags": all_hashtags,
|
| 400 |
"mentions": all_mentions,
|
| 401 |
+
"account_details": account_details # ADDED: Include account details
|
| 402 |
}
|
| 403 |
|
| 404 |
return df, metrics, mock_data_detected
|
|
|
|
| 407 |
def display_compact_analysis(df, metrics, username, dataset_id):
|
| 408 |
st.subheader(f"@{username}")
|
| 409 |
|
| 410 |
+
# ENHANCED: Display account details if available
|
| 411 |
+
account_details = metrics.get("account_details", {})
|
| 412 |
+
if account_details:
|
| 413 |
+
st.markdown("##### π€ Account Info")
|
| 414 |
+
if account_details.get("followers_count"):
|
| 415 |
+
st.metric("Followers", f"{account_details['followers_count']:,}")
|
| 416 |
+
if account_details.get("following_count"):
|
| 417 |
+
st.metric("Following", f"{account_details['following_count']:,}")
|
| 418 |
+
if account_details.get("verified"):
|
| 419 |
+
st.success("β
Verified")
|
| 420 |
+
|
| 421 |
# Calculate metrics for analysis
|
| 422 |
total_tweets = len(df)
|
| 423 |
total_likes = df["Likes"].sum()
|
|
|
|
| 491 |
|
| 492 |
# Function to analyze and display the tweet data
|
| 493 |
def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
|
| 494 |
+
raw_data = None
|
| 495 |
if not isinstance(data, pd.DataFrame): # If raw data is passed
|
| 496 |
+
# Store raw data for sentiment analysis
|
| 497 |
+
raw_data = data
|
| 498 |
+
# Process the data into a dataframe - ENHANCED: Extract account details
|
| 499 |
+
df, metrics, _ = process_tweet_data(data, extract_account_info=True)
|
| 500 |
else: # If DataFrame is already processed (e.g. after retry)
|
| 501 |
df = data
|
| 502 |
# Recalculate metrics if df might have changed (e.g. if mock data was removed before this call)
|
|
|
|
| 507 |
all_hashtags_retry.extend(row["Hashtags"].split(", "))
|
| 508 |
if pd.notna(row.get("Mentions")) and row["Mentions"]:
|
| 509 |
all_mentions_retry.extend(row["Mentions"].split(", "))
|
| 510 |
+
metrics = {"hashtags": all_hashtags_retry, "mentions": all_mentions_retry, "account_details": {}}
|
| 511 |
|
| 512 |
if not df.empty:
|
| 513 |
# Calculate additional metrics for analysis
|
|
|
|
| 562 |
left_col, right_col = st.columns([1, 1])
|
| 563 |
|
| 564 |
with left_col:
|
| 565 |
+
# ENHANCED: Display account details if available
|
| 566 |
+
account_details = metrics.get("account_details", {})
|
| 567 |
+
if account_details:
|
| 568 |
+
st.subheader("π€ Account Information")
|
| 569 |
+
acc_col1, acc_col2, acc_col3 = st.columns(3)
|
| 570 |
+
with acc_col1:
|
| 571 |
+
if account_details.get("followers_count"):
|
| 572 |
+
st.metric("Followers", f"{account_details['followers_count']:,}")
|
| 573 |
+
if account_details.get("following_count"):
|
| 574 |
+
st.metric("Following", f"{account_details['following_count']:,}")
|
| 575 |
+
with acc_col2:
|
| 576 |
+
if account_details.get("tweet_count"):
|
| 577 |
+
st.metric("Total Tweets (All Time)", f"{account_details['tweet_count']:,}")
|
| 578 |
+
if account_details.get("listed_count"):
|
| 579 |
+
st.metric("Listed Count", f"{account_details['listed_count']:,}")
|
| 580 |
+
with acc_col3:
|
| 581 |
+
if account_details.get("verified"):
|
| 582 |
+
st.success("β
Verified Account")
|
| 583 |
+
if account_details.get("bio"):
|
| 584 |
+
st.write(f"**Bio:** {account_details['bio']}")
|
| 585 |
+
|
| 586 |
+
st.divider()
|
| 587 |
+
|
| 588 |
st.subheader("π Key Metrics")
|
| 589 |
|
| 590 |
# Basic stats
|
|
|
|
| 791 |
# Small divider
|
| 792 |
st.write("---")
|
| 793 |
|
| 794 |
+
# Function to store processed tweets into MongoDB (upsert by tweet ID) - ENHANCED FOR RAW DATA
|
| 795 |
+
def store_to_mongodb(df, analysis_type="Account", ai_summary=None, raw_data=None, account_details=None):
|
| 796 |
if df.empty:
|
| 797 |
return
|
| 798 |
if not MONGODB_AVAILABLE:
|
|
|
|
| 804 |
user_tweets = df[df['Username'] == username]
|
| 805 |
|
| 806 |
# Calculate aggregated metrics (convert to native Python types for MongoDB)
|
| 807 |
+
# Handle missing columns gracefully
|
| 808 |
total_tweets = int(len(user_tweets))
|
| 809 |
+
total_likes = int(user_tweets.get("Likes", pd.Series([0])).sum()) if "Likes" in user_tweets.columns else 0
|
| 810 |
+
total_retweets = int(user_tweets.get("Retweets", pd.Series([0])).sum()) if "Retweets" in user_tweets.columns else 0
|
| 811 |
+
total_replies = int(user_tweets.get("Replies", pd.Series([0])).sum()) if "Replies" in user_tweets.columns else 0
|
| 812 |
+
total_bookmarks = int(user_tweets.get("Bookmarks", pd.Series([0])).sum()) if "Bookmarks" in user_tweets.columns else 0
|
| 813 |
+
total_views = int(user_tweets.get("Views", pd.Series([0])).sum()) if "Views" in user_tweets.columns else 0
|
| 814 |
total_engagement = total_likes + total_retweets + total_replies + total_bookmarks
|
| 815 |
avg_engagement = float(total_engagement / total_tweets) if total_tweets > 0 else 0.0
|
| 816 |
|
| 817 |
# Get all tweets as a list
|
| 818 |
tweets_list = user_tweets.to_dict("records")
|
| 819 |
|
| 820 |
+
# ENHANCED: Create account document with raw data and account details
|
| 821 |
account_doc = {
|
| 822 |
"username": username,
|
| 823 |
"analysis_type": analysis_type,
|
|
|
|
| 831 |
"total_engagement": total_engagement,
|
| 832 |
"avg_engagement_per_tweet": avg_engagement,
|
| 833 |
"tweets": tweets_list,
|
| 834 |
+
"ai_summary": ai_summary,
|
| 835 |
+
"raw_tweets": raw_data if raw_data else [], # ADDED: Store raw data for sentiment analysis
|
| 836 |
+
"account_details": account_details if account_details else {} # ADDED: Store account details
|
| 837 |
}
|
| 838 |
|
| 839 |
# Upsert by username - one document per account
|
|
|
|
| 845 |
|
| 846 |
# --- Scheduler utilities ---
|
| 847 |
|
| 848 |
+
def fetch_and_store(username, since, until):
|
| 849 |
"""Helper to fetch tweets for a username and store them in MongoDB."""
|
| 850 |
try:
|
| 851 |
results, _ = run_apify_account_analysis({
|
| 852 |
"username": username,
|
| 853 |
"since": since,
|
| 854 |
+
"until": until,
|
| 855 |
+
"min_faves": 0,
|
| 856 |
+
"min_retweets": 0,
|
| 857 |
+
"min_replies": 0
|
| 858 |
+
})
|
| 859 |
+
df, metrics, _ = process_tweet_data(results, extract_account_info=True)
|
| 860 |
|
| 861 |
# Generate AI summary if available
|
| 862 |
ai_summary = None
|
|
|
|
| 867 |
except Exception as e:
|
| 868 |
print(f"AI summary generation failed for @{username}: {e}")
|
| 869 |
|
| 870 |
+
# ENHANCED: Store with raw data and account details
|
| 871 |
+
account_details = metrics.get("account_details", {})
|
| 872 |
+
store_to_mongodb(df, "Account", ai_summary, raw_data=results, account_details=account_details)
|
| 873 |
except Exception as e:
|
| 874 |
print(f"Scheduler error fetching @{username}: {e}")
|
| 875 |
|
| 876 |
|
| 877 |
+
def schedule_fetch(usernames, since, until):
|
| 878 |
for user in usernames:
|
| 879 |
+
fetch_and_store(user, since, until)
|
| 880 |
|
| 881 |
|
| 882 |
def _run_schedule_loop():
|
|
|
|
| 933 |
st.session_state.id = ""
|
| 934 |
if 'since' not in st.session_state:
|
| 935 |
st.session_state.since = "2025-01-01"
|
| 936 |
+
if 'until' not in st.session_state:
|
| 937 |
+
st.session_state.until = datetime.now().strftime("%Y-%m-%d")
|
| 938 |
+
if 'min_faves' not in st.session_state:
|
| 939 |
+
st.session_state.min_faves = 0
|
| 940 |
+
if 'min_retweets' not in st.session_state:
|
| 941 |
+
st.session_state.min_retweets = 0
|
| 942 |
+
if 'min_replies' not in st.session_state:
|
| 943 |
+
st.session_state.min_replies = 0
|
| 944 |
if 'results' not in st.session_state:
|
| 945 |
st.session_state.results = None
|
| 946 |
if 'dataset_id' not in st.session_state:
|
|
|
|
| 955 |
st.session_state.username2 = ""
|
| 956 |
if 'compare_since' not in st.session_state:
|
| 957 |
st.session_state.compare_since = "2025-01-01"
|
| 958 |
+
if 'compare_until' not in st.session_state:
|
| 959 |
+
st.session_state.compare_until = datetime.now().strftime("%Y-%m-%d")
|
| 960 |
|
| 961 |
# Create tabs
|
| 962 |
+
tabs = st.tabs(["π Account Analysis", "π¬ Comment Analysis", "π Compare", "π₯ Followers", "β° Scheduler"])
|
| 963 |
|
| 964 |
# Account Analysis tab
|
| 965 |
with tabs[0]:
|
|
|
|
| 969 |
st.write("Analyze tweets from a specific Twitter account")
|
| 970 |
|
| 971 |
# Input fields in a cleaner layout
|
| 972 |
+
col1, col2, col3 = st.columns([3, 2, 2])
|
| 973 |
with col1:
|
| 974 |
st.session_state.username = st.text_input("Enter Twitter username (without @)",
|
| 975 |
value=st.session_state.username,
|
| 976 |
key="account_username",
|
| 977 |
placeholder="e.g. elonmusk")
|
| 978 |
with col2:
|
| 979 |
+
st.session_state.since = st.date_input("Start date",
|
| 980 |
value=datetime.strptime(st.session_state.since, "%Y-%m-%d")
|
| 981 |
if isinstance(st.session_state.since, str)
|
| 982 |
else st.session_state.since,
|
| 983 |
key="account_since")
|
| 984 |
with col3:
|
| 985 |
+
st.session_state.until = st.date_input("End date",
|
| 986 |
+
value=datetime.strptime(st.session_state.until, "%Y-%m-%d")
|
| 987 |
+
if isinstance(st.session_state.until, str)
|
| 988 |
+
else st.session_state.until,
|
| 989 |
+
key="account_until")
|
| 990 |
+
|
| 991 |
+
# Optional engagement filters
|
| 992 |
+
with st.expander("βοΈ Advanced Filters (Optional)", expanded=False):
|
| 993 |
+
st.info("All filters are set to 0 by default to capture maximum tweets. Increase values to filter for more engaging content.")
|
| 994 |
+
col1, col2, col3 = st.columns(3)
|
| 995 |
+
with col1:
|
| 996 |
+
st.session_state.min_faves = st.number_input("Minimum Likes",
|
| 997 |
+
min_value=0,
|
| 998 |
+
max_value=10000,
|
| 999 |
+
value=st.session_state.min_faves,
|
| 1000 |
+
step=10,
|
| 1001 |
+
key="account_min_faves")
|
| 1002 |
+
with col2:
|
| 1003 |
+
st.session_state.min_retweets = st.number_input("Minimum Retweets",
|
| 1004 |
+
min_value=0,
|
| 1005 |
+
max_value=1000,
|
| 1006 |
+
value=st.session_state.min_retweets,
|
| 1007 |
+
step=5,
|
| 1008 |
+
key="account_min_retweets")
|
| 1009 |
+
with col3:
|
| 1010 |
+
st.session_state.min_replies = st.number_input("Minimum Replies",
|
| 1011 |
+
min_value=0,
|
| 1012 |
+
max_value=1000,
|
| 1013 |
+
value=st.session_state.min_replies,
|
| 1014 |
+
step=5,
|
| 1015 |
+
key="account_min_replies")
|
| 1016 |
|
| 1017 |
+
# Convert dates to string format
|
| 1018 |
if not isinstance(st.session_state.since, str):
|
| 1019 |
st.session_state.since = st.session_state.since.strftime("%Y-%m-%d")
|
| 1020 |
+
if not isinstance(st.session_state.until, str):
|
| 1021 |
+
st.session_state.until = st.session_state.until.strftime("%Y-%m-%d")
|
| 1022 |
|
| 1023 |
# Run button
|
| 1024 |
run_button = st.button("π Analyze Account Tweets", key="run_account", use_container_width=True)
|
|
|
|
| 1026 |
# Run analysis when button is clicked
|
| 1027 |
if run_button:
|
| 1028 |
if st.session_state.username:
|
| 1029 |
+
# Validate date range
|
| 1030 |
+
if st.session_state.since > st.session_state.until:
|
| 1031 |
+
st.error("Start date must be before end date.")
|
| 1032 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1033 |
st.session_state.results, st.session_state.dataset_id = run_apify_account_analysis({
|
| 1034 |
+
"username": st.session_state.username,
|
| 1035 |
"since": st.session_state.since,
|
| 1036 |
+
"until": st.session_state.until,
|
| 1037 |
+
"min_faves": st.session_state.min_faves,
|
| 1038 |
+
"min_retweets": st.session_state.min_retweets,
|
| 1039 |
+
"min_replies": st.session_state.min_replies
|
| 1040 |
+
})
|
| 1041 |
+
|
| 1042 |
+
# Process results to check for mock data
|
| 1043 |
+
processed_df, _, mock_data_detected = process_tweet_data(st.session_state.results, extract_account_info=True)
|
| 1044 |
+
|
| 1045 |
+
if mock_data_detected:
|
| 1046 |
+
st.warning("Mock data detected in the response, indicating limited results. This may be due to strict filters or no tweets in the date range.")
|
| 1047 |
+
|
| 1048 |
+
if not processed_df.empty:
|
| 1049 |
+
date_range = f"{st.session_state.since} to {st.session_state.until}"
|
| 1050 |
+
st.success(f"Analysis complete! Found {len(processed_df)} tweets from {date_range}.")
|
| 1051 |
+
st.balloons()
|
| 1052 |
+
analyze_and_display_data(processed_df, st.session_state.dataset_id, "Account")
|
| 1053 |
+
else:
|
| 1054 |
+
st.warning("No results found. Try a different date range or reduce the engagement filters.")
|
| 1055 |
else:
|
| 1056 |
st.error("Please enter a Twitter username")
|
| 1057 |
|
|
|
|
| 1062 |
st.write("Analyze comments directed at a specific Twitter account")
|
| 1063 |
|
| 1064 |
# Input fields in a cleaner layout
|
| 1065 |
+
col1, col2, col3 = st.columns([3, 2, 2])
|
| 1066 |
with col1:
|
| 1067 |
tweet_id = st.text_input("Enter Twitter ID",
|
| 1068 |
key="comment_id",
|
| 1069 |
placeholder="e.g. YSJaganTrends")
|
| 1070 |
with col2:
|
| 1071 |
+
comment_since = st.date_input("Start date",
|
| 1072 |
value=datetime.strptime(st.session_state.since, "%Y-%m-%d")
|
| 1073 |
if isinstance(st.session_state.since, str)
|
| 1074 |
else st.session_state.since,
|
| 1075 |
key="comment_since")
|
| 1076 |
with col3:
|
| 1077 |
+
comment_until = st.date_input("End date",
|
| 1078 |
+
value=datetime.strptime(st.session_state.until, "%Y-%m-%d")
|
| 1079 |
+
if isinstance(st.session_state.until, str)
|
| 1080 |
+
else st.session_state.until,
|
| 1081 |
+
key="comment_until")
|
|
|
|
| 1082 |
|
| 1083 |
# Run button
|
| 1084 |
comment_button = st.button("π Analyze Comments", key="run_comment", use_container_width=True)
|
|
|
|
| 1086 |
# Run analysis when button is clicked
|
| 1087 |
if comment_button:
|
| 1088 |
if tweet_id:
|
| 1089 |
+
# Validate date range
|
| 1090 |
+
if comment_since > comment_until:
|
| 1091 |
+
st.error("Start date must be before end date.")
|
| 1092 |
+
else:
|
| 1093 |
+
raw_results, dataset_id = run_apify_comment_analysis({
|
| 1094 |
+
"id": tweet_id,
|
| 1095 |
+
"since": comment_since.strftime("%Y-%m-%d"),
|
| 1096 |
+
"until": comment_until.strftime("%Y-%m-%d")
|
| 1097 |
+
})
|
| 1098 |
+
|
| 1099 |
+
# Process data to remove mock tweets and get the actual count
|
| 1100 |
+
processed_df, _, mock_data_detected = process_tweet_data(raw_results)
|
| 1101 |
+
|
| 1102 |
+
if not processed_df.empty:
|
| 1103 |
+
date_range = f"{comment_since.strftime('%Y-%m-%d')} to {comment_until.strftime('%Y-%m-%d')}"
|
| 1104 |
+
st.success(f"Analysis complete! Found {len(processed_df)} actual comments from {date_range}.")
|
| 1105 |
+
st.balloons()
|
| 1106 |
+
# Display the results using the processed DataFrame
|
| 1107 |
+
analyze_and_display_data(processed_df, dataset_id, "Comment")
|
| 1108 |
+
elif mock_data_detected and processed_df.empty:
|
| 1109 |
+
st.warning("Mock data was returned by the API, indicating no specific comments were found for your query. Please try adjusting your date range.")
|
| 1110 |
+
else: # No mock data, but still empty (or raw_results was empty)
|
| 1111 |
+
st.warning("No results found. Try a different query or date range.")
|
| 1112 |
else:
|
| 1113 |
st.error("Please enter a Twitter ID")
|
| 1114 |
|
|
|
|
| 1136 |
)
|
| 1137 |
|
| 1138 |
# Shared settings
|
| 1139 |
+
col1, col2 = st.columns([1, 1])
|
| 1140 |
with col1:
|
| 1141 |
# Use a different key for the date input to avoid conflicts
|
| 1142 |
compare_since_date = st.date_input(
|
| 1143 |
+
"Start date",
|
| 1144 |
value=datetime.strptime(st.session_state.compare_since, "%Y-%m-%d"),
|
| 1145 |
key="compare_since_dateinput"
|
| 1146 |
)
|
| 1147 |
st.session_state.compare_since = compare_since_date.strftime("%Y-%m-%d")
|
| 1148 |
with col2:
|
| 1149 |
+
compare_until_date = st.date_input(
|
| 1150 |
+
"End date",
|
| 1151 |
+
value=datetime.strptime(st.session_state.compare_until, "%Y-%m-%d"),
|
| 1152 |
+
key="compare_until_dateinput"
|
|
|
|
|
|
|
|
|
|
| 1153 |
)
|
| 1154 |
+
st.session_state.compare_until = compare_until_date.strftime("%Y-%m-%d")
|
| 1155 |
|
| 1156 |
compare_button = st.button("βοΈ Compare Accounts", key="run_compare", use_container_width=True)
|
| 1157 |
|
| 1158 |
if compare_button:
|
| 1159 |
if st.session_state.username1 and st.session_state.username2:
|
| 1160 |
+
# Validate date range
|
| 1161 |
+
if st.session_state.compare_since > st.session_state.compare_until:
|
| 1162 |
+
st.error("Start date must be before end date.")
|
| 1163 |
+
else:
|
| 1164 |
+
def fetch_and_process_user_data(username, since, until):
|
| 1165 |
+
date_range = f"{since} to {until}"
|
| 1166 |
+
with st.spinner(f"Fetching tweets for @{username} from {date_range}..."):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1167 |
results, dataset_id = run_apify_account_analysis({
|
| 1168 |
+
"username": username,
|
| 1169 |
"since": since,
|
| 1170 |
+
"until": until,
|
| 1171 |
+
"min_faves": 0,
|
| 1172 |
+
"min_retweets": 0,
|
| 1173 |
+
"min_replies": 0
|
| 1174 |
+
})
|
| 1175 |
+
processed_df, metrics, mock_data = process_tweet_data(results, extract_account_info=True)
|
| 1176 |
+
|
| 1177 |
+
if mock_data:
|
| 1178 |
+
st.warning(f"Mock data detected for @{username}, indicating limited results in the date range.")
|
| 1179 |
+
|
| 1180 |
+
if not processed_df.empty:
|
| 1181 |
+
account_details = metrics.get("account_details", {})
|
| 1182 |
+
followers_info = f" | {account_details.get('followers_count', 'N/A')} followers" if account_details.get('followers_count') else ""
|
| 1183 |
+
st.success(f"Found {len(processed_df)} tweets for @{username} from {date_range}{followers_info}.")
|
| 1184 |
+
else:
|
| 1185 |
+
st.warning(f"No results for @{username} in the specified date range.")
|
| 1186 |
|
| 1187 |
+
return processed_df, metrics, dataset_id
|
| 1188 |
|
| 1189 |
+
col1, col2 = st.columns(2)
|
| 1190 |
+
|
| 1191 |
+
with col1:
|
| 1192 |
+
df1, metrics1, dsid1 = fetch_and_process_user_data(
|
| 1193 |
+
st.session_state.username1,
|
| 1194 |
+
st.session_state.compare_since,
|
| 1195 |
+
st.session_state.compare_until
|
| 1196 |
+
)
|
| 1197 |
+
if not df1.empty:
|
| 1198 |
+
display_compact_analysis(df1, metrics1, st.session_state.username1, dsid1)
|
| 1199 |
|
| 1200 |
+
with col2:
|
| 1201 |
+
df2, metrics2, dsid2 = fetch_and_process_user_data(
|
| 1202 |
+
st.session_state.username2,
|
| 1203 |
+
st.session_state.compare_since,
|
| 1204 |
+
st.session_state.compare_until
|
| 1205 |
+
)
|
| 1206 |
+
if not df2.empty:
|
| 1207 |
+
display_compact_analysis(df2, metrics2, st.session_state.username2, dsid2)
|
| 1208 |
|
| 1209 |
# Display tweets side by side after the analysis
|
| 1210 |
if not df1.empty or not df2.empty:
|
|
|
|
| 1230 |
else:
|
| 1231 |
st.error("Please enter both Twitter usernames to compare.")
|
| 1232 |
|
| 1233 |
+
# Followers tab
|
| 1234 |
with tabs[3]:
|
| 1235 |
+
st.header("π₯ Followers & Following Analysis")
|
| 1236 |
+
st.write("Analyze followers and following lists for any Twitter account")
|
| 1237 |
+
|
| 1238 |
+
# Input fields
|
| 1239 |
+
col1, col2, col3 = st.columns([3, 2, 2])
|
| 1240 |
+
with col1:
|
| 1241 |
+
followers_username = st.text_input(
|
| 1242 |
+
"Enter Twitter username (without @)",
|
| 1243 |
+
key="followers_username",
|
| 1244 |
+
placeholder="e.g. JanaSenaParty"
|
| 1245 |
+
)
|
| 1246 |
+
with col2:
|
| 1247 |
+
relationship_type = st.selectbox(
|
| 1248 |
+
"Analysis Type",
|
| 1249 |
+
["followers", "following"],
|
| 1250 |
+
key="relationship_type"
|
| 1251 |
+
)
|
| 1252 |
+
with col3:
|
| 1253 |
+
max_users = st.number_input(
|
| 1254 |
+
"Max Users to Fetch",
|
| 1255 |
+
min_value=10,
|
| 1256 |
+
max_value=1000,
|
| 1257 |
+
value=100,
|
| 1258 |
+
step=10,
|
| 1259 |
+
key="max_followers"
|
| 1260 |
+
)
|
| 1261 |
+
|
| 1262 |
+
# Analyze button
|
| 1263 |
+
followers_button = st.button("π₯ Analyze Followers/Following", key="run_followers", use_container_width=True)
|
| 1264 |
+
|
| 1265 |
+
if followers_button:
|
| 1266 |
+
if followers_username:
|
| 1267 |
+
try:
|
| 1268 |
+
# Fetch followers/following data
|
| 1269 |
+
data, dataset_id = run_apify_followers_analysis({
|
| 1270 |
+
"username": followers_username,
|
| 1271 |
+
"relationship_type": relationship_type,
|
| 1272 |
+
"max_items": max_users
|
| 1273 |
+
})
|
| 1274 |
+
|
| 1275 |
+
if data:
|
| 1276 |
+
# Process the data
|
| 1277 |
+
df = process_followers_data(data, relationship_type)
|
| 1278 |
+
|
| 1279 |
+
if not df.empty:
|
| 1280 |
+
st.success(f"Found {len(df)} {relationship_type} for @{followers_username}")
|
| 1281 |
+
st.balloons()
|
| 1282 |
+
|
| 1283 |
+
# Display statistics
|
| 1284 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 1285 |
+
with col1:
|
| 1286 |
+
st.metric("Total Users", len(df))
|
| 1287 |
+
with col2:
|
| 1288 |
+
verified_count = df['Verified'].sum()
|
| 1289 |
+
st.metric("Verified Users", verified_count)
|
| 1290 |
+
with col3:
|
| 1291 |
+
avg_followers = df['Followers'].mean()
|
| 1292 |
+
st.metric("Avg Followers", f"{avg_followers:,.0f}")
|
| 1293 |
+
with col4:
|
| 1294 |
+
avg_following = df['Following'].mean()
|
| 1295 |
+
st.metric("Avg Following", f"{avg_following:,.0f}")
|
| 1296 |
+
|
| 1297 |
+
# Show top users by followers
|
| 1298 |
+
st.subheader(f"π Top {relationship_type.title()} by Followers")
|
| 1299 |
+
top_users = df.nlargest(10, 'Followers')[['Username', 'Name', 'Followers', 'Following', 'Verified']]
|
| 1300 |
+
st.dataframe(top_users, use_container_width=True)
|
| 1301 |
+
|
| 1302 |
+
# Download CSV
|
| 1303 |
+
csv = df.to_csv(index=False).encode('utf-8')
|
| 1304 |
+
st.download_button(
|
| 1305 |
+
f"π₯ Download {relationship_type.title()} CSV",
|
| 1306 |
+
csv,
|
| 1307 |
+
f"{followers_username}_{relationship_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
|
| 1308 |
+
"text/csv",
|
| 1309 |
+
key=f"download-{relationship_type}",
|
| 1310 |
+
use_container_width=True
|
| 1311 |
+
)
|
| 1312 |
+
|
| 1313 |
+
# Show detailed list
|
| 1314 |
+
st.subheader(f"π All {relationship_type.title()}")
|
| 1315 |
+
with st.expander(f"View all {len(df)} {relationship_type}"):
|
| 1316 |
+
for _, user in df.iterrows():
|
| 1317 |
+
with st.container():
|
| 1318 |
+
col1, col2 = st.columns([1, 4])
|
| 1319 |
+
with col1:
|
| 1320 |
+
if user['Profile_Image']:
|
| 1321 |
+
st.image(user['Profile_Image'], width=50)
|
| 1322 |
+
with col2:
|
| 1323 |
+
verified_badge = " β
" if user['Verified'] else ""
|
| 1324 |
+
st.write(f"**@{user['Username']}** ({user['Name']}){verified_badge}")
|
| 1325 |
+
st.caption(f"π₯ {user['Followers']:,} followers β’ {user['Following']:,} following")
|
| 1326 |
+
if user['Bio']:
|
| 1327 |
+
st.caption(f"π {user['Bio'][:100]}..." if len(user['Bio']) > 100 else user['Bio'])
|
| 1328 |
+
st.divider()
|
| 1329 |
+
else:
|
| 1330 |
+
st.warning(f"No {relationship_type} data found for @{followers_username}")
|
| 1331 |
+
else:
|
| 1332 |
+
st.warning(f"No {relationship_type} data could be retrieved for @{followers_username}")
|
| 1333 |
+
st.info("This may be due to:")
|
| 1334 |
+
st.write("β’ Account being private/protected")
|
| 1335 |
+
st.write("β’ Account having no followers/following")
|
| 1336 |
+
st.write("β’ API limitations or temporary issues")
|
| 1337 |
+
|
| 1338 |
+
except Exception as e:
|
| 1339 |
+
st.error(f"Error fetching {relationship_type}: {str(e)}")
|
| 1340 |
+
else:
|
| 1341 |
+
st.error("Please enter a Twitter username")
|
| 1342 |
+
|
| 1343 |
+
# Scheduler tab
|
| 1344 |
+
with tabs[4]:
|
| 1345 |
st.header("β° Daily Scheduler")
|
| 1346 |
st.write("Configure daily automatic fetching of tweets and storage to MongoDB.")
|
| 1347 |
|
|
|
|
| 1404 |
st.subheader("βοΈ Scheduler Configuration")
|
| 1405 |
usernames_input = st.text_area("Usernames to schedule (one per line)", value="\n".join(existing_users), key="sched_usernames")
|
| 1406 |
|
| 1407 |
+
col1, col2, col3 = st.columns(3)
|
| 1408 |
with col1:
|
| 1409 |
+
sched_since = st.date_input("Start date", value=(datetime.now() - timedelta(days=30)).date(), key="sched_since")
|
| 1410 |
with col2:
|
| 1411 |
+
sched_until = st.date_input("End date", value=datetime.now().date(), key="sched_until")
|
| 1412 |
+
with col3:
|
| 1413 |
+
sched_time = st.time_input("Run at (24h format)", datetime.now().replace(hour=2, minute=0, second=0, microsecond=0).time(), key="sched_time")
|
| 1414 |
|
| 1415 |
# Buttons row
|
| 1416 |
col1, col2 = st.columns(2)
|
|
|
|
| 1418 |
if st.button("βΆοΈ Start Scheduler", key="start_scheduler", use_container_width=True):
|
| 1419 |
usernames = [u.strip() for u in usernames_input.split("\n") if u.strip()]
|
| 1420 |
if usernames:
|
| 1421 |
+
# Validate date range
|
| 1422 |
+
if sched_since > sched_until:
|
| 1423 |
+
st.error("Start date must be before end date.")
|
| 1424 |
+
else:
|
| 1425 |
+
# Save/update usernames in DB
|
| 1426 |
+
save_scheduler_usernames(usernames)
|
| 1427 |
+
|
| 1428 |
+
# Clear existing jobs with tag
|
| 1429 |
+
schedule.clear('tweet_jobs')
|
| 1430 |
|
| 1431 |
+
def scheduled_job():
|
| 1432 |
+
schedule_fetch(usernames, sched_since.strftime("%Y-%m-%d"), sched_until.strftime("%Y-%m-%d"))
|
| 1433 |
|
| 1434 |
+
schedule.every().day.at(sched_time.strftime("%H:%M")).tag('tweet_jobs').do(scheduled_job)
|
| 1435 |
+
date_range = f"{sched_since.strftime('%Y-%m-%d')} to {sched_until.strftime('%Y-%m-%d')}"
|
| 1436 |
+
st.success(f"Scheduler started for {len(usernames)} accounts daily at {sched_time.strftime('%H:%M')} for date range {date_range}.")
|
| 1437 |
|
| 1438 |
+
# Launch scheduler loop thread if not already running
|
| 1439 |
+
if 'scheduler_thread' not in st.session_state:
|
| 1440 |
+
thread = threading.Thread(target=_run_schedule_loop, daemon=True)
|
| 1441 |
+
thread.start()
|
| 1442 |
+
st.session_state.scheduler_thread = thread
|
| 1443 |
else:
|
| 1444 |
st.error("Please input at least one username.")
|
| 1445 |
|
|
|
|
| 1447 |
if st.button("π Run Now", key="run_now_btn", use_container_width=True, type="secondary"):
|
| 1448 |
usernames = [u.strip() for u in usernames_input.split("\n") if u.strip()]
|
| 1449 |
if usernames:
|
| 1450 |
+
# Validate date range
|
| 1451 |
+
if sched_since > sched_until:
|
| 1452 |
+
st.error("Start date must be before end date.")
|
| 1453 |
+
else:
|
| 1454 |
+
date_range = f"{sched_since.strftime('%Y-%m-%d')} to {sched_until.strftime('%Y-%m-%d')}"
|
| 1455 |
+
with st.spinner(f"Scraping tweets for {len(usernames)} accounts from {date_range}..."):
|
| 1456 |
+
try:
|
| 1457 |
+
total_tweets = 0
|
| 1458 |
+
for username in usernames:
|
| 1459 |
+
with st.spinner(f"Scraping @{username} from {date_range}..."):
|
| 1460 |
+
results, _ = run_apify_account_analysis({
|
| 1461 |
+
"username": username,
|
| 1462 |
+
"since": sched_since.strftime("%Y-%m-%d"),
|
| 1463 |
+
"until": sched_until.strftime("%Y-%m-%d"),
|
| 1464 |
+
"min_faves": 0,
|
| 1465 |
+
"min_retweets": 0,
|
| 1466 |
+
"min_replies": 0
|
| 1467 |
+
})
|
| 1468 |
+
df, metrics, _ = process_tweet_data(results, extract_account_info=True)
|
| 1469 |
+
if not df.empty:
|
| 1470 |
+
# Generate AI summary
|
| 1471 |
+
ai_summary = None
|
| 1472 |
+
if GENAI_AVAILABLE and GEMINI_API_KEY:
|
| 1473 |
+
with st.spinner(f"Generating AI summary for @{username}..."):
|
| 1474 |
+
try:
|
| 1475 |
+
context = f"The following are account tweets for Twitter account @{username}"
|
| 1476 |
+
ai_summary = get_gemini_summary(df, context)
|
| 1477 |
+
except Exception as e:
|
| 1478 |
+
st.warning(f"AI summary generation failed for @{username}: {e}")
|
| 1479 |
+
|
| 1480 |
+
# ENHANCED: Store with raw data and account details
|
| 1481 |
+
account_details = metrics.get("account_details", {})
|
| 1482 |
+
store_to_mongodb(df, "Account", ai_summary, raw_data=results, account_details=account_details)
|
| 1483 |
+
total_tweets += len(df)
|
| 1484 |
+
summary_status = " (with AI summary)" if ai_summary else ""
|
| 1485 |
+
account_info = f" | Followers: {account_details.get('followers_count', 'N/A')}" if account_details.get('followers_count') else ""
|
| 1486 |
+
st.success(f"β
@{username}: {len(df)} tweets scraped and stored from {date_range}{summary_status}{account_info}")
|
| 1487 |
+
else:
|
| 1488 |
+
st.warning(f"β οΈ @{username}: No tweets found in the specified date range")
|
| 1489 |
+
|
| 1490 |
+
if total_tweets > 0:
|
| 1491 |
+
st.success(f"π Successfully scraped and stored {total_tweets} tweets from {len(usernames)} accounts in date range {date_range}!")
|
| 1492 |
+
st.info("Data has been stored in your MongoDB DataCollector database.")
|
| 1493 |
+
else:
|
| 1494 |
+
st.warning("No tweets were found for any of the accounts in the specified date range.")
|
| 1495 |
+
except Exception as e:
|
| 1496 |
+
st.error(f"β Error during scraping: {str(e)}")
|
| 1497 |
else:
|
| 1498 |
st.error("Please input at least one username.")
|
| 1499 |
|
|
|
|
| 1512 |
st.success("Scheduler stopped. All scheduled jobs cleared.")
|
| 1513 |
st.rerun()
|
| 1514 |
|
| 1515 |
+
# ENHANCED: Show API limitations and setup instructions
|
| 1516 |
+
st.sidebar.title("π API Notes & Features")
|
| 1517 |
+
st.sidebar.info(
|
| 1518 |
+
"""
|
| 1519 |
+
**New Features:**
|
| 1520 |
+
|
| 1521 |
+
β
**Date Range Fetching:** All tweets between start and end dates are fetched (no max limit)
|
| 1522 |
+
|
| 1523 |
+
β
**Account Analysis:** Comprehensive account details shown in all analysis views
|
| 1524 |
+
|
| 1525 |
+
β
**Zero Engagement Filters:** Default engagement filters set to 0 for maximum tweet capture
|
| 1526 |
+
|
| 1527 |
+
βοΈ **Optional Filters:** Users can set custom engagement thresholds if desired
|
| 1528 |
+
|
| 1529 |
+
**Known Limitations:**
|
| 1530 |
+
|
| 1531 |
+
π« **Tweet-level comment replies** are not available due to Twitter API restrictions. Only direct comments to the main account are fetched.
|
| 1532 |
+
|
| 1533 |
+
β οΈ **Tweet count discrepancies** may occur due to:
|
| 1534 |
+
- Private/protected tweets
|
| 1535 |
+
- Deleted tweets
|
| 1536 |
+
- API rate limiting
|
| 1537 |
+
- Account restrictions
|
| 1538 |
+
|
| 1539 |
+
π‘ **Tips for better results:**
|
| 1540 |
+
- Use appropriate date ranges
|
| 1541 |
+
- Keep engagement filters at 0 (default) for maximum capture
|
| 1542 |
+
- Use broader time periods for more comprehensive data
|
| 1543 |
+
"""
|
| 1544 |
+
)
|
| 1545 |
+
|
| 1546 |
# Show instructions for setting up Gemini
|
| 1547 |
if not GENAI_AVAILABLE or not GEMINI_API_KEY:
|
| 1548 |
st.sidebar.title("Setup Gemini API")
|
|
|
|
| 1620 |
except:
|
| 1621 |
pass
|
| 1622 |
|
| 1623 |
+
def run_apify_followers_analysis(input):
|
| 1624 |
+
"""
|
| 1625 |
+
Fetch followers/following data using Apify actor
|
| 1626 |
+
This is a placeholder for when the followers actor works
|
| 1627 |
+
"""
|
| 1628 |
+
username = input["username"]
|
| 1629 |
+
relationship_type = input.get("relationship_type", "followers") # "followers" or "following"
|
| 1630 |
+
max_items = input.get("max_items", 100)
|
| 1631 |
+
|
| 1632 |
+
# Try the followers actor first
|
| 1633 |
+
try:
|
| 1634 |
+
if relationship_type == "followers":
|
| 1635 |
+
run_input = {
|
| 1636 |
+
"twitterHandles": [username],
|
| 1637 |
+
"maxItems": max_items,
|
| 1638 |
+
"getFollowers": True,
|
| 1639 |
+
"getFollowing": False,
|
| 1640 |
+
"getRetweeters": False,
|
| 1641 |
+
"includeUnavailableUsers": False,
|
| 1642 |
+
}
|
| 1643 |
+
else: # following
|
| 1644 |
+
run_input = {
|
| 1645 |
+
"twitterHandles": [username],
|
| 1646 |
+
"maxItems": max_items,
|
| 1647 |
+
"getFollowers": False,
|
| 1648 |
+
"getFollowing": True,
|
| 1649 |
+
"getRetweeters": False,
|
| 1650 |
+
"includeUnavailableUsers": False,
|
| 1651 |
+
}
|
| 1652 |
+
|
| 1653 |
+
with st.spinner(f"Fetching {relationship_type} for @{username}..."):
|
| 1654 |
+
# Try the actor you specified
|
| 1655 |
+
run = client.actor("V38PZzpEgOfeeWvZY").call(run_input=run_input)
|
| 1656 |
+
data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
|
| 1657 |
+
|
| 1658 |
+
if data:
|
| 1659 |
+
return data, run["defaultDatasetId"]
|
| 1660 |
+
else:
|
| 1661 |
+
# Fallback: Use alternative followers scraper
|
| 1662 |
+
return run_apify_followers_fallback(input)
|
| 1663 |
+
|
| 1664 |
+
except Exception as e:
|
| 1665 |
+
st.warning(f"Primary followers actor failed: {e}")
|
| 1666 |
+
# Fallback to alternative scraper
|
| 1667 |
+
return run_apify_followers_fallback(input)
|
| 1668 |
+
|
| 1669 |
+
def run_apify_followers_fallback(input):
    """Fallback followers/following scraper used when the primary actor fails.

    Uses the ``curious_coder/twitter-scraper`` Apify actor. On any error the
    failure is surfaced to the UI and an empty result is returned rather than
    raising, so callers can degrade gracefully.
    """
    username = input["username"]
    relationship_type = input.get("relationship_type", "followers")
    max_items = input.get("max_items", 100)

    try:
        # Use curious_coder/twitter-scraper as fallback
        fallback_input = {
            "profileUrl": f"https://twitter.com/{username}",
            "friendshipType": relationship_type,  # "followers" or "following"
            "count": max_items,
            "minDelay": 1,
            "maxDelay": 3,
        }

        with st.spinner(f"Fetching {relationship_type} for @{username} (fallback method)..."):
            actor_run = client.actor("curious_coder/twitter-scraper").call(run_input=fallback_input)
            records = list(client.dataset(actor_run["defaultDatasetId"]).iterate_items())
            return records, actor_run["defaultDatasetId"]

    except Exception as e:
        st.error(f"All followers scrapers failed: {e}")
        return [], None
|
| 1695 |
+
|
| 1696 |
+
def process_followers_data(data, relationship_type="followers"):
    """Normalize raw follower/following records into a flat DataFrame.

    Different Apify actors name the same field differently (e.g. snake_case
    vs camelCase), so each column falls back through every known variant.

    Args:
        data: iterable of raw user dicts from an Apify followers actor.
        relationship_type: label stored in the "Relationship_Type" column;
            defaults to "followers".

    Returns:
        pandas.DataFrame with one row per user and consistent column names.
    """

    def _normalize(item):
        # Handle different data structures from different actors.
        handle = item.get('username', item.get('screen_name', item.get('userName', '')))
        return {
            "Username": handle,
            "Name": item.get('name', item.get('displayName', '')),
            "Bio": item.get('description', item.get('bio', '')),
            "Location": item.get('location', ''),
            "Followers": item.get('followers_count', item.get('followersCount', item.get('followers', 0))),
            "Following": item.get('following_count', item.get('followingCount', item.get('following', 0))),
            "Tweets": item.get('tweet_count', item.get('statusesCount', item.get('statuses_count', 0))),
            "Verified": item.get('verified', item.get('isVerified', False)),
            "Profile_Image": item.get('profile_image_url', item.get('profileImageUrl', '')),
            "Created_At": item.get('created_at', item.get('createdAt', '')),
            "URL": item.get('url', f"https://twitter.com/{handle}"),
            "Relationship_Type": relationship_type,
        }

    return pd.DataFrame([_normalize(item) for item in data])
|
| 1724 |
+
|
| 1725 |
# Footer with attribution
|
| 1726 |
st.divider()
|
| 1727 |
st.caption("Powered by Apify Twitter Scraper API β’ Created with Streamlit β’ AI Summaries by Google Gemini β’ Times in Indian Standard Time (IST)")
|