poemsforaphrodite committed on
Commit bbfa3ce · verified · 1 Parent(s): fd8c6cf

Update src/streamlit_app.py

Files changed (1):
  1. src/streamlit_app.py +614 -254
src/streamlit_app.py CHANGED
@@ -120,44 +120,70 @@ def get_gemini_summary(tweets_data, context=""):
    except Exception as e:
        return f"Error generating summary: {str(e)}"

+# Function to extract account details from API response
+def extract_account_details(author_data):
+    """Extract comprehensive account details from author data"""
+    # If no data provided (None), return empty dict
+    if author_data is None:
+        return {}
+
+    # Create account details with defaults for all fields
+    account_details = {
+        "user_id": author_data.get("id", ""),
+        "name": author_data.get("name", ""),
+        "username": author_data.get("userName", ""),
+        "bio": author_data.get("description", author_data.get("biography", "")),
+        "location": author_data.get("location", ""),
+        "website": author_data.get("url", ""),
+        "followers_count": author_data.get("followersCount", author_data.get("followers_count", author_data.get("followers", 0))),
+        "following_count": author_data.get("followingCount", author_data.get("following_count", author_data.get("following", 0))),
+        "tweet_count": author_data.get("statusesCount", author_data.get("tweet_count", 0)),
+        "listed_count": author_data.get("listedCount", author_data.get("listed_count", 0)),
+        "verified": author_data.get("verified", author_data.get("isVerified", author_data.get("isBlueVerified", False))),
+        "protected": author_data.get("protected", False),
+        "profile_image_url": author_data.get("profileImageUrl", author_data.get("profile_image_url", "")),
+        "profile_banner_url": author_data.get("profileBannerUrl", author_data.get("profile_banner_url", "")),
+        "created_at": author_data.get("createdAt", author_data.get("created_at", "")),
+        "favourites_count": author_data.get("favouritesCount", author_data.get("favourites_count", 0)),
+        "media_count": author_data.get("mediaCount", author_data.get("media_count", 0))
+    }
+
+    return account_details
+
def run_apify_comment_analysis(input):
    # Prepare the Actor input with exact format for Comment Analysis
    id = input["id"]
    since_date = input["since"]
-    max_items = input["max_items"]
-
-    # Get current date for the "until" parameter
-    current_date = datetime.now().strftime("%Y-%m-%d")
+    until_date = input.get("until", datetime.now().strftime("%Y-%m-%d"))  # NEW: Add until date

-    # Use fixed date format as specified in the example
+    # Use fixed date format as specified in the example - FIXED QUERY PARAMETERS
    run_input = {
        "@": id,
-        "filter:blue_verified": False,
-        "filter:consumer_video": False,
-        "filter:has_engagement": True,
-        "filter:hashtags": False,
-        "filter:images": False,
-        "filter:links": False,
-        "filter:media": False,
-        "filter:mentions": False,
-        "filter:native_video": False,
-        "filter:nativeretweets": False,
-        "filter:news": False,
-        "filter:pro_video": False,
-        "filter:quote": False,
-        "filter:replies": False,
-        "filter:safe": False,
-        "filter:spaces": False,
-        "filter:twimg": False,
-        "filter:verified": False,
-        "filter:videos": False,
-        "filter:vine": False,
-        "include:nativeretweets": False,
-        "lang": "en",
-        "since": since_date + "_23:59:59_UTC",
-        "to": id,
-        "until": current_date + "_23:59:59_UTC",
-        "maxItems": max_items,
+        "filter:blue_verified": False,
+        "filter:consumer_video": False,
+        "filter:has_engagement": False,  # Always False to get more comments
+        "filter:hashtags": False,
+        "filter:images": False,
+        "filter:links": False,
+        "filter:media": False,
+        "filter:mentions": False,
+        "filter:native_video": False,
+        "filter:nativeretweets": False,
+        "filter:news": False,
+        "filter:pro_video": False,
+        "filter:quote": False,
+        "filter:replies": False,  # Keep false to get actual comments
+        "filter:safe": False,
+        "filter:spaces": False,
+        "filter:twimg": False,
+        "filter:verified": False,
+        "filter:videos": False,
+        "filter:vine": False,
+        "include:nativeretweets": False,
+        "lang": "en",
+        "since": since_date + "_00:00:00_UTC",
+        "to": id,
+        "until": until_date + "_23:59:59_UTC",
        "queryType": "Latest",
        "min_retweets": 0,
        "min_faves": 0,
@@ -168,33 +194,29 @@ def run_apify_comment_analysis(input):
    }

    # Show loading state
-    with st.spinner("Fetching comments from Twitter..."):
+    with st.spinner(f"Fetching comments from {since_date} to {until_date}..."):
        # Run the Actor and wait for it to finish
        run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)

-        # Fetch data from the run's dataset
+        # Fetch ALL data from the run's dataset (no maxItems limit)
        data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
-
-        # Ensure we don't exceed max_items
-        if len(data) > max_items:
-            data = data[:max_items]

    return data, run["defaultDatasetId"]

-def run_apify_account_analysis(input, disable_engagement_filters=False):
+def run_apify_account_analysis(input, disable_engagement_filters=True):
    # Prepare the Actor input with exact format for Account Analysis
    username = input["username"]
    since_date = input["since"]
-    max_items = input["max_items"]
-
-    # Get current date for the "until" parameter
-    current_date = datetime.now().strftime("%Y-%m-%d")
+    until_date = input.get("until", datetime.now().strftime("%Y-%m-%d"))  # NEW: Add until date
+    min_faves = input.get("min_faves", 0)  # NEW: Configurable engagement
+    min_retweets = input.get("min_retweets", 0)  # NEW: Configurable engagement
+    min_replies = input.get("min_replies", 0)  # NEW: Configurable engagement

-    # Use the exact format provided by the user
+    # Use the exact format provided by the user - IMPROVED QUERY PARAMETERS
    run_input = {
        "filter:blue_verified": False,
        "filter:consumer_video": False,
-        "filter:has_engagement": True,
+        "filter:has_engagement": False,  # Always False for maximum tweet capture
        "filter:hashtags": False,
        "filter:images": False,
        "filter:links": False,
@@ -215,33 +237,24 @@ def run_apify_account_analysis(input, disable_engagement_filters=False):
        "from": username,
        "include:nativeretweets": False,
        "lang": "en",
-        "maxItems": max_items,
        "queryType": "Latest",
-        "since": since_date + "_23:59:59_UTC",
-        "until": current_date + "_23:59:59_UTC",
+        "since": since_date + "_00:00:00_UTC",
+        "until": until_date + "_23:59:59_UTC",
+        "min_faves": min_faves,  # NEW: User configurable, default 0
+        "min_retweets": min_retweets,  # NEW: User configurable, default 0
+        "min_replies": min_replies,  # NEW: User configurable, default 0
+        "-min_retweets": 0,
+        "-min_faves": 0,
+        "-min_replies": 0
    }

-    if not disable_engagement_filters:
-        run_input.update({
-            "min_faves": 500,
-            "min_retweets": 0,
-            "min_replies": 0,
-            "-min_retweets": 0,
-            "-min_faves": 0,
-            "-min_replies": 0
-        })
-
    # Show loading state
-    with st.spinner("Fetching tweets from Twitter..."):
+    with st.spinner(f"Fetching tweets from {since_date} to {until_date}..."):
        # Run the Actor and wait for it to finish
        run = client.actor("CJdippxWmn9uRfooo").call(run_input=run_input)

-        # Fetch data from the run's dataset
+        # Fetch ALL data from the run's dataset (no maxItems limit)
        data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
-
-        # Ensure we don't exceed max_items
-        if len(data) > max_items:
-            data = data[:max_items]

    return data, run["defaultDatasetId"]
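Review note: with maxItems gone from both fetchers, the input dict now carries only the date window plus optional engagement floors. A hedged sketch of the new call contract (account name and values illustrative):

    results, dataset_id = run_apify_account_analysis({
        "username": "example_user",   # hypothetical account
        "since": "2025-01-01",
        "until": "2025-02-01",        # optional; defaults to today's date
        "min_faves": 0,               # optional engagement floors, default 0
        "min_retweets": 0,
        "min_replies": 0,
    })

The new extract_account_details helper added above normalizes the differing key styles the Apify actors return via chained .get() fallbacks; for a hypothetical author payload:

    author = {"id": "12345", "userName": "example_user", "followers": 42, "isBlueVerified": True}
    details = extract_account_details(author)
    assert details["username"] == "example_user"
    assert details["followers_count"] == 42   # followersCount -> followers_count -> followers
    assert details["verified"] is True        # verified -> isVerified -> isBlueVerified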
@@ -282,13 +295,14 @@ def convert_to_ist(utc_datetime):
    ist_datetime = utc_datetime.astimezone(ist_tz)
    return ist_datetime

-# Function to process tweet data and create dataframe
-def process_tweet_data(data):
+# Function to process tweet data and create dataframe - ENHANCED FOR ACCOUNT DETAILS
+def process_tweet_data(data, extract_account_info=False):
    processed_data = []
    all_hashtags = []
    all_mentions = []
    mock_data_detected = False
    mock_data_signature = "From KaitoEasyAPI, a reminder:Our API pricing is based on the volume of data returned."
+    account_details = {}

    for item in data:
        text = item.get("text", "")
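Review note: the new extract_account_info keyword defaults to False, so existing call sites that pass only the raw data keep their old behavior:

    df, metrics, mock_detected = process_tweet_data(data)                             # unchanged behavior
    df, metrics, mock_detected = process_tweet_data(data, extract_account_info=True)  # opt in to account details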
@@ -321,6 +335,10 @@ def process_tweet_data(data):
        # Get author info
        author = item.get("author", {})

+        # ENHANCED: Extract account details if requested
+        if extract_account_info and not account_details and author:
+            account_details = extract_account_details(author)
+
        # Check if media exists
        has_media = False
        if "extendedEntities" in item and "media" in item["extendedEntities"]:
@@ -380,6 +398,7 @@ def process_tweet_data(data):
    metrics = {
        "hashtags": all_hashtags,
        "mentions": all_mentions,
+        "account_details": account_details  # ADDED: Include account details
    }

    return df, metrics, mock_data_detected
@@ -388,6 +407,17 @@ def process_tweet_data(data):
def display_compact_analysis(df, metrics, username, dataset_id):
    st.subheader(f"@{username}")

+    # ENHANCED: Display account details if available
+    account_details = metrics.get("account_details", {})
+    if account_details:
+        st.markdown("##### 👤 Account Info")
+        if account_details.get("followers_count"):
+            st.metric("Followers", f"{account_details['followers_count']:,}")
+        if account_details.get("following_count"):
+            st.metric("Following", f"{account_details['following_count']:,}")
+        if account_details.get("verified"):
+            st.success("✅ Verified")
+
    # Calculate metrics for analysis
    total_tweets = len(df)
    total_likes = df["Likes"].sum()
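Review note: the truthiness guards (if account_details.get("followers_count"):) hide the metric when the count is 0 as well as when the key is missing; an explicit None check would keep a genuine zero visible (sketch):

    followers = account_details.get("followers_count")
    if followers is not None:  # a real 0 still renders
        st.metric("Followers", f"{followers:,}")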
@@ -461,9 +491,12 @@ def display_compact_analysis(df, metrics, username, dataset_id):

# Function to analyze and display the tweet data
def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
+    raw_data = None
    if not isinstance(data, pd.DataFrame):  # If raw data is passed
-        # Process the data into a dataframe
-        df, metrics, _ = process_tweet_data(data)  # We don't need mock_data_detected here
+        # Store raw data for sentiment analysis
+        raw_data = data
+        # Process the data into a dataframe - ENHANCED: Extract account details
+        df, metrics, _ = process_tweet_data(data, extract_account_info=True)
    else:  # If DataFrame is already processed (e.g. after retry)
        df = data
        # Recalculate metrics if df might have changed (e.g. if mock data was removed before this call)
@@ -474,7 +507,7 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
                all_hashtags_retry.extend(row["Hashtags"].split(", "))
            if pd.notna(row.get("Mentions")) and row["Mentions"]:
                all_mentions_retry.extend(row["Mentions"].split(", "))
-        metrics = {"hashtags": all_hashtags_retry, "mentions": all_mentions_retry}
+        metrics = {"hashtags": all_hashtags_retry, "mentions": all_mentions_retry, "account_details": {}}

    if not df.empty:
        # Calculate additional metrics for analysis
@@ -529,6 +562,29 @@ def analyze_and_display_data(data, dataset_id, analysis_type="Account"):
        left_col, right_col = st.columns([1, 1])

        with left_col:
+            # ENHANCED: Display account details if available
+            account_details = metrics.get("account_details", {})
+            if account_details:
+                st.subheader("👤 Account Information")
+                acc_col1, acc_col2, acc_col3 = st.columns(3)
+                with acc_col1:
+                    if account_details.get("followers_count"):
+                        st.metric("Followers", f"{account_details['followers_count']:,}")
+                    if account_details.get("following_count"):
+                        st.metric("Following", f"{account_details['following_count']:,}")
+                with acc_col2:
+                    if account_details.get("tweet_count"):
+                        st.metric("Total Tweets (All Time)", f"{account_details['tweet_count']:,}")
+                    if account_details.get("listed_count"):
+                        st.metric("Listed Count", f"{account_details['listed_count']:,}")
+                with acc_col3:
+                    if account_details.get("verified"):
+                        st.success("✅ Verified Account")
+                    if account_details.get("bio"):
+                        st.write(f"**Bio:** {account_details['bio']}")
+
+                st.divider()
+
            st.subheader("📈 Key Metrics")

            # Basic stats
@@ -735,8 +791,8 @@ def display_tweet_list_compact(df):
        # Small divider
        st.write("---")

-# Function to store processed tweets into MongoDB (upsert by tweet ID)
-def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
+# Function to store processed tweets into MongoDB (upsert by tweet ID) - ENHANCED FOR RAW DATA
+def store_to_mongodb(df, analysis_type="Account", ai_summary=None, raw_data=None, account_details=None):
    if df.empty:
        return
    if not MONGODB_AVAILABLE:
@@ -748,19 +804,20 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
        user_tweets = df[df['Username'] == username]

        # Calculate aggregated metrics (convert to native Python types for MongoDB)
+        # Handle missing columns gracefully
        total_tweets = int(len(user_tweets))
-        total_likes = int(user_tweets["Likes"].sum())
-        total_retweets = int(user_tweets["Retweets"].sum())
-        total_replies = int(user_tweets["Replies"].sum())
-        total_bookmarks = int(user_tweets["Bookmarks"].sum())
-        total_views = int(user_tweets["Views"].sum())
+        total_likes = int(user_tweets.get("Likes", pd.Series([0])).sum()) if "Likes" in user_tweets.columns else 0
+        total_retweets = int(user_tweets.get("Retweets", pd.Series([0])).sum()) if "Retweets" in user_tweets.columns else 0
+        total_replies = int(user_tweets.get("Replies", pd.Series([0])).sum()) if "Replies" in user_tweets.columns else 0
+        total_bookmarks = int(user_tweets.get("Bookmarks", pd.Series([0])).sum()) if "Bookmarks" in user_tweets.columns else 0
+        total_views = int(user_tweets.get("Views", pd.Series([0])).sum()) if "Views" in user_tweets.columns else 0
        total_engagement = total_likes + total_retweets + total_replies + total_bookmarks
        avg_engagement = float(total_engagement / total_tweets) if total_tweets > 0 else 0.0

        # Get all tweets as a list
        tweets_list = user_tweets.to_dict("records")

-        # Create account document
+        # ENHANCED: Create account document with raw data and account details
        account_doc = {
            "username": username,
            "analysis_type": analysis_type,
@@ -774,7 +831,9 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):
            "total_engagement": total_engagement,
            "avg_engagement_per_tweet": avg_engagement,
            "tweets": tweets_list,
-            "ai_summary": ai_summary
+            "ai_summary": ai_summary,
+            "raw_tweets": raw_data if raw_data else [],  # ADDED: Store raw data for sentiment analysis
+            "account_details": account_details if account_details else {}  # ADDED: Store account details
        }

        # Upsert by username - one document per account
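Review note: in the new aggregation lines, the `if "Likes" in user_tweets.columns` guard already covers the missing-column case, so the `.get(..., pd.Series([0]))` fallback inside it can never fire; an equivalent, simpler helper would be (sketch, not part of this commit):

    def safe_sum(frame, column):
        # Sum a numeric column, treating a missing column as 0.
        return int(frame[column].sum()) if column in frame.columns else 0

    total_likes = safe_sum(user_tweets, "Likes")

The upsert referenced by the context comment above sits outside this hunk; assuming a pymongo collection, it presumably looks roughly like:

    collection.update_one(
        {"username": username},   # match key: one document per account
        {"$set": account_doc},    # overwrite with the fresh snapshot
        upsert=True,              # insert if the account is new
    )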
@@ -786,15 +845,18 @@ def store_to_mongodb(df, analysis_type="Account", ai_summary=None):

# --- Scheduler utilities ---

-def fetch_and_store(username, since, max_items):
+def fetch_and_store(username, since, until):
    """Helper to fetch tweets for a username and store them in MongoDB."""
    try:
        results, _ = run_apify_account_analysis({
            "username": username,
            "since": since,
-            "max_items": max_items
-        }, disable_engagement_filters=True)
-        df, _, _ = process_tweet_data(results)
+            "until": until,
+            "min_faves": 0,
+            "min_retweets": 0,
+            "min_replies": 0
+        })
+        df, metrics, _ = process_tweet_data(results, extract_account_info=True)

        # Generate AI summary if available
        ai_summary = None
@@ -805,14 +867,16 @@ def fetch_and_store(username, since, max_items):
        except Exception as e:
            print(f"AI summary generation failed for @{username}: {e}")

-        store_to_mongodb(df, "Account", ai_summary)
+        # ENHANCED: Store with raw data and account details
+        account_details = metrics.get("account_details", {})
+        store_to_mongodb(df, "Account", ai_summary, raw_data=results, account_details=account_details)
    except Exception as e:
        print(f"Scheduler error fetching @{username}: {e}")


-def schedule_fetch(usernames, since, max_items):
+def schedule_fetch(usernames, since, until):
    for user in usernames:
-        fetch_and_store(user, since, max_items)
+        fetch_and_store(user, since, until)


def _run_schedule_loop():
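Review note: _run_schedule_loop itself is untouched by this commit; the conventional body for the schedule library, which the daemon thread started in the Scheduler tab relies on, is (sketch, assuming the usual polling pattern):

    import time

    def _run_schedule_loop():
        # Poll the schedule registry forever; meant to run on a daemon thread.
        while True:
            schedule.run_pending()
            time.sleep(60)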
@@ -869,8 +933,14 @@ if 'id' not in st.session_state:
    st.session_state.id = ""
if 'since' not in st.session_state:
    st.session_state.since = "2025-01-01"
-if 'max_items' not in st.session_state:
-    st.session_state.max_items = 200
+if 'until' not in st.session_state:
+    st.session_state.until = datetime.now().strftime("%Y-%m-%d")
+if 'min_faves' not in st.session_state:
+    st.session_state.min_faves = 0
+if 'min_retweets' not in st.session_state:
+    st.session_state.min_retweets = 0
+if 'min_replies' not in st.session_state:
+    st.session_state.min_replies = 0
if 'results' not in st.session_state:
    st.session_state.results = None
if 'dataset_id' not in st.session_state:
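Review note: the growing block of one-key-at-a-time guards could be collapsed with the dict-like setdefault that st.session_state exposes, which only writes when the key is absent (equivalent sketch):

    for key, default in {"until": datetime.now().strftime("%Y-%m-%d"),
                         "min_faves": 0, "min_retweets": 0, "min_replies": 0}.items():
        st.session_state.setdefault(key, default)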
@@ -885,11 +955,11 @@ if 'username2' not in st.session_state:
    st.session_state.username2 = ""
if 'compare_since' not in st.session_state:
    st.session_state.compare_since = "2025-01-01"
-if 'compare_max_items' not in st.session_state:
-    st.session_state.compare_max_items = 200
+if 'compare_until' not in st.session_state:
+    st.session_state.compare_until = datetime.now().strftime("%Y-%m-%d")

# Create tabs
-tabs = st.tabs(["📊 Account Analysis", "💬 Comment Analysis", "🆚 Compare", "⏰ Scheduler"])
+tabs = st.tabs(["📊 Account Analysis", "💬 Comment Analysis", "🆚 Compare", "👥 Followers", "⏰ Scheduler"])

# Account Analysis tab
with tabs[0]:
@@ -899,29 +969,56 @@ with tabs[0]:
    st.write("Analyze tweets from a specific Twitter account")

    # Input fields in a cleaner layout
-    col1, col2, col3 = st.columns([3, 2, 1])
+    col1, col2, col3 = st.columns([3, 2, 2])
    with col1:
        st.session_state.username = st.text_input("Enter Twitter username (without @)",
                                                  value=st.session_state.username,
                                                  key="account_username",
                                                  placeholder="e.g. elonmusk")
    with col2:
-        st.session_state.since = st.date_input("Since date",
+        st.session_state.since = st.date_input("Start date",
                                               value=datetime.strptime(st.session_state.since, "%Y-%m-%d")
                                               if isinstance(st.session_state.since, str)
                                               else st.session_state.since,
                                               key="account_since")
    with col3:
-        st.session_state.max_items = st.number_input("Max tweets",
-                                                     min_value=10,
-                                                     max_value=1000,
-                                                     value=st.session_state.max_items,
-                                                     step=10,
-                                                     key="account_max_items")
+        st.session_state.until = st.date_input("End date",
+                                               value=datetime.strptime(st.session_state.until, "%Y-%m-%d")
+                                               if isinstance(st.session_state.until, str)
+                                               else st.session_state.until,
+                                               key="account_until")
+
+    # Optional engagement filters
+    with st.expander("⚙️ Advanced Filters (Optional)", expanded=False):
+        st.info("All filters are set to 0 by default to capture maximum tweets. Increase values to filter for more engaging content.")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.session_state.min_faves = st.number_input("Minimum Likes",
+                                                         min_value=0,
+                                                         max_value=10000,
+                                                         value=st.session_state.min_faves,
+                                                         step=10,
+                                                         key="account_min_faves")
+        with col2:
+            st.session_state.min_retweets = st.number_input("Minimum Retweets",
+                                                            min_value=0,
+                                                            max_value=1000,
+                                                            value=st.session_state.min_retweets,
+                                                            step=5,
+                                                            key="account_min_retweets")
+        with col3:
+            st.session_state.min_replies = st.number_input("Minimum Replies",
+                                                           min_value=0,
+                                                           max_value=1000,
+                                                           value=st.session_state.min_replies,
+                                                           step=5,
+                                                           key="account_min_replies")

-    # Convert date to string format
+    # Convert dates to string format
    if not isinstance(st.session_state.since, str):
        st.session_state.since = st.session_state.since.strftime("%Y-%m-%d")
+    if not isinstance(st.session_state.until, str):
+        st.session_state.until = st.session_state.until.strftime("%Y-%m-%d")

    # Run button
    run_button = st.button("🔍 Analyze Account Tweets", key="run_account", use_container_width=True)
@@ -929,33 +1026,32 @@ with tabs[0]:
    # Run analysis when button is clicked
    if run_button:
        if st.session_state.username:
-            # Initial attempt
-            st.session_state.results, st.session_state.dataset_id = run_apify_account_analysis({
-                "username": st.session_state.username,
-                "since": st.session_state.since,
-                "max_items": st.session_state.max_items
-            })
-
-            # Process initial results to check for mock data
-            processed_df, _, mock_data_detected = process_tweet_data(st.session_state.results)
-
-            if mock_data_detected:
-                st.warning("Mock data detected in the initial response, indicating no specific tweets found with current filters. Retrying without engagement filters...")
-                # Retry without engagement filters
+            # Validate date range
+            if st.session_state.since > st.session_state.until:
+                st.error("Start date must be before end date.")
+            else:
                st.session_state.results, st.session_state.dataset_id = run_apify_account_analysis({
-                    "username": st.session_state.username,
+                    "username": st.session_state.username,
                    "since": st.session_state.since,
-                    "max_items": st.session_state.max_items
-                }, disable_engagement_filters=True)
-                # Re-process the data after retry
-                processed_df, _, _ = process_tweet_data(st.session_state.results)  # mock_data_detected should be False now or data is empty
-
-            if not processed_df.empty:
-                st.success(f"Analysis complete! Found {len(processed_df)} tweets.")
-                st.balloons()
-                analyze_and_display_data(processed_df, st.session_state.dataset_id, "Account")
-            else:
-                st.warning("No results found even after adjustments. Try a different query or date range.")
+                    "until": st.session_state.until,
+                    "min_faves": st.session_state.min_faves,
+                    "min_retweets": st.session_state.min_retweets,
+                    "min_replies": st.session_state.min_replies
+                })
+
+                # Process results to check for mock data
+                processed_df, _, mock_data_detected = process_tweet_data(st.session_state.results, extract_account_info=True)
+
+                if mock_data_detected:
+                    st.warning("Mock data detected in the response, indicating limited results. This may be due to strict filters or no tweets in the date range.")
+
+                if not processed_df.empty:
+                    date_range = f"{st.session_state.since} to {st.session_state.until}"
+                    st.success(f"Analysis complete! Found {len(processed_df)} tweets from {date_range}.")
+                    st.balloons()
+                    analyze_and_display_data(processed_df, st.session_state.dataset_id, "Account")
+                else:
+                    st.warning("No results found. Try a different date range or reduce the engagement filters.")
        else:
            st.error("Please enter a Twitter username")
@@ -966,24 +1062,23 @@ with tabs[1]:
    st.write("Analyze comments directed at a specific Twitter account")

    # Input fields in a cleaner layout
-    col1, col2, col3 = st.columns([3, 2, 1])
+    col1, col2, col3 = st.columns([3, 2, 2])
    with col1:
        tweet_id = st.text_input("Enter Twitter ID",
                                 key="comment_id",
                                 placeholder="e.g. YSJaganTrends")
    with col2:
-        comment_since = st.date_input("Since date",
+        comment_since = st.date_input("Start date",
                                      value=datetime.strptime(st.session_state.since, "%Y-%m-%d")
                                      if isinstance(st.session_state.since, str)
                                      else st.session_state.since,
                                      key="comment_since")
    with col3:
-        comment_max_items = st.number_input("Max comments",
-                                            min_value=10,
-                                            max_value=1000,
-                                            value=st.session_state.max_items,
-                                            step=10,
-                                            key="comment_max_items")
+        comment_until = st.date_input("End date",
+                                      value=datetime.strptime(st.session_state.until, "%Y-%m-%d")
+                                      if isinstance(st.session_state.until, str)
+                                      else st.session_state.until,
+                                      key="comment_until")

    # Run button
    comment_button = st.button("🔍 Analyze Comments", key="run_comment", use_container_width=True)
@@ -991,24 +1086,29 @@ with tabs[1]:
    # Run analysis when button is clicked
    if comment_button:
        if tweet_id:
-            raw_results, dataset_id = run_apify_comment_analysis({
-                "id": tweet_id,
-                "since": comment_since.strftime("%Y-%m-%d"),
-                "max_items": comment_max_items
-            })
-
-            # Process data to remove mock tweets and get the actual count
-            processed_df, _, mock_data_detected = process_tweet_data(raw_results)
-
-            if not processed_df.empty:
-                st.success(f"Analysis complete! Found {len(processed_df)} actual comments.")
-                st.balloons()
-                # Display the results using the processed DataFrame
-                analyze_and_display_data(processed_df, dataset_id, "Comment")
-            elif mock_data_detected and processed_df.empty:
-                st.warning("Mock data was returned by the API, indicating no specific comments were found for your query. Please try adjusting your parameters.")
-            else:  # No mock data, but still empty (or raw_results was empty)
-                st.warning("No results found. Try a different query or date range.")
+            # Validate date range
+            if comment_since > comment_until:
+                st.error("Start date must be before end date.")
+            else:
+                raw_results, dataset_id = run_apify_comment_analysis({
+                    "id": tweet_id,
+                    "since": comment_since.strftime("%Y-%m-%d"),
+                    "until": comment_until.strftime("%Y-%m-%d")
+                })
+
+                # Process data to remove mock tweets and get the actual count
+                processed_df, _, mock_data_detected = process_tweet_data(raw_results)
+
+                if not processed_df.empty:
+                    date_range = f"{comment_since.strftime('%Y-%m-%d')} to {comment_until.strftime('%Y-%m-%d')}"
+                    st.success(f"Analysis complete! Found {len(processed_df)} actual comments from {date_range}.")
+                    st.balloons()
+                    # Display the results using the processed DataFrame
+                    analyze_and_display_data(processed_df, dataset_id, "Comment")
+                elif mock_data_detected and processed_df.empty:
+                    st.warning("Mock data was returned by the API, indicating no specific comments were found for your query. Please try adjusting your date range.")
+                else:  # No mock data, but still empty (or raw_results was empty)
+                    st.warning("No results found. Try a different query or date range.")
        else:
            st.error("Please enter a Twitter ID")
@@ -1036,74 +1136,75 @@ with tabs[2]:
    )

    # Shared settings
-    col1, col2 = st.columns([2, 1])
+    col1, col2 = st.columns([1, 1])
    with col1:
        # Use a different key for the date input to avoid conflicts
        compare_since_date = st.date_input(
-            "Since date",
+            "Start date",
            value=datetime.strptime(st.session_state.compare_since, "%Y-%m-%d"),
            key="compare_since_dateinput"
        )
        st.session_state.compare_since = compare_since_date.strftime("%Y-%m-%d")
    with col2:
-        st.session_state.compare_max_items = st.number_input(
-            "Max tweets per account",
-            min_value=10,
-            max_value=1000,
-            value=st.session_state.compare_max_items,
-            step=10,
-            key="compare_max_items_num"
+        compare_until_date = st.date_input(
+            "End date",
+            value=datetime.strptime(st.session_state.compare_until, "%Y-%m-%d"),
+            key="compare_until_dateinput"
        )
+        st.session_state.compare_until = compare_until_date.strftime("%Y-%m-%d")

    compare_button = st.button("⚖️ Compare Accounts", key="run_compare", use_container_width=True)

    if compare_button:
        if st.session_state.username1 and st.session_state.username2:
-
-            def fetch_and_process_user_data(username, since, max_items):
-                with st.spinner(f"Fetching tweets for @{username}..."):
-                    results, dataset_id = run_apify_account_analysis({
-                        "username": username,
-                        "since": since,
-                        "max_items": max_items
-                    })
-                    processed_df, metrics, mock_data = process_tweet_data(results)
-
-                    if mock_data:
-                        st.warning(f"Mock data for @{username}, retrying without engagement filters...")
+            # Validate date range
+            if st.session_state.compare_since > st.session_state.compare_until:
+                st.error("Start date must be before end date.")
+            else:
+                def fetch_and_process_user_data(username, since, until):
+                    date_range = f"{since} to {until}"
+                    with st.spinner(f"Fetching tweets for @{username} from {date_range}..."):
                        results, dataset_id = run_apify_account_analysis({
-                            "username": username,
+                            "username": username,
                            "since": since,
-                            "max_items": max_items
-                        }, disable_engagement_filters=True)
-                        processed_df, metrics, _ = process_tweet_data(results)
-
-                    if not processed_df.empty:
-                        st.success(f"Found {len(processed_df)} tweets for @{username}.")
-                    else:
-                        st.warning(f"No results for @{username}.")
+                            "until": until,
+                            "min_faves": 0,
+                            "min_retweets": 0,
+                            "min_replies": 0
+                        })
+                        processed_df, metrics, mock_data = process_tweet_data(results, extract_account_info=True)
+
+                        if mock_data:
+                            st.warning(f"Mock data detected for @{username}, indicating limited results in the date range.")
+
+                        if not processed_df.empty:
+                            account_details = metrics.get("account_details", {})
+                            followers_info = f" | {account_details.get('followers_count', 'N/A')} followers" if account_details.get('followers_count') else ""
+                            st.success(f"Found {len(processed_df)} tweets for @{username} from {date_range}{followers_info}.")
+                        else:
+                            st.warning(f"No results for @{username} in the specified date range.")

-                return processed_df, metrics, dataset_id
+                    return processed_df, metrics, dataset_id

-            col1, col2 = st.columns(2)
-
-            with col1:
-                df1, metrics1, dsid1 = fetch_and_process_user_data(
-                    st.session_state.username1,
-                    st.session_state.compare_since,
-                    st.session_state.compare_max_items
-                )
-                if not df1.empty:
-                    display_compact_analysis(df1, metrics1, st.session_state.username1, dsid1)
+                col1, col2 = st.columns(2)
+
+                with col1:
+                    df1, metrics1, dsid1 = fetch_and_process_user_data(
+                        st.session_state.username1,
+                        st.session_state.compare_since,
+                        st.session_state.compare_until
+                    )
+                    if not df1.empty:
+                        display_compact_analysis(df1, metrics1, st.session_state.username1, dsid1)

-            with col2:
-                df2, metrics2, dsid2 = fetch_and_process_user_data(
-                    st.session_state.username2,
-                    st.session_state.compare_since,
-                    st.session_state.compare_max_items
-                )
-                if not df2.empty:
-                    display_compact_analysis(df2, metrics2, st.session_state.username2, dsid2)
+                with col2:
+                    df2, metrics2, dsid2 = fetch_and_process_user_data(
+                        st.session_state.username2,
+                        st.session_state.compare_since,
+                        st.session_state.compare_until
+                    )
+                    if not df2.empty:
+                        display_compact_analysis(df2, metrics2, st.session_state.username2, dsid2)

        # Display tweets side by side after the analysis
        if not df1.empty or not df2.empty:
@@ -1129,8 +1230,118 @@ with tabs[2]:
        else:
            st.error("Please enter both Twitter usernames to compare.")

-# Scheduler tab
+# Followers tab
with tabs[3]:
+    st.header("👥 Followers & Following Analysis")
+    st.write("Analyze followers and following lists for any Twitter account")
+
+    # Input fields
+    col1, col2, col3 = st.columns([3, 2, 2])
+    with col1:
+        followers_username = st.text_input(
+            "Enter Twitter username (without @)",
+            key="followers_username",
+            placeholder="e.g. JanaSenaParty"
+        )
+    with col2:
+        relationship_type = st.selectbox(
+            "Analysis Type",
+            ["followers", "following"],
+            key="relationship_type"
+        )
+    with col3:
+        max_users = st.number_input(
+            "Max Users to Fetch",
+            min_value=10,
+            max_value=1000,
+            value=100,
+            step=10,
+            key="max_followers"
+        )
+
+    # Analyze button
+    followers_button = st.button("👥 Analyze Followers/Following", key="run_followers", use_container_width=True)
+
+    if followers_button:
+        if followers_username:
+            try:
+                # Fetch followers/following data
+                data, dataset_id = run_apify_followers_analysis({
+                    "username": followers_username,
+                    "relationship_type": relationship_type,
+                    "max_items": max_users
+                })
+
+                if data:
+                    # Process the data
+                    df = process_followers_data(data, relationship_type)
+
+                    if not df.empty:
+                        st.success(f"Found {len(df)} {relationship_type} for @{followers_username}")
+                        st.balloons()
+
+                        # Display statistics
+                        col1, col2, col3, col4 = st.columns(4)
+                        with col1:
+                            st.metric("Total Users", len(df))
+                        with col2:
+                            verified_count = df['Verified'].sum()
+                            st.metric("Verified Users", verified_count)
+                        with col3:
+                            avg_followers = df['Followers'].mean()
+                            st.metric("Avg Followers", f"{avg_followers:,.0f}")
+                        with col4:
+                            avg_following = df['Following'].mean()
+                            st.metric("Avg Following", f"{avg_following:,.0f}")
+
+                        # Show top users by followers
+                        st.subheader(f"🔍 Top {relationship_type.title()} by Followers")
+                        top_users = df.nlargest(10, 'Followers')[['Username', 'Name', 'Followers', 'Following', 'Verified']]
+                        st.dataframe(top_users, use_container_width=True)
+
+                        # Download CSV
+                        csv = df.to_csv(index=False).encode('utf-8')
+                        st.download_button(
+                            f"📥 Download {relationship_type.title()} CSV",
+                            csv,
+                            f"{followers_username}_{relationship_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
+                            "text/csv",
+                            key=f"download-{relationship_type}",
+                            use_container_width=True
+                        )
+
+                        # Show detailed list
+                        st.subheader(f"📋 All {relationship_type.title()}")
+                        with st.expander(f"View all {len(df)} {relationship_type}"):
+                            for _, user in df.iterrows():
+                                with st.container():
+                                    col1, col2 = st.columns([1, 4])
+                                    with col1:
+                                        if user['Profile_Image']:
+                                            st.image(user['Profile_Image'], width=50)
+                                    with col2:
+                                        verified_badge = " ✅" if user['Verified'] else ""
+                                        st.write(f"**@{user['Username']}** ({user['Name']}){verified_badge}")
+                                        st.caption(f"👥 {user['Followers']:,} followers • {user['Following']:,} following")
+                                        if user['Bio']:
+                                            st.caption(f"📝 {user['Bio'][:100]}..." if len(user['Bio']) > 100 else user['Bio'])
+                                    st.divider()
+                    else:
+                        st.warning(f"No {relationship_type} data found for @{followers_username}")
+                else:
+                    st.warning(f"No {relationship_type} data could be retrieved for @{followers_username}")
+                    st.info("This may be due to:")
+                    st.write("• Account being private/protected")
+                    st.write("• Account having no followers/following")
+                    st.write("• API limitations or temporary issues")
+
+            except Exception as e:
+                st.error(f"Error fetching {relationship_type}: {str(e)}")
+        else:
+            st.error("Please enter a Twitter username")
+
+# Scheduler tab
+with tabs[4]:
    st.header("⏰ Daily Scheduler")
    st.write("Configure daily automatic fetching of tweets and storage to MongoDB.")
@@ -1193,13 +1404,13 @@ with tabs[3]:
    st.subheader("⚙️ Scheduler Configuration")
    usernames_input = st.text_area("Usernames to schedule (one per line)", value="\n".join(existing_users), key="sched_usernames")

-    col1, col2 = st.columns(2)
+    col1, col2, col3 = st.columns(3)
    with col1:
-        sched_since = st.date_input("Since date", value=(datetime.now() - timedelta(days=30)).date(), key="sched_since")
+        sched_since = st.date_input("Start date", value=(datetime.now() - timedelta(days=30)).date(), key="sched_since")
    with col2:
-        sched_max_items = st.number_input("Max tweets per account", min_value=10, max_value=1000, value=200, step=10, key="sched_max_items")
-
-    sched_time = st.time_input("Run at (24h format)", datetime.now().replace(hour=2, minute=0, second=0, microsecond=0).time(), key="sched_time")
+        sched_until = st.date_input("End date", value=datetime.now().date(), key="sched_until")
+    with col3:
+        sched_time = st.time_input("Run at (24h format)", datetime.now().replace(hour=2, minute=0, second=0, microsecond=0).time(), key="sched_time")

    # Buttons row
    col1, col2 = st.columns(2)
@@ -1207,23 +1418,28 @@ with tabs[3]:
        if st.button("▶️ Start Scheduler", key="start_scheduler", use_container_width=True):
            usernames = [u.strip() for u in usernames_input.split("\n") if u.strip()]
            if usernames:
-                # Save/update usernames in DB
-                save_scheduler_usernames(usernames)
-
-                # Clear existing jobs with tag
-                schedule.clear('tweet_jobs')
+                # Validate date range
+                if sched_since > sched_until:
+                    st.error("Start date must be before end date.")
+                else:
+                    # Save/update usernames in DB
+                    save_scheduler_usernames(usernames)
+
+                    # Clear existing jobs with tag
+                    schedule.clear('tweet_jobs')

-                def scheduled_job():
-                    schedule_fetch(usernames, sched_since.strftime("%Y-%m-%d"), sched_max_items)
+                    def scheduled_job():
+                        schedule_fetch(usernames, sched_since.strftime("%Y-%m-%d"), sched_until.strftime("%Y-%m-%d"))

-                schedule.every().day.at(sched_time.strftime("%H:%M")).tag('tweet_jobs').do(scheduled_job)
-                st.success(f"Scheduler started for {len(usernames)} accounts daily at {sched_time.strftime('%H:%M')}.")
+                    schedule.every().day.at(sched_time.strftime("%H:%M")).tag('tweet_jobs').do(scheduled_job)
+                    date_range = f"{sched_since.strftime('%Y-%m-%d')} to {sched_until.strftime('%Y-%m-%d')}"
+                    st.success(f"Scheduler started for {len(usernames)} accounts daily at {sched_time.strftime('%H:%M')} for date range {date_range}.")

-                # Launch scheduler loop thread if not already running
-                if 'scheduler_thread' not in st.session_state:
-                    thread = threading.Thread(target=_run_schedule_loop, daemon=True)
-                    thread.start()
-                    st.session_state.scheduler_thread = thread
+                    # Launch scheduler loop thread if not already running
+                    if 'scheduler_thread' not in st.session_state:
+                        thread = threading.Thread(target=_run_schedule_loop, daemon=True)
+                        thread.start()
+                        st.session_state.scheduler_thread = thread
            else:
                st.error("Please input at least one username.")
@@ -1231,42 +1447,53 @@ with tabs[3]:
        if st.button("🚀 Run Now", key="run_now_btn", use_container_width=True, type="secondary"):
            usernames = [u.strip() for u in usernames_input.split("\n") if u.strip()]
            if usernames:
-                with st.spinner(f"Scraping tweets for {len(usernames)} accounts..."):
-                    try:
-                        total_tweets = 0
-                        for username in usernames:
-                            with st.spinner(f"Scraping @{username}..."):
-                                results, _ = run_apify_account_analysis({
-                                    "username": username,
-                                    "since": sched_since.strftime("%Y-%m-%d"),
-                                    "max_items": sched_max_items
-                                }, disable_engagement_filters=True)
-                                df, _, _ = process_tweet_data(results)
-                                if not df.empty:
-                                    # Generate AI summary
-                                    ai_summary = None
-                                    if GENAI_AVAILABLE and GEMINI_API_KEY:
-                                        with st.spinner(f"Generating AI summary for @{username}..."):
-                                            try:
-                                                context = f"The following are account tweets for Twitter account @{username}"
-                                                ai_summary = get_gemini_summary(df, context)
-                                            except Exception as e:
-                                                st.warning(f"AI summary generation failed for @{username}: {e}")
-
-                                    store_to_mongodb(df, "Account", ai_summary)
-                                    total_tweets += len(df)
-                                    summary_status = " (with AI summary)" if ai_summary else ""
-                                    st.success(f"✅ @{username}: {len(df)} tweets scraped and stored{summary_status}")
-                                else:
-                                    st.warning(f"⚠️ @{username}: No tweets found")
-
-                        if total_tweets > 0:
-                            st.success(f"🎉 Successfully scraped and stored {total_tweets} tweets from {len(usernames)} accounts!")
-                            st.info("Data has been stored in your MongoDB DataCollector database.")
-                        else:
-                            st.warning("No tweets were found for any of the accounts.")
-                    except Exception as e:
-                        st.error(f"❌ Error during scraping: {str(e)}")
+                # Validate date range
+                if sched_since > sched_until:
+                    st.error("Start date must be before end date.")
+                else:
+                    date_range = f"{sched_since.strftime('%Y-%m-%d')} to {sched_until.strftime('%Y-%m-%d')}"
+                    with st.spinner(f"Scraping tweets for {len(usernames)} accounts from {date_range}..."):
+                        try:
+                            total_tweets = 0
+                            for username in usernames:
+                                with st.spinner(f"Scraping @{username} from {date_range}..."):
+                                    results, _ = run_apify_account_analysis({
+                                        "username": username,
+                                        "since": sched_since.strftime("%Y-%m-%d"),
+                                        "until": sched_until.strftime("%Y-%m-%d"),
+                                        "min_faves": 0,
+                                        "min_retweets": 0,
+                                        "min_replies": 0
+                                    })
+                                    df, metrics, _ = process_tweet_data(results, extract_account_info=True)
+                                    if not df.empty:
+                                        # Generate AI summary
+                                        ai_summary = None
+                                        if GENAI_AVAILABLE and GEMINI_API_KEY:
+                                            with st.spinner(f"Generating AI summary for @{username}..."):
+                                                try:
+                                                    context = f"The following are account tweets for Twitter account @{username}"
+                                                    ai_summary = get_gemini_summary(df, context)
+                                                except Exception as e:
+                                                    st.warning(f"AI summary generation failed for @{username}: {e}")
+
+                                        # ENHANCED: Store with raw data and account details
+                                        account_details = metrics.get("account_details", {})
+                                        store_to_mongodb(df, "Account", ai_summary, raw_data=results, account_details=account_details)
+                                        total_tweets += len(df)
+                                        summary_status = " (with AI summary)" if ai_summary else ""
+                                        account_info = f" | Followers: {account_details.get('followers_count', 'N/A')}" if account_details.get('followers_count') else ""
+                                        st.success(f"✅ @{username}: {len(df)} tweets scraped and stored from {date_range}{summary_status}{account_info}")
+                                    else:
+                                        st.warning(f"⚠️ @{username}: No tweets found in the specified date range")
+
+                            if total_tweets > 0:
+                                st.success(f"🎉 Successfully scraped and stored {total_tweets} tweets from {len(usernames)} accounts in date range {date_range}!")
+                                st.info("Data has been stored in your MongoDB DataCollector database.")
+                            else:
+                                st.warning("No tweets were found for any of the accounts in the specified date range.")
+                        except Exception as e:
+                            st.error(f"❌ Error during scraping: {str(e)}")
            else:
                st.error("Please input at least one username.")
@@ -1285,6 +1512,37 @@ with tabs[3]:
            st.success("Scheduler stopped. All scheduled jobs cleared.")
            st.rerun()

+# ENHANCED: Show API limitations and setup instructions
+st.sidebar.title("📋 API Notes & Features")
+st.sidebar.info(
+    """
+    **New Features:**
+
+    ✅ **Date Range Fetching:** All tweets between start and end dates are fetched (no max limit)
+
+    ✅ **Account Analysis:** Comprehensive account details shown in all analysis views
+
+    ✅ **Zero Engagement Filters:** Default engagement filters set to 0 for maximum tweet capture
+
+    ⚙️ **Optional Filters:** Users can set custom engagement thresholds if desired
+
+    **Known Limitations:**
+
+    🚫 **Tweet-level comment replies** are not available due to Twitter API restrictions. Only direct comments to the main account are fetched.
+
+    ⚠️ **Tweet count discrepancies** may occur due to:
+    - Private/protected tweets
+    - Deleted tweets
+    - API rate limiting
+    - Account restrictions
+
+    💡 **Tips for better results:**
+    - Use appropriate date ranges
+    - Keep engagement filters at 0 (default) for maximum capture
+    - Use broader time periods for more comprehensive data
+    """
+)
+
# Show instructions for setting up Gemini
if not GENAI_AVAILABLE or not GEMINI_API_KEY:
    st.sidebar.title("Setup Gemini API")
@@ -1362,6 +1620,108 @@ try:
except:
    pass

+def run_apify_followers_analysis(input):
+    """
+    Fetch followers/following data using Apify actor
+    This is a placeholder for when the followers actor works
+    """
+    username = input["username"]
+    relationship_type = input.get("relationship_type", "followers")  # "followers" or "following"
+    max_items = input.get("max_items", 100)
+
+    # Try the followers actor first
+    try:
+        if relationship_type == "followers":
+            run_input = {
+                "twitterHandles": [username],
+                "maxItems": max_items,
+                "getFollowers": True,
+                "getFollowing": False,
+                "getRetweeters": False,
+                "includeUnavailableUsers": False,
+            }
+        else:  # following
+            run_input = {
+                "twitterHandles": [username],
+                "maxItems": max_items,
+                "getFollowers": False,
+                "getFollowing": True,
+                "getRetweeters": False,
+                "includeUnavailableUsers": False,
+            }
+
+        with st.spinner(f"Fetching {relationship_type} for @{username}..."):
+            # Try the actor you specified
+            run = client.actor("V38PZzpEgOfeeWvZY").call(run_input=run_input)
+            data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
+
+        if data:
+            return data, run["defaultDatasetId"]
+        else:
+            # Fallback: Use alternative followers scraper
+            return run_apify_followers_fallback(input)
+
+    except Exception as e:
+        st.warning(f"Primary followers actor failed: {e}")
+        # Fallback to alternative scraper
+        return run_apify_followers_fallback(input)
+
+def run_apify_followers_fallback(input):
+    """
+    Fallback method using alternative followers scraper
+    """
+    username = input["username"]
+    relationship_type = input.get("relationship_type", "followers")
+    max_items = input.get("max_items", 100)
+
+    try:
+        # Use curious_coder/twitter-scraper as fallback
+        run_input = {
+            "profileUrl": f"https://twitter.com/{username}",
+            "friendshipType": relationship_type,  # "followers" or "following"
+            "count": max_items,
+            "minDelay": 1,
+            "maxDelay": 3
+        }
+
+        with st.spinner(f"Fetching {relationship_type} for @{username} (fallback method)..."):
+            run = client.actor("curious_coder/twitter-scraper").call(run_input=run_input)
+            data = list(client.dataset(run["defaultDatasetId"]).iterate_items())
+            return data, run["defaultDatasetId"]
+
+    except Exception as e:
+        st.error(f"All followers scrapers failed: {e}")
+        return [], None
+
+def process_followers_data(data, relationship_type="followers"):
+    """
+    Process followers/following data into a structured format
+    """
+    processed_data = []
+
+    for item in data:
+        # Handle different data structures from different actors
+        username = item.get('username', item.get('screen_name', item.get('userName', '')))
+        name = item.get('name', item.get('displayName', ''))
+
+        processed_item = {
+            "Username": username,
+            "Name": name,
+            "Bio": item.get('description', item.get('bio', '')),
+            "Location": item.get('location', ''),
+            "Followers": item.get('followers_count', item.get('followersCount', item.get('followers', 0))),
+            "Following": item.get('following_count', item.get('followingCount', item.get('following', 0))),
+            "Tweets": item.get('tweet_count', item.get('statusesCount', item.get('statuses_count', 0))),
+            "Verified": item.get('verified', item.get('isVerified', False)),
+            "Profile_Image": item.get('profile_image_url', item.get('profileImageUrl', '')),
+            "Created_At": item.get('created_at', item.get('createdAt', '')),
+            "URL": item.get('url', f"https://twitter.com/{username}"),
+            "Relationship_Type": relationship_type
+        }
+        processed_data.append(processed_item)
+
+    return pd.DataFrame(processed_data)
+
# Footer with attribution
st.divider()
st.caption("Powered by Apify Twitter Scraper API • Created with Streamlit • AI Summaries by Google Gemini • Times in Indian Standard Time (IST)")
 