poemsforaphrodite committed on
Commit
3de2787
·
verified ·
1 Parent(s): ddd128b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +525 -18
src/streamlit_app.py CHANGED
@@ -174,10 +174,14 @@ class ApifyService:
174
 
175
  def fetch_account_tweets(self, username: str, since: str, until: str) -> Tuple[List[Dict], str]:
176
  """Fetch tweets posted by a specific account."""
 
 
 
 
177
  run_input = {
178
- "from": username,
179
- "since": f"{since}_00:00:00_UTC",
180
- "until": f"{until}_23:59:59_UTC",
181
  "queryType": "Latest",
182
  "include:nativeretweets": True,
183
  }
@@ -190,10 +194,14 @@ class ApifyService:
190
 
191
  def fetch_account_comments(self, username: str, since: str, until: str) -> Tuple[List[Dict], str]:
192
  """Fetch comments/replies directed to a specific account."""
 
 
 
 
193
  run_input = {
194
- "to": username,
195
- "since": f"{since}_00:00:00_UTC",
196
- "until": f"{until}_23:59:59_UTC",
197
  "queryType": "Latest",
198
  }
199
 
@@ -256,7 +264,7 @@ class GeminiService:
256
  class TweetDataProcessor:
257
  """Processes raw tweet data into structured format."""
258
 
259
- def process_tweets(self, raw_data: List[Dict[str, Any]]) -> Tuple[pd.DataFrame, Dict[str, Any]]:
260
  """Transform raw API data into clean DataFrame and metrics."""
261
  processed_data = []
262
  hashtags_counter = Counter()
@@ -268,7 +276,7 @@ class TweetDataProcessor:
268
 
269
  for item in raw_data:
270
  try:
271
- processed_tweet = self._process_single_tweet(item, hashtags_counter, mentions_counter, all_author_data)
272
  if processed_tweet:
273
  processed_data.append(processed_tweet)
274
  else:
@@ -288,10 +296,16 @@ class TweetDataProcessor:
288
  st.warning(f"⚠️ {error_count} items had processing errors")
289
 
290
  # Extract best account details
291
- account_details = self._extract_best_account_details(all_author_data)
292
 
293
- # Create DataFrame and metrics
294
  df = pd.DataFrame(processed_data)
 
 
 
 
 
 
295
  metrics = {
296
  "top_hashtags": hashtags_counter.most_common(5),
297
  "top_mentions": mentions_counter.most_common(5),
@@ -300,6 +314,193 @@ class TweetDataProcessor:
300
 
301
  return df, metrics
302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  def _is_mock_tweet(self, item: Dict) -> bool:
304
  """Detect if a tweet is mock/invalid data that should be ignored."""
305
  # Check for missing essential fields that real tweets should have
@@ -329,12 +530,19 @@ class TweetDataProcessor:
329
  return False
330
 
331
  def _process_single_tweet(self, item: Dict, hashtags_counter: Counter,
332
- mentions_counter: Counter, all_author_data: List) -> Optional[Dict]:
333
  """Process a single tweet item."""
334
  # Extract author data
335
  author = item.get("author", {})
336
  if author:
337
- all_author_data.append(author)
 
 
 
 
 
 
 
338
 
339
  # Check if this is a mock/invalid tweet (has minimal or no real data)
340
  is_mock_tweet = self._is_mock_tweet(item)
@@ -385,9 +593,21 @@ class TweetDataProcessor:
385
  "Mentions": ", ".join(mentions),
386
  }
387
 
388
- def _extract_best_account_details(self, all_author_data: List[Dict]) -> Dict:
389
  """Extract the most complete account details from author data."""
390
  if not all_author_data:
 
 
 
 
 
 
 
 
 
 
 
 
391
  return {}
392
 
393
  # Find the author data with the most complete information
@@ -422,11 +642,28 @@ class TweetDataProcessor:
422
  score = 0
423
 
424
  # Check for follower metrics (high priority)
425
- if author.get("followers", 0) > 0 or author.get("followersCount", 0) > 0:
 
 
 
 
 
426
  score += 3
427
- if author.get("following", 0) > 0 or author.get("followingCount", 0) > 0:
 
 
 
 
 
 
428
  score += 2
429
- if author.get("statusesCount", 0) > 0:
 
 
 
 
 
 
430
  score += 2
431
 
432
  # Check for profile information (lower priority)
@@ -437,14 +674,42 @@ class TweetDataProcessor:
437
 
438
  return score
439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  def _standardize_account_details(self, author_data: Dict) -> Dict:
441
  """Standardize account details from various possible field names."""
442
- # Try multiple possible field names for metrics
 
 
 
 
443
  followers_count = (
444
  author_data.get("followers") or
445
  author_data.get("followersCount") or
446
  author_data.get("followers_count") or
 
447
  safe_get_nested(author_data, ["publicMetrics", "followers_count"]) or
 
448
  0
449
  )
450
 
@@ -453,7 +718,9 @@ class TweetDataProcessor:
453
  author_data.get("followingCount") or
454
  author_data.get("following_count") or
455
  author_data.get("friends_count") or
 
456
  safe_get_nested(author_data, ["publicMetrics", "following_count"]) or
 
457
  0
458
  )
459
 
@@ -461,10 +728,23 @@ class TweetDataProcessor:
461
  author_data.get("statusesCount") or
462
  author_data.get("statuses_count") or
463
  author_data.get("tweet_count") or
 
464
  safe_get_nested(author_data, ["publicMetrics", "tweet_count"]) or
 
465
  0
466
  )
467
 
 
 
 
 
 
 
 
 
 
 
 
468
  return {
469
  "name": author_data.get("name", ""),
470
  "username": author_data.get("userName", "") or author_data.get("username", ""),
@@ -474,6 +754,12 @@ class TweetDataProcessor:
474
  "tweet_count": tweet_count,
475
  "verified": author_data.get("verified", False) or author_data.get("isVerified", False),
476
  "profile_image_url": author_data.get("profileImageUrl", "") or author_data.get("profile_image_url", ""),
 
 
 
 
 
 
477
  }
478
 
479
  # =============================================================================
@@ -510,6 +796,12 @@ class UIComponents:
510
  @staticmethod
511
  def _display_account_metrics(account_details: Dict) -> None:
512
  """Display account metrics (followers, following, posts)."""
 
 
 
 
 
 
513
  m1, m2, m3 = st.columns(3)
514
 
515
  followers = account_details.get('followers_count', 0)
@@ -532,10 +824,225 @@ class UIComponents:
532
  help="Total tweet count from Twitter API"
533
  )
534
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535
  # Warning for missing data
536
  if followers == 0 and following == 0 and posts == 0:
537
  st.warning("⚠️ Account metrics unavailable - this may be due to API limitations or account privacy settings")
538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  @staticmethod
540
  def display_key_metrics(df: pd.DataFrame) -> None:
541
  """Display key engagement metrics."""
@@ -896,7 +1403,7 @@ class TwitterAnalyzerApp:
896
  return
897
 
898
  # Process data
899
- df, metrics = self.processor.process_tweets(raw_data)
900
 
901
  # Generate AI summary if available
902
  gemini_summary = None
 
174
 
175
  def fetch_account_tweets(self, username: str, since: str, until: str) -> Tuple[List[Dict], str]:
176
  """Fetch tweets posted by a specific account."""
177
+ # Handle both simple date (YYYY-MM-DD) and full timestamp (YYYY-MM-DD_HH:MM:SS) formats
178
+ since_formatted = f"{since}_UTC" if "_" in since else f"{since}_00:00:00_UTC"
179
+ until_formatted = f"{until}_UTC" if "_" in until else f"{until}_23:59:59_UTC"
180
+
181
  run_input = {
182
+ "from": username.strip(),
183
+ "since": since_formatted,
184
+ "until": until_formatted,
185
  "queryType": "Latest",
186
  "include:nativeretweets": True,
187
  }
 
194
 
195
  def fetch_account_comments(self, username: str, since: str, until: str) -> Tuple[List[Dict], str]:
196
  """Fetch comments/replies directed to a specific account."""
197
+ # Handle both simple date (YYYY-MM-DD) and full timestamp (YYYY-MM-DD_HH:MM:SS) formats
198
+ since_formatted = f"{since}_UTC" if "_" in since else f"{since}_00:00:00_UTC"
199
+ until_formatted = f"{until}_UTC" if "_" in until else f"{until}_23:59:59_UTC"
200
+
201
  run_input = {
202
+ "to": username.strip(),
203
+ "since": since_formatted,
204
+ "until": until_formatted,
205
  "queryType": "Latest",
206
  }
207
 
 
264
  class TweetDataProcessor:
265
  """Processes raw tweet data into structured format."""
266
 
267
+ def process_tweets(self, raw_data: List[Dict[str, Any]], target_username: str = None) -> Tuple[pd.DataFrame, Dict[str, Any]]:
268
  """Transform raw API data into clean DataFrame and metrics."""
269
  processed_data = []
270
  hashtags_counter = Counter()
 
276
 
277
  for item in raw_data:
278
  try:
279
+ processed_tweet = self._process_single_tweet(item, hashtags_counter, mentions_counter, all_author_data, target_username)
280
  if processed_tweet:
281
  processed_data.append(processed_tweet)
282
  else:
 
296
  st.warning(f"⚠️ {error_count} items had processing errors")
297
 
298
  # Extract best account details
299
+ account_details = self._extract_best_account_details(all_author_data, target_username)
300
 
301
+ # Create DataFrame and calculate engagement metrics from tweet data
302
  df = pd.DataFrame(processed_data)
303
+ engagement_metrics = self._calculate_engagement_metrics(df, target_username)
304
+
305
+ # Add engagement metrics to account_details
306
+ if account_details:
307
+ account_details.update(engagement_metrics)
308
+
309
  metrics = {
310
  "top_hashtags": hashtags_counter.most_common(5),
311
  "top_mentions": mentions_counter.most_common(5),
 
314
 
315
  return df, metrics
316
 
317
+ def _calculate_engagement_metrics(self, df: pd.DataFrame, target_username: str = None) -> Dict:
318
+ """Calculate comprehensive engagement metrics from tweet data."""
319
+ if df.empty:
320
+ return self._get_empty_metrics()
321
+
322
+ # Filter to only tweets from the target user if specified
323
+ if target_username:
324
+ user_tweets = df[df['Username'].str.lower() == target_username.lower()]
325
+ else:
326
+ user_tweets = df
327
+
328
+ if user_tweets.empty:
329
+ return self._get_empty_metrics()
330
+
331
+ # Basic engagement totals
332
+ likes_count = user_tweets['Likes'].sum() if 'Likes' in user_tweets.columns else 0
333
+ views_count = user_tweets['Views'].sum() if 'Views' in user_tweets.columns else 0
334
+ reply_count = user_tweets['Replies'].sum() if 'Replies' in user_tweets.columns else 0
335
+ repost_count = user_tweets['Retweets'].sum() if 'Retweets' in user_tweets.columns else 0
336
+
337
+ tweet_count = len(user_tweets)
338
+
339
+ # Content quality metrics
340
+ avg_likes_per_tweet = likes_count / tweet_count if tweet_count > 0 else 0
341
+ avg_views_per_tweet = views_count / tweet_count if tweet_count > 0 else 0
342
+ avg_engagement_rate = ((likes_count + repost_count) / views_count * 100) if views_count > 0 else 0
343
+
344
+ # Content length analysis
345
+ if 'Text' in user_tweets.columns:
346
+ text_lengths = user_tweets['Text'].astype(str).str.len()
347
+ avg_tweet_length = text_lengths.mean()
348
+ longest_tweet_length = text_lengths.max()
349
+ shortest_tweet_length = text_lengths.min()
350
+ else:
351
+ avg_tweet_length = longest_tweet_length = shortest_tweet_length = 0
352
+
353
+ # Media usage metrics
354
+ if 'Has_Media' in user_tweets.columns:
355
+ tweets_with_media = user_tweets['Has_Media'].sum()
356
+ media_usage_percentage = (tweets_with_media / tweet_count * 100) if tweet_count > 0 else 0
357
+
358
+ # Media effectiveness
359
+ media_tweets = user_tweets[user_tweets['Has_Media'] == True]
360
+ no_media_tweets = user_tweets[user_tweets['Has_Media'] == False]
361
+
362
+ avg_likes_with_media = media_tweets['Likes'].mean() if len(media_tweets) > 0 else 0
363
+ avg_likes_without_media = no_media_tweets['Likes'].mean() if len(no_media_tweets) > 0 else 0
364
+ else:
365
+ tweets_with_media = media_usage_percentage = 0
366
+ avg_likes_with_media = avg_likes_without_media = 0
367
+
368
+ # Hashtag and mention analysis
369
+ if 'Hashtags' in user_tweets.columns:
370
+ # Count hashtags from the Hashtags field (comma-separated string)
371
+ hashtag_counts = user_tweets['Hashtags'].astype(str).apply(lambda x: len([h.strip() for h in x.split(',') if h.strip()]))
372
+ total_hashtags_used = hashtag_counts.sum()
373
+ avg_hashtags_per_tweet = hashtag_counts.mean()
374
+ tweets_with_hashtags_percentage = ((hashtag_counts > 0).sum() / tweet_count * 100) if tweet_count > 0 else 0
375
+ elif 'Hashtag_Count' in user_tweets.columns:
376
+ # Fallback to Hashtag_Count if available
377
+ total_hashtags_used = user_tweets['Hashtag_Count'].sum()
378
+ avg_hashtags_per_tweet = user_tweets['Hashtag_Count'].mean()
379
+ tweets_with_hashtags_percentage = ((user_tweets['Hashtag_Count'] > 0).sum() / tweet_count * 100) if tweet_count > 0 else 0
380
+ else:
381
+ total_hashtags_used = avg_hashtags_per_tweet = tweets_with_hashtags_percentage = 0
382
+
383
+ if 'Mentions' in user_tweets.columns:
384
+ # Count mentions from the Mentions field (comma-separated string)
385
+ mention_counts = user_tweets['Mentions'].astype(str).apply(lambda x: len([m.strip() for m in x.split(',') if m.strip()]))
386
+ total_mentions_used = mention_counts.sum()
387
+ avg_mentions_per_tweet = mention_counts.mean()
388
+ elif 'Mention_Count' in user_tweets.columns:
389
+ # Fallback to Mention_Count if available
390
+ total_mentions_used = user_tweets['Mention_Count'].sum()
391
+ avg_mentions_per_tweet = user_tweets['Mention_Count'].mean()
392
+ else:
393
+ total_mentions_used = avg_mentions_per_tweet = 0
394
+
395
+ # Timing and activity patterns
396
+ if 'Hour' in user_tweets.columns:
397
+ most_active_hour = user_tweets['Hour'].mode().values[0] if len(user_tweets['Hour'].mode()) > 0 else 0
398
+ hourly_distribution = user_tweets['Hour'].value_counts().head(3).to_dict()
399
+ else:
400
+ most_active_hour = 0
401
+ hourly_distribution = {}
402
+
403
+ if 'Day_of_Week' in user_tweets.columns:
404
+ most_active_day = user_tweets['Day_of_Week'].mode().values[0] if len(user_tweets['Day_of_Week'].mode()) > 0 else "Unknown"
405
+ else:
406
+ most_active_day = "Unknown"
407
+
408
+ # Performance metrics
409
+ if 'Likes' in user_tweets.columns and not user_tweets.empty:
410
+ highest_likes = user_tweets['Likes'].max()
411
+ top_tweet_idx = user_tweets['Likes'].idxmax()
412
+ top_tweet_text = user_tweets.loc[top_tweet_idx, 'Text'][:100] + "..." if 'Text' in user_tweets.columns else ""
413
+ top_tweet_url = user_tweets.loc[top_tweet_idx, 'URL'] if 'URL' in user_tweets.columns else ""
414
+
415
+ # Viral content (top 10% threshold)
416
+ viral_threshold = user_tweets['Likes'].quantile(0.9)
417
+ viral_tweets_count = (user_tweets['Likes'] > viral_threshold).sum()
418
+ viral_content_percentage = (viral_tweets_count / tweet_count * 100) if tweet_count > 0 else 0
419
+ else:
420
+ highest_likes = viral_tweets_count = viral_content_percentage = 0
421
+ top_tweet_text = top_tweet_url = ""
422
+
423
+ # Audience engagement ratios
424
+ like_to_view_ratio = (likes_count / views_count * 100) if views_count > 0 else 0
425
+ retweet_to_like_ratio = (repost_count / likes_count * 100) if likes_count > 0 else 0
426
+ reply_to_like_ratio = (reply_count / likes_count * 100) if likes_count > 0 else 0
427
+
428
+ # Engagement score (weighted: likes=1, retweets=2, replies=3)
429
+ total_engagement = likes_count + repost_count + reply_count
430
+ engagement_score = (likes_count * 1 + repost_count * 2 + reply_count * 3) / tweet_count if tweet_count > 0 else 0
431
+
432
+ return {
433
+ # Basic metrics
434
+ "likes_count": int(likes_count),
435
+ "views_count": int(views_count),
436
+ "reply_count": int(reply_count),
437
+ "repost_count": int(repost_count),
438
+
439
+ # Content quality metrics
440
+ "avg_likes_per_tweet": round(avg_likes_per_tweet, 1),
441
+ "avg_views_per_tweet": round(avg_views_per_tweet, 1),
442
+ "avg_engagement_rate": round(avg_engagement_rate, 2),
443
+ "avg_tweet_length": round(avg_tweet_length, 1),
444
+ "longest_tweet_length": int(longest_tweet_length),
445
+ "shortest_tweet_length": int(shortest_tweet_length),
446
+
447
+ # Media usage metrics
448
+ "tweets_with_media_count": int(tweets_with_media),
449
+ "media_usage_percentage": round(media_usage_percentage, 1),
450
+ "avg_likes_with_media": round(avg_likes_with_media, 1),
451
+ "avg_likes_without_media": round(avg_likes_without_media, 1),
452
+
453
+ # Hashtag and mention metrics
454
+ "total_hashtags_used": int(total_hashtags_used),
455
+ "avg_hashtags_per_tweet": round(avg_hashtags_per_tweet, 1),
456
+ "tweets_with_hashtags_percentage": round(tweets_with_hashtags_percentage, 1),
457
+ "total_mentions_used": int(total_mentions_used),
458
+ "avg_mentions_per_tweet": round(avg_mentions_per_tweet, 1),
459
+
460
+ # Activity patterns
461
+ "most_active_hour": int(most_active_hour),
462
+ "most_active_day": str(most_active_day),
463
+ "top_activity_hours": list(hourly_distribution.keys())[:3],
464
+
465
+ # Performance metrics
466
+ "highest_likes": int(highest_likes),
467
+ "top_tweet_text": str(top_tweet_text),
468
+ "top_tweet_url": str(top_tweet_url),
469
+ "viral_tweets_count": int(viral_tweets_count),
470
+ "viral_content_percentage": round(viral_content_percentage, 1),
471
+
472
+ # Engagement ratios
473
+ "like_to_view_ratio": round(like_to_view_ratio, 2),
474
+ "retweet_to_like_ratio": round(retweet_to_like_ratio, 2),
475
+ "reply_to_like_ratio": round(reply_to_like_ratio, 2),
476
+ "engagement_score": round(engagement_score, 1),
477
+ "total_engagement": int(total_engagement),
478
+ }
479
+
480
+ def _get_empty_metrics(self) -> Dict:
481
+ """Return empty metrics structure."""
482
+ return {
483
+ # Basic metrics
484
+ "likes_count": 0, "views_count": 0, "reply_count": 0, "repost_count": 0,
485
+ # Content quality metrics
486
+ "avg_likes_per_tweet": 0, "avg_views_per_tweet": 0, "avg_engagement_rate": 0,
487
+ "avg_tweet_length": 0, "longest_tweet_length": 0, "shortest_tweet_length": 0,
488
+ # Media usage metrics
489
+ "tweets_with_media_count": 0, "media_usage_percentage": 0,
490
+ "avg_likes_with_media": 0, "avg_likes_without_media": 0,
491
+ # Hashtag and mention metrics
492
+ "total_hashtags_used": 0, "avg_hashtags_per_tweet": 0, "tweets_with_hashtags_percentage": 0,
493
+ "total_mentions_used": 0, "avg_mentions_per_tweet": 0,
494
+ # Activity patterns
495
+ "most_active_hour": 0, "most_active_day": "Unknown", "top_activity_hours": [],
496
+ # Performance metrics
497
+ "highest_likes": 0, "top_tweet_text": "", "top_tweet_url": "",
498
+ "viral_tweets_count": 0, "viral_content_percentage": 0,
499
+ # Engagement ratios
500
+ "like_to_view_ratio": 0, "retweet_to_like_ratio": 0, "reply_to_like_ratio": 0,
501
+ "engagement_score": 0, "total_engagement": 0,
502
+ }
503
+
504
  def _is_mock_tweet(self, item: Dict) -> bool:
505
  """Detect if a tweet is mock/invalid data that should be ignored."""
506
  # Check for missing essential fields that real tweets should have
 
530
  return False
531
 
532
  def _process_single_tweet(self, item: Dict, hashtags_counter: Counter,
533
+ mentions_counter: Counter, all_author_data: List, target_username: str = None) -> Optional[Dict]:
534
  """Process a single tweet item."""
535
  # Extract author data
536
  author = item.get("author", {})
537
  if author:
538
+ # Only collect author data from the target user if target_username is specified
539
+ # This prevents random accounts from being saved in replies data
540
+ if target_username:
541
+ author_username = author.get("userName", "").lower()
542
+ if author_username == target_username.lower():
543
+ all_author_data.append(author)
544
+ else:
545
+ all_author_data.append(author)
546
 
547
  # Check if this is a mock/invalid tweet (has minimal or no real data)
548
  is_mock_tweet = self._is_mock_tweet(item)
 
593
  "Mentions": ", ".join(mentions),
594
  }
595
 
596
+ def _extract_best_account_details(self, all_author_data: List[Dict], target_username: str = None) -> Dict:
597
  """Extract the most complete account details from author data."""
598
  if not all_author_data:
599
+ # If no author data and we have a target username, create a basic structure
600
+ if target_username:
601
+ return {
602
+ "name": target_username,
603
+ "username": target_username,
604
+ "bio": "",
605
+ "followers_count": 0,
606
+ "following_count": 0,
607
+ "tweet_count": 0,
608
+ "verified": False,
609
+ "profile_image_url": ""
610
+ }
611
  return {}
612
 
613
  # Find the author data with the most complete information
 
642
  score = 0
643
 
644
  # Check for follower metrics (high priority)
645
+ followers = (author.get("followers") or author.get("followersCount") or
646
+ author.get("followers_count") or
647
+ author.get("publicMetrics", {}).get("followers_count") or
648
+ safe_get_nested(author, ["publicMetrics", "followers_count"]) or
649
+ safe_get_nested(author, ["public_metrics", "followers_count"]) or 0)
650
+ if followers > 0:
651
  score += 3
652
+
653
+ following = (author.get("following") or author.get("followingCount") or
654
+ author.get("following_count") or author.get("friends_count") or
655
+ author.get("publicMetrics", {}).get("following_count") or
656
+ safe_get_nested(author, ["publicMetrics", "following_count"]) or
657
+ safe_get_nested(author, ["public_metrics", "following_count"]) or 0)
658
+ if following > 0:
659
  score += 2
660
+
661
+ tweet_count = (author.get("statusesCount") or author.get("statuses_count") or
662
+ author.get("tweet_count") or
663
+ author.get("publicMetrics", {}).get("tweet_count") or
664
+ safe_get_nested(author, ["publicMetrics", "tweet_count"]) or
665
+ safe_get_nested(author, ["public_metrics", "tweet_count"]) or 0)
666
+ if tweet_count > 0:
667
  score += 2
668
 
669
  # Check for profile information (lower priority)
 
674
 
675
  return score
676
 
677
+ def _convert_to_ist_format(self, twitter_date_str: str) -> str:
678
+ """Convert Twitter date string to IST format."""
679
+ if not twitter_date_str or twitter_date_str == "":
680
+ return ""
681
+
682
+ try:
683
+ # Parse the Twitter date format: "Mon Jul 08 09:31:59 +0000 2013"
684
+ utc_dt = datetime.strptime(twitter_date_str, TWITTER_DATE_FORMAT)
685
+
686
+ # Convert to IST
687
+ ist_tz = pytz.timezone(IST_TIMEZONE)
688
+ ist_dt = utc_dt.astimezone(ist_tz)
689
+
690
+ # Format as a more readable IST date
691
+ # Format: "8 July 2013, 3:01 PM IST"
692
+ formatted_date = ist_dt.strftime("%d %B %Y, %I:%M %p IST")
693
+
694
+ return formatted_date
695
+ except ValueError:
696
+ # If parsing fails, return the original string
697
+ return twitter_date_str
698
+
699
  def _standardize_account_details(self, author_data: Dict) -> Dict:
700
  """Standardize account details from various possible field names."""
701
+ # Debug: Print raw author data keys (only in debug mode)
702
+ if st.session_state.get('debug_mode', False):
703
+ st.write(f"Debug - Author data keys: {list(author_data.keys())}")
704
+
705
+ # Try multiple possible field names for metrics with additional variations
706
  followers_count = (
707
  author_data.get("followers") or
708
  author_data.get("followersCount") or
709
  author_data.get("followers_count") or
710
+ author_data.get("publicMetrics", {}).get("followers_count") or
711
  safe_get_nested(author_data, ["publicMetrics", "followers_count"]) or
712
+ safe_get_nested(author_data, ["public_metrics", "followers_count"]) or
713
  0
714
  )
715
 
 
718
  author_data.get("followingCount") or
719
  author_data.get("following_count") or
720
  author_data.get("friends_count") or
721
+ author_data.get("publicMetrics", {}).get("following_count") or
722
  safe_get_nested(author_data, ["publicMetrics", "following_count"]) or
723
+ safe_get_nested(author_data, ["public_metrics", "following_count"]) or
724
  0
725
  )
726
 
 
728
  author_data.get("statusesCount") or
729
  author_data.get("statuses_count") or
730
  author_data.get("tweet_count") or
731
+ author_data.get("publicMetrics", {}).get("tweet_count") or
732
  safe_get_nested(author_data, ["publicMetrics", "tweet_count"]) or
733
+ safe_get_nested(author_data, ["public_metrics", "tweet_count"]) or
734
  0
735
  )
736
 
737
+ # Extract account creation date
738
+ raw_create_date = (
739
+ author_data.get("createdAt") or
740
+ author_data.get("created_at") or
741
+ author_data.get("account_create_date") or
742
+ ""
743
+ )
744
+
745
+ # Convert to IST format if we have a valid date
746
+ account_create_date = self._convert_to_ist_format(raw_create_date)
747
+
748
  return {
749
  "name": author_data.get("name", ""),
750
  "username": author_data.get("userName", "") or author_data.get("username", ""),
 
754
  "tweet_count": tweet_count,
755
  "verified": author_data.get("verified", False) or author_data.get("isVerified", False),
756
  "profile_image_url": author_data.get("profileImageUrl", "") or author_data.get("profile_image_url", ""),
757
+ "account_create_date": account_create_date,
758
+ # Engagement metrics will be calculated from tweet data and added later
759
+ "likes_count": 0,
760
+ "views_count": 0,
761
+ "reply_count": 0,
762
+ "repost_count": 0,
763
  }
764
 
765
  # =============================================================================
 
796
  @staticmethod
797
  def _display_account_metrics(account_details: Dict) -> None:
798
  """Display account metrics (followers, following, posts)."""
799
+ # Account creation date
800
+ create_date = account_details.get('account_create_date', '')
801
+ if create_date:
802
+ st.caption(f"📅 Account created: {create_date}")
803
+
804
+ # Basic metrics
805
  m1, m2, m3 = st.columns(3)
806
 
807
  followers = account_details.get('followers_count', 0)
 
824
  help="Total tweet count from Twitter API"
825
  )
826
 
827
+ # Engagement metrics
828
+ likes = account_details.get('likes_count', 0)
829
+ views = account_details.get('views_count', 0)
830
+ replies = account_details.get('reply_count', 0)
831
+ reposts = account_details.get('repost_count', 0)
832
+
833
+ if likes > 0 or views > 0 or replies > 0 or reposts > 0:
834
+ st.caption("**📊 Total Engagement:**")
835
+ e1, e2, e3, e4 = st.columns(4)
836
+
837
+ e1.metric(
838
+ "Likes",
839
+ format_large_number(likes),
840
+ help="Total likes count"
841
+ )
842
+ e2.metric(
843
+ "Views",
844
+ format_large_number(views),
845
+ help="Total views/impressions count"
846
+ )
847
+ e3.metric(
848
+ "Replies",
849
+ format_large_number(replies),
850
+ help="Total replies count"
851
+ )
852
+ e4.metric(
853
+ "Reposts",
854
+ format_large_number(reposts),
855
+ help="Total reposts/retweets count"
856
+ )
857
+
858
+ # Advanced metrics sections
859
+ UIComponents._display_content_quality_metrics(account_details)
860
+ UIComponents._display_media_usage_metrics(account_details)
861
+ UIComponents._display_activity_patterns(account_details)
862
+ UIComponents._display_performance_metrics(account_details)
863
+ UIComponents._display_engagement_ratios(account_details)
864
+
865
  # Warning for missing data
866
  if followers == 0 and following == 0 and posts == 0:
867
  st.warning("⚠️ Account metrics unavailable - this may be due to API limitations or account privacy settings")
868
 
869
+ @staticmethod
870
+ def _display_content_quality_metrics(account_details: Dict) -> None:
871
+ """Display content quality metrics."""
872
+ avg_likes = account_details.get('avg_likes_per_tweet', 0)
873
+ avg_views = account_details.get('avg_views_per_tweet', 0)
874
+ engagement_rate = account_details.get('avg_engagement_rate', 0)
875
+ avg_length = account_details.get('avg_tweet_length', 0)
876
+
877
+ if avg_likes > 0 or avg_views > 0 or engagement_rate > 0:
878
+ st.caption("**📈 Content Quality:**")
879
+ q1, q2, q3, q4 = st.columns(4)
880
+
881
+ q1.metric(
882
+ "Avg Likes/Tweet",
883
+ f"{avg_likes:.1f}",
884
+ help="Average likes per tweet"
885
+ )
886
+ q2.metric(
887
+ "Avg Views/Tweet",
888
+ format_large_number(int(avg_views)),
889
+ help="Average views per tweet"
890
+ )
891
+ q3.metric(
892
+ "Engagement Rate",
893
+ f"{engagement_rate:.1f}%",
894
+ help="(Likes + Retweets) / Views * 100"
895
+ )
896
+ q4.metric(
897
+ "Avg Tweet Length",
898
+ f"{avg_length:.0f} chars",
899
+ help="Average character length per tweet"
900
+ )
901
+
902
+ @staticmethod
903
+ def _display_media_usage_metrics(account_details: Dict) -> None:
904
+ """Display media usage metrics."""
905
+ media_count = account_details.get('tweets_with_media_count', 0)
906
+ media_percentage = account_details.get('media_usage_percentage', 0)
907
+ likes_with_media = account_details.get('avg_likes_with_media', 0)
908
+ likes_without_media = account_details.get('avg_likes_without_media', 0)
909
+
910
+ if media_count > 0 or media_percentage > 0:
911
+ st.caption("**🎬 Media Usage:**")
912
+ m1, m2, m3, m4 = st.columns(4)
913
+
914
+ m1.metric(
915
+ "Tweets with Media",
916
+ f"{media_count}",
917
+ help="Number of tweets with media attachments"
918
+ )
919
+ m2.metric(
920
+ "Media Usage",
921
+ f"{media_percentage:.1f}%",
922
+ help="Percentage of tweets with media"
923
+ )
924
+ m3.metric(
925
+ "Avg Likes (Media)",
926
+ f"{likes_with_media:.1f}",
927
+ help="Average likes for tweets with media"
928
+ )
929
+ m4.metric(
930
+ "Avg Likes (No Media)",
931
+ f"{likes_without_media:.1f}",
932
+ help="Average likes for tweets without media"
933
+ )
934
+
935
+ @staticmethod
936
+ def _display_activity_patterns(account_details: Dict) -> None:
937
+ """Display activity pattern metrics."""
938
+ most_active_hour = account_details.get('most_active_hour', 0)
939
+ most_active_day = account_details.get('most_active_day', 'Unknown')
940
+ top_hours = account_details.get('top_activity_hours', [])
941
+
942
+ if most_active_hour > 0 or most_active_day != 'Unknown':
943
+ st.caption("**⏰ Activity Patterns:**")
944
+ a1, a2, a3, a4 = st.columns(4)
945
+
946
+ a1.metric(
947
+ "Most Active Hour",
948
+ f"{most_active_hour}:00",
949
+ help="Hour of day with most tweets"
950
+ )
951
+ a2.metric(
952
+ "Most Active Day",
953
+ most_active_day,
954
+ help="Day of week with most tweets"
955
+ )
956
+ a3.metric(
957
+ "Top Hours",
958
+ ", ".join([f"{h}:00" for h in top_hours[:2]]),
959
+ help="Top active hours"
960
+ )
961
+
962
+ # Hashtag and mention usage
963
+ hashtags = account_details.get('total_hashtags_used', 0)
964
+ mentions = account_details.get('total_mentions_used', 0)
965
+ a4.metric(
966
+ "Hashtags Used",
967
+ f"{hashtags}",
968
+ help="Total hashtags used in tweets"
969
+ )
970
+
971
+ @staticmethod
972
+ def _display_performance_metrics(account_details: Dict) -> None:
973
+ """Display performance metrics."""
974
+ highest_likes = account_details.get('highest_likes', 0)
975
+ viral_count = account_details.get('viral_tweets_count', 0)
976
+ viral_percentage = account_details.get('viral_content_percentage', 0)
977
+ top_tweet_text = account_details.get('top_tweet_text', '')
978
+ top_tweet_url = account_details.get('top_tweet_url', '')
979
+
980
+ if highest_likes > 0 or viral_count > 0:
981
+ st.caption("**🚀 Performance:**")
982
+ p1, p2, p3, p4 = st.columns(4)
983
+
984
+ p1.metric(
985
+ "Highest Likes",
986
+ format_large_number(highest_likes),
987
+ help="Most likes on a single tweet"
988
+ )
989
+ p2.metric(
990
+ "Viral Tweets",
991
+ f"{viral_count}",
992
+ help="Tweets in top 10% by likes"
993
+ )
994
+ p3.metric(
995
+ "Viral Content %",
996
+ f"{viral_percentage:.1f}%",
997
+ help="Percentage of viral tweets"
998
+ )
999
+ p4.metric(
1000
+ "Engagement Score",
1001
+ f"{account_details.get('engagement_score', 0):.1f}",
1002
+ help="Weighted engagement score (likes×1 + retweets×2 + replies×3)"
1003
+ )
1004
+
1005
+ # Show top tweet if available
1006
+ if top_tweet_text and top_tweet_url:
1007
+ st.caption("**🏆 Top Performing Tweet:**")
1008
+ with st.expander("View top tweet"):
1009
+ st.write(f"**Likes:** {format_large_number(highest_likes)}")
1010
+ st.write(f"**Text:** {top_tweet_text}")
1011
+ st.write(f"**URL:** {top_tweet_url}")
1012
+
1013
+ @staticmethod
1014
+ def _display_engagement_ratios(account_details: Dict) -> None:
1015
+ """Display engagement ratio metrics."""
1016
+ like_to_view = account_details.get('like_to_view_ratio', 0)
1017
+ retweet_to_like = account_details.get('retweet_to_like_ratio', 0)
1018
+ reply_to_like = account_details.get('reply_to_like_ratio', 0)
1019
+ total_engagement = account_details.get('total_engagement', 0)
1020
+
1021
+ if like_to_view > 0 or retweet_to_like > 0 or reply_to_like > 0:
1022
+ st.caption("**📊 Engagement Ratios:**")
1023
+ r1, r2, r3, r4 = st.columns(4)
1024
+
1025
+ r1.metric(
1026
+ "Like Rate",
1027
+ f"{like_to_view:.2f}%",
1028
+ help="Likes per view percentage"
1029
+ )
1030
+ r2.metric(
1031
+ "Retweet Rate",
1032
+ f"{retweet_to_like:.2f}%",
1033
+ help="Retweets per like percentage"
1034
+ )
1035
+ r3.metric(
1036
+ "Reply Rate",
1037
+ f"{reply_to_like:.2f}%",
1038
+ help="Replies per like percentage"
1039
+ )
1040
+ r4.metric(
1041
+ "Total Engagement",
1042
+ format_large_number(total_engagement),
1043
+ help="Total likes + retweets + replies"
1044
+ )
1045
+
1046
  @staticmethod
1047
  def display_key_metrics(df: pd.DataFrame) -> None:
1048
  """Display key engagement metrics."""
 
1403
  return
1404
 
1405
  # Process data
1406
+ df, metrics = self.processor.process_tweets(raw_data, self.username)
1407
 
1408
  # Generate AI summary if available
1409
  gemini_summary = None