Arjon07CSE committed on
Commit
826b942
·
verified ·
1 Parent(s): 6372aa2

Updated for non-political use

Browse files
Files changed (1) hide show
  1. app.py +465 -31
app.py CHANGED
@@ -34,12 +34,12 @@ logger.info("Application starting up.")
34
 
35
  # --- APPLICATION CONFIGURATION ---
36
  APP_TITLE = "Social Perception Analyzer"
37
- APP_TAGLINE = "Prepared for the Policymakers of Bangladesh Nationalist Party (BNP)"
38
- APP_FOOTER = "Developed by CDSR"
39
 
40
  # --- FONT CONFIGURATION ---
41
  FONT_PATH = 'NotoSansBengali-Regular.ttf'
42
- BANGLA_FONT = None
43
 
44
  def setup_bangla_font():
45
  """Properly set up Bengali font for all visualizations"""
@@ -99,6 +99,7 @@ PHRASES_TO_JOIN = {
99
  "তারেক রহমান": "তারেক_রহমান",
100
  "খালেদা জিয়া": "খালেদা_জিয়া",
101
  "বিএনপি জিন্দাবাদ": "বিএনপি_জিন্দাবাদ"
 
102
  }
103
 
104
  def get_dynamic_time_agg(start_date, end_date):
@@ -556,17 +557,22 @@ def generate_scraper_dashboard(df: pd.DataFrame):
556
  if not media_counts.empty:
557
  fig_media, ax = plt.subplots(figsize=(8, 6))
558
  media_counts.plot(kind='barh', ax=ax, color='skyblue')
559
- ax.set_title("Top 15 Media Sources", fontproperties=BANGLA_FONT)
560
- ax.set_xlabel("Article Count", fontproperties=BANGLA_FONT)
561
- ax.set_ylabel("মিডিয়া", fontproperties=BANGLA_FONT)
562
  yticks = np.arange(len(media_counts.index))
563
  ax.set_yticks(yticks)
564
- ax.set_yticklabels(media_counts.index, fontproperties=BANGLA_FONT, fontsize=12)
565
- # Ensure all tick labels use Bengali font
566
  for label in ax.get_xticklabels():
567
  label.set_fontproperties(BANGLA_FONT)
 
568
  for label in ax.get_yticklabels():
569
  label.set_fontproperties(BANGLA_FONT)
 
 
 
 
 
570
  plt.tight_layout()
571
 
572
  # Word cloud generation
@@ -652,12 +658,22 @@ def generate_youtube_dashboard(videos_df, comments_df):
652
  if not channel_views.empty:
653
  fig_channel_dominance, ax = plt.subplots(figsize=(10, 6))
654
  channel_views.plot(kind='barh', ax=ax, color='slateblue')
655
- ax.set_title("Top 10 Dominant Channels by View Count", fontproperties=BANGLA_FONT)
656
- ax.set_xlabel("মোট ভিউ", fontproperties=BANGLA_FONT)
657
- ax.set_ylabel("চ্যানেল", fontproperties=BANGLA_FONT)
658
  yticks = np.arange(len(channel_views.index))
659
  ax.set_yticks(yticks)
660
- ax.set_yticklabels(channel_views.index, fontproperties=BANGLA_FONT, fontsize=12)
 
 
 
 
 
 
 
 
 
 
661
  plt.tight_layout()
662
  dashboard_components["yt_channel_dominance_plot"] = fig_channel_dominance
663
 
@@ -676,9 +692,19 @@ def generate_youtube_dashboard(videos_df, comments_df):
676
  )
677
  ax.axvline(median_views, color='blue', linestyle='--', label='Median Views')
678
  ax.axhline(median_engagement, color='green', linestyle='--', label='Median Engagement')
679
- ax.set_xlabel("মোট ভিউ", fontproperties=BANGLA_FONT)
680
- ax.set_ylabel("এনগেজমেন্ট রেট", fontproperties=BANGLA_FONT)
681
- ax.set_title("Content Performance Quadrant", fontproperties=BANGLA_FONT)
 
 
 
 
 
 
 
 
 
 
682
  plt.tight_layout()
683
  except Exception as e:
684
  logger.error(f"Quadrant plot failed: {e}")
@@ -720,9 +746,19 @@ def generate_youtube_dashboard(videos_df, comments_df):
720
  if not channel_counts.empty:
721
  fig_channels, ax = plt.subplots(figsize=(8, 6))
722
  channel_counts.plot(kind='barh', ax=ax, color='coral')
723
- ax.set_title("Top 15 Channels by Video Volume", fontproperties=BANGLA_FONT)
724
- ax.set_yticklabels(channel_counts.index, fontproperties=BANGLA_FONT)
725
- ax.set_xlabel("Video Count", fontproperties=BANGLA_FONT)
 
 
 
 
 
 
 
 
 
 
726
  plt.tight_layout()
727
  dashboard_components["yt_channel_plot"] = fig_channels
728
 
@@ -783,12 +819,22 @@ def generate_youtube_dashboard(videos_df, comments_df):
783
  if not top_videos.empty:
784
  fig_top_videos, ax = plt.subplots(figsize=(10, 6))
785
  top_videos.plot(kind='barh', ax=ax, color='dodgerblue')
786
- ax.set_title("Top 10 Videos by Comment Count", fontproperties=BANGLA_FONT)
787
- ax.set_xlabel("মন্তব্য সংখ্যা", fontproperties=BANGLA_FONT)
788
- ax.set_ylabel("ভিডিও শিরোনাম", fontproperties=BANGLA_FONT)
789
  yticks = np.arange(len(top_videos.index))
790
  ax.set_yticks(yticks)
791
- ax.set_yticklabels(top_videos.index, fontproperties=BANGLA_FONT, fontsize=12)
 
 
 
 
 
 
 
 
 
 
792
  plt.tight_layout()
793
  dashboard_components["yt_top_videos_plot"] = fig_top_videos
794
 
@@ -812,12 +858,22 @@ def generate_youtube_dashboard(videos_df, comments_df):
812
  if not top_engagement.empty:
813
  fig_engagement, ax = plt.subplots(figsize=(10, 6))
814
  ax.barh(top_engagement['video_title'], top_engagement['engagement_rate'], color='mediumseagreen')
815
- ax.set_title("Top 10 Videos by Engagement Rate", fontproperties=BANGLA_FONT)
816
- ax.set_xlabel("এনগেজমেন্ট রেট (মন্তব্য/ভিউ)", fontproperties=BANGLA_FONT)
817
- ax.set_ylabel("ভিডিও শিরোনাম", fontproperties=BANGLA_FONT)
818
  yticks = np.arange(len(top_engagement['video_title']))
819
  ax.set_yticks(yticks)
820
- ax.set_yticklabels(top_engagement['video_title'], fontproperties=BANGLA_FONT, fontsize=12)
 
 
 
 
 
 
 
 
 
 
821
  plt.tight_layout()
822
  except Exception as e:
823
  logger.error(f"Engagement rate calculation failed: {e}")
@@ -864,7 +920,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
864
  gr.Markdown("### Search Criteria")
865
  search_keywords_textbox = gr.Textbox(
866
  label="Search Keywords",
867
- placeholder="e.g., বিএনপি সমাবেশ",
868
  info="Keywords to search for in news articles."
869
  )
870
  sites_to_search_textbox = gr.Textbox(
@@ -896,7 +952,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
896
  )
897
  filter_keywords_textbox = gr.Textbox(
898
  label="Filter Keywords (comma-separated, optional)",
899
- placeholder="e.g., নির্বাচন, সরকার",
900
  info="Filter results by these keywords."
901
  )
902
 
@@ -951,7 +1007,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
951
  with gr.Column(scale=1):
952
  yt_search_keywords = gr.Textbox(
953
  label="YouTube Search Keywords",
954
- placeholder="e.g., BNP Rally",
955
  info="Keywords to search for in YouTube videos."
956
  )
957
  yt_max_videos_slider = gr.Slider(
@@ -1205,5 +1261,383 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
1205
  # ==============================================================================
1206
  # LAUNCH THE APP
1207
  # ==============================================================================
1208
- if __name__ == "__main__":
1209
- app.launch( debug=True,share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  # --- APPLICATION CONFIGURATION ---
36
  APP_TITLE = "Social Perception Analyzer"
37
+ APP_TAGLINE = "Analyze GoogleNews & YouTube video trends, engagement, and comment activity for your search topics."
38
+ APP_FOOTER = "Developed by Arjon"
39
 
40
  # --- FONT CONFIGURATION ---
41
  FONT_PATH = 'NotoSansBengali-Regular.ttf'
42
+ BANGLA_FONT = FONT_PATH
43
 
44
  def setup_bangla_font():
45
  """Properly set up Bengali font for all visualizations"""
 
99
  "তারেক রহমান": "তারেক_রহমান",
100
  "খালেদা জিয়া": "খালেদা_জিয়া",
101
  "বিএনপি জিন্দাবাদ": "বিএনপি_জিন্দাবাদ"
102
+ "মুহাম্মদ ইউনূস": "মুহাম্মদ_ইউনূস"
103
  }
104
 
105
  def get_dynamic_time_agg(start_date, end_date):
 
557
  if not media_counts.empty:
558
  fig_media, ax = plt.subplots(figsize=(8, 6))
559
  media_counts.plot(kind='barh', ax=ax, color='skyblue')
560
+ ax.set_title("Top 15 Media Sources", fontproperties=BANGLA_FONT, fontsize=18)
561
+ ax.set_xlabel("Article Count", fontproperties=BANGLA_FONT, fontsize=14)
562
+ ax.set_ylabel("মিডিয়া", fontproperties=BANGLA_FONT, fontsize=14)
563
  yticks = np.arange(len(media_counts.index))
564
  ax.set_yticks(yticks)
565
+ ax.set_yticklabels(media_counts.index, fontproperties=BANGLA_FONT, fontsize=14)
 
566
  for label in ax.get_xticklabels():
567
  label.set_fontproperties(BANGLA_FONT)
568
+ label.set_fontsize(12)
569
  for label in ax.get_yticklabels():
570
  label.set_fontproperties(BANGLA_FONT)
571
+ label.set_fontsize(14)
572
+ legend = ax.get_legend()
573
+ if legend:
574
+ for text in legend.get_texts():
575
+ text.set_fontproperties(BANGLA_FONT)
576
  plt.tight_layout()
577
 
578
  # Word cloud generation
 
658
  if not channel_views.empty:
659
  fig_channel_dominance, ax = plt.subplots(figsize=(10, 6))
660
  channel_views.plot(kind='barh', ax=ax, color='slateblue')
661
+ ax.set_title("Top 10 Dominant Channels by View Count", fontproperties=BANGLA_FONT, fontsize=18)
662
+ ax.set_xlabel("মোট ভিউ", fontproperties=BANGLA_FONT, fontsize=14)
663
+ ax.set_ylabel("চ্যানেল", fontproperties=BANGLA_FONT, fontsize=14)
664
  yticks = np.arange(len(channel_views.index))
665
  ax.set_yticks(yticks)
666
+ ax.set_yticklabels(channel_views.index, fontproperties=BANGLA_FONT, fontsize=14)
667
+ for label in ax.get_xticklabels():
668
+ label.set_fontproperties(BANGLA_FONT)
669
+ label.set_fontsize(12)
670
+ for label in ax.get_yticklabels():
671
+ label.set_fontproperties(BANGLA_FONT)
672
+ label.set_fontsize(14)
673
+ legend = ax.get_legend()
674
+ if legend:
675
+ for text in legend.get_texts():
676
+ text.set_fontproperties(BANGLA_FONT)
677
  plt.tight_layout()
678
  dashboard_components["yt_channel_dominance_plot"] = fig_channel_dominance
679
 
 
692
  )
693
  ax.axvline(median_views, color='blue', linestyle='--', label='Median Views')
694
  ax.axhline(median_engagement, color='green', linestyle='--', label='Median Engagement')
695
+ ax.set_xlabel("মোট ভিউ", fontproperties=BANGLA_FONT, fontsize=14)
696
+ ax.set_ylabel("এনগেজমেন্ট রেট", fontproperties=BANGLA_FONT, fontsize=14)
697
+ ax.set_title("Content Performance Quadrant", fontproperties=BANGLA_FONT, fontsize=18)
698
+ for label in ax.get_xticklabels():
699
+ label.set_fontproperties(BANGLA_FONT)
700
+ label.set_fontsize(12)
701
+ for label in ax.get_yticklabels():
702
+ label.set_fontproperties(BANGLA_FONT)
703
+ label.set_fontsize(14)
704
+ legend = ax.get_legend()
705
+ if legend:
706
+ for text in legend.get_texts():
707
+ text.set_fontproperties(BANGLA_FONT)
708
  plt.tight_layout()
709
  except Exception as e:
710
  logger.error(f"Quadrant plot failed: {e}")
 
746
  if not channel_counts.empty:
747
  fig_channels, ax = plt.subplots(figsize=(8, 6))
748
  channel_counts.plot(kind='barh', ax=ax, color='coral')
749
+ ax.set_title("Top 15 Channels by Video Volume", fontproperties=BANGLA_FONT, fontsize=18)
750
+ ax.set_yticklabels(channel_counts.index, fontproperties=BANGLA_FONT, fontsize=14)
751
+ ax.set_xlabel("Video Count", fontproperties=BANGLA_FONT, fontsize=14)
752
+ for label in ax.get_xticklabels():
753
+ label.set_fontproperties(BANGLA_FONT)
754
+ label.set_fontsize(12)
755
+ for label in ax.get_yticklabels():
756
+ label.set_fontproperties(BANGLA_FONT)
757
+ label.set_fontsize(14)
758
+ legend = ax.get_legend()
759
+ if legend:
760
+ for text in legend.get_texts():
761
+ text.set_fontproperties(BANGLA_FONT)
762
  plt.tight_layout()
763
  dashboard_components["yt_channel_plot"] = fig_channels
764
 
 
819
  if not top_videos.empty:
820
  fig_top_videos, ax = plt.subplots(figsize=(10, 6))
821
  top_videos.plot(kind='barh', ax=ax, color='dodgerblue')
822
+ ax.set_title("Top 10 Videos by Comment Count", fontproperties=BANGLA_FONT, fontsize=18)
823
+ ax.set_xlabel("মন্তব্য সংখ্যা", fontproperties=BANGLA_FONT, fontsize=14)
824
+ ax.set_ylabel("ভিডিও শিরোনাম", fontproperties=BANGLA_FONT, fontsize=14)
825
  yticks = np.arange(len(top_videos.index))
826
  ax.set_yticks(yticks)
827
+ ax.set_yticklabels(top_videos.index, fontproperties=BANGLA_FONT, fontsize=14)
828
+ for label in ax.get_xticklabels():
829
+ label.set_fontproperties(BANGLA_FONT)
830
+ label.set_fontsize(12)
831
+ for label in ax.get_yticklabels():
832
+ label.set_fontproperties(BANGLA_FONT)
833
+ label.set_fontsize(14)
834
+ legend = ax.get_legend()
835
+ if legend:
836
+ for text in legend.get_texts():
837
+ text.set_fontproperties(BANGLA_FONT)
838
  plt.tight_layout()
839
  dashboard_components["yt_top_videos_plot"] = fig_top_videos
840
 
 
858
  if not top_engagement.empty:
859
  fig_engagement, ax = plt.subplots(figsize=(10, 6))
860
  ax.barh(top_engagement['video_title'], top_engagement['engagement_rate'], color='mediumseagreen')
861
+ ax.set_title("Top 10 Videos by Engagement Rate", fontproperties=BANGLA_FONT, fontsize=18)
862
+ ax.set_xlabel("এনগেজমেন্ট রেট (মন্তব্য/ভিউ)", fontproperties=BANGLA_FONT, fontsize=14)
863
+ ax.set_ylabel("ভিডিও শিরোনাম", fontproperties=BANGLA_FONT, fontsize=14)
864
  yticks = np.arange(len(top_engagement['video_title']))
865
  ax.set_yticks(yticks)
866
+ ax.set_yticklabels(top_engagement['video_title'], fontproperties=BANGLA_FONT, fontsize=14)
867
+ for label in ax.get_xticklabels():
868
+ label.set_fontproperties(BANGLA_FONT)
869
+ label.set_fontsize(12)
870
+ for label in ax.get_yticklabels():
871
+ label.set_fontproperties(BANGLA_FONT)
872
+ label.set_fontsize(14)
873
+ legend = ax.get_legend()
874
+ if legend:
875
+ for text in legend.get_texts():
876
+ text.set_fontproperties(BANGLA_FONT)
877
  plt.tight_layout()
878
  except Exception as e:
879
  logger.error(f"Engagement rate calculation failed: {e}")
 
920
  gr.Markdown("### Search Criteria")
921
  search_keywords_textbox = gr.Textbox(
922
  label="Search Keywords",
923
+ placeholder="e.g., বাংলাদেশ, নির্বাচন",
924
  info="Keywords to search for in news articles."
925
  )
926
  sites_to_search_textbox = gr.Textbox(
 
952
  )
953
  filter_keywords_textbox = gr.Textbox(
954
  label="Filter Keywords (comma-separated, optional)",
955
+ placeholder="e.g., ডাকসু, নোবেল",
956
  info="Filter results by these keywords."
957
  )
958
 
 
1007
  with gr.Column(scale=1):
1008
  yt_search_keywords = gr.Textbox(
1009
  label="YouTube Search Keywords",
1010
+ placeholder="e.g., ক্রিকেট",
1011
  info="Keywords to search for in YouTube videos."
1012
  )
1013
  yt_max_videos_slider = gr.Slider(
 
1261
  # ==============================================================================
1262
  # LAUNCH THE APP
1263
  # ==============================================================================
1264
+ custom_css = """
1265
+ body, .gradio-container {
1266
+ background: #181a20 !important;
1267
+ font-family: 'Inter', 'Noto Sans', sans-serif;
1268
+ }
1269
+ .gr-card {
1270
+ background: #23263a;
1271
+ border-radius: 18px;
1272
+ box-shadow: 0 4px 24px rgba(0,0,0,0.12);
1273
+ padding: 24px;
1274
+ margin-bottom: 24px;
1275
+ }
1276
+ .gr-title {
1277
+ color: #fff;
1278
+ font-size: 2.2rem;
1279
+ font-weight: 700;
1280
+ margin-bottom: 12px;
1281
+ }
1282
+ .gr-metric {
1283
+ color: #22d3ee;
1284
+ font-size: 2.5rem;
1285
+ font-weight: 800;
1286
+ }
1287
+ .gr-label {
1288
+ color: #94a3b8;
1289
+ font-size: 1.1rem;
1290
+ margin-bottom: 6px;
1291
+ }
1292
+ .gradio-row, .gradio-column {
1293
+ background: transparent !important;
1294
+ }
1295
+ .gradio-button {
1296
+ border-radius: 8px !important;
1297
+ background: linear-gradient(90deg,#3b82f6,#22d3ee) !important;
1298
+ color: #fff !important;
1299
+ font-weight: 600 !important;
1300
+ box-shadow: 0 2px 8px rgba(34,211,238,0.08);
1301
+ transition: background 0.2s;
1302
+ }
1303
+ .gradio-button:hover {
1304
+ background: linear-gradient(90deg,#22d3ee,#3b82f6) !important;
1305
+ }
1306
+ .gradio-markdown h1, .gradio-markdown h2, .gradio-markdown h3 {
1307
+ color: #fff !important;
1308
+ }
1309
+ .gradio-markdown {
1310
+ color: #cbd5e1 !important;
1311
+ }
1312
+ """
1313
+
1314
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"), title=APP_TITLE, css=custom_css) as app:
1315
# Header card. Rendered from the APP_* configuration constants so the banner
# always agrees with the configured title, tagline and footer instead of
# carrying its own hard-coded copy of that text.
gr.HTML(f"""
<div class='gr-card' style='margin-bottom:32px;'>
<div class='gr-title'>{APP_TITLE}</div>
<div style='color:#94a3b8;font-size:1.2rem;margin-bottom:8px;'>{APP_TAGLINE}</div>
<div style='color:#22d3ee;font-size:1rem;'>{APP_FOOTER}</div>
</div>
""")
1322
+ # --- STATE MANAGEMENT ---
1323
+ scraper_results_state = gr.State()
1324
+ youtube_results_state = gr.State()
1325
+
1326
+ with gr.Tabs():
1327
+ with gr.TabItem("1. News Scraper", id=0):
1328
+ gr.HTML("<div class='gr-card' style='margin-bottom:24px;'><h2>News Scraper</h2><p>Search and filter news articles from top Bangladeshi sources. Use advanced filters and download results.</p></div>")
1329
+ with gr.Row():
1330
+ with gr.Column(scale=1):
1331
+ gr.HTML("<div class='gr-card'><h3>Search Criteria</h3></div>")
1332
# Free-text query for the news scraper. The placeholder uses the same neutral
# example as the other "Search Keywords" textbox defined earlier in this file.
search_keywords_textbox = gr.Textbox(
    label="Search Keywords",
    placeholder="e.g., বাংলাদেশ, নির্বাচন",
    info="Keywords to search for in news articles."
)
1337
+ sites_to_search_textbox = gr.Textbox(
1338
+ label="Target Sites (Optional, comma-separated)",
1339
+ placeholder="e.g., prothomalo.com",
1340
+ info="Limit search to specific news sites."
1341
+ )
1342
+ start_date_textbox = gr.Textbox(
1343
+ label="Start Date",
1344
+ placeholder="YYYY-MM-DD or 'last week'",
1345
+ info="Start date for news scraping."
1346
+ )
1347
+ end_date_textbox = gr.Textbox(
1348
+ label="End Date",
1349
+ placeholder="YYYY-MM-DD or 'today'",
1350
+ info="End date for news scraping."
1351
+ )
1352
+ gr.HTML("<div class='gr-card'><h3>Scraping Parameters</h3></div>")
1353
+ interval_days_slider = gr.Slider(
1354
+ 1, 7, 3, step=1,
1355
+ label="Days per Interval",
1356
+ info="How many days to group each scraping interval."
1357
+ )
1358
+ max_pages_slider = gr.Slider(
1359
+ 1, 10, 5, step=1,
1360
+ label="Max Pages per Interval",
1361
+ info="Maximum number of pages to fetch per interval."
1362
+ )
1363
# Optional comma-separated filter terms. The placeholder uses the same neutral
# example as the other "Filter Keywords" textbox defined earlier in this file.
filter_keywords_textbox = gr.Textbox(
    label="Filter Keywords (comma-separated, optional)",
    placeholder="e.g., ডাকসু, নোবেল",
    info="Filter results by these keywords."
)
1368
+ start_scraper_button = gr.Button("Start Scraping & Analysis", variant="primary")
1369
+ scraper_progress = gr.Progress()
1370
+ with gr.Column(scale=2):
1371
+ gr.HTML("<div class='gr-card'><h3>Filtered Results</h3></div>")
1372
+ scraper_results_df = gr.DataFrame(
1373
+ label="Filtered Results",
1374
+ interactive=True
1375
+ )
1376
+ scraper_download_file = gr.File(
1377
+ label="Download Filtered Results CSV"
1378
+ )
1379
+ with gr.TabItem("2. News Analytics", id=1):
1380
+ gr.HTML("<div class='gr-card' style='margin-bottom:24px;'><h2>News Analytics Dashboard</h2><p>Visualize key metrics, trends, and top sources from scraped news data. All plots and metrics update dynamically.</p></div>")
1381
+ with gr.Row():
1382
+ with gr.Column(scale=1):
1383
+ gr.HTML("<div class='gr-card'><h3>Key Metrics</h3></div>")
1384
+ kpi_total_articles = gr.HTML()
1385
+ kpi_unique_media = gr.HTML()
1386
+ kpi_date_range = gr.HTML()
1387
+ with gr.Column(scale=2):
1388
+ gr.HTML("<div class='gr-card'><h3>Trends</h3></div>")
1389
+ dashboard_timeline_plot = gr.LinePlot(
1390
+ label="News Volume Timeline"
1391
+ )
1392
+ with gr.Row():
1393
+ with gr.Column(scale=1):
1394
+ gr.HTML("<div class='gr-card'><h3>Top Sources</h3></div>")
1395
+ dashboard_media_plot = gr.Plot(
1396
+ label="Top Media Sources by Article Count"
1397
+ )
1398
+ with gr.Column(scale=1):
1399
+ gr.HTML("<div class='gr-card'><h3>Headline Word Cloud</h3></div>")
1400
+ dashboard_wordcloud_plot = gr.Plot(
1401
+ label="Headline Word Cloud"
1402
+ )
1403
+ with gr.TabItem("3. YouTube Topic Analysis", id=2):
1404
+ gr.HTML("<div class='gr-card' style='margin-bottom:24px;'><h2>YouTube Topic Analysis</h2><p>Analyze YouTube video trends, engagement, and comment activity for your search topics.</p></div>")
1405
+ with gr.Row():
1406
+ with gr.Column(scale=1):
1407
+ gr.HTML("<div class='gr-card'><h3>Search Criteria</h3></div>")
1408
# Free-text query for the YouTube analysis. The placeholder uses the same
# neutral example as the other "YouTube Search Keywords" textbox defined
# earlier in this file.
yt_search_keywords = gr.Textbox(
    label="YouTube Search Keywords",
    placeholder="e.g., ক্রিকেট",
    info="Keywords to search for in YouTube videos."
)
1413
+ yt_max_videos_slider = gr.Slider(
1414
+ 10, 100, 30, step=5,
1415
+ label="Max Videos for Stats",
1416
+ info="Maximum number of videos to scan for statistics."
1417
+ )
1418
+ yt_num_videos_comments_slider = gr.Slider(
1419
+ 1, 20, 5, step=1,
1420
+ label="Videos for Comments",
1421
+ info="Number of top videos to scrape comments from."
1422
+ )
1423
+ yt_max_comments_slider = gr.Slider(
1424
+ 10, 200, 50, step=10,
1425
+ label="Max Comments per Video",
1426
+ info="Maximum number of comments to fetch per video."
1427
+ )
1428
+ yt_published_after = gr.Textbox(
1429
+ label="Published After (Optional)",
1430
+ placeholder="YYYY-MM-DD",
1431
+ info="Only include videos published after this date."
1432
+ )
1433
+ start_youtube_analysis_button = gr.Button(
1434
+ "Start YouTube Analysis",
1435
+ variant="primary"
1436
+ )
1437
+ yt_progress = gr.Progress()
1438
+ with gr.Column(scale=2):
1439
+ gr.HTML("<div class='gr-card'><h3>Video Results</h3></div>")
1440
+ yt_results_df = gr.DataFrame(
1441
+ label="YouTube Video Results",
1442
+ interactive=True
1443
+ )
1444
+ yt_videos_download_file = gr.File(
1445
+ label="Download YouTube Video Results CSV"
1446
+ )
1447
+ yt_comments_df = gr.DataFrame(
1448
+ label="YouTube Comments Results",
1449
+ interactive=True
1450
+ )
1451
+ yt_comments_download_file = gr.File(
1452
+ label="Download YouTube Comments CSV"
1453
+ )
1454
+ yt_dashboard_html = gr.HTML()
1455
+ with gr.Row():
1456
+ with gr.Column(scale=1):
1457
+ gr.HTML("<div class='gr-card'><h3>Top Channels & Engagement</h3></div>")
1458
+ kpi_yt_videos_found = gr.HTML()
1459
+ kpi_yt_views_scanned = gr.HTML()
1460
+ kpi_yt_comments_scraped = gr.HTML()
1461
+ yt_channel_plot = gr.Plot(
1462
+ label="Top Channels by Video Volume"
1463
+ )
1464
+ yt_channel_dominance_plot = gr.Plot(
1465
+ label="Channel Dominance by View Count"
1466
+ )
1467
+ yt_top_videos_plot = gr.Plot(
1468
+ label="Top Videos by Comment Count"
1469
+ )
1470
+ yt_content_quadrant_plot = gr.Plot(
1471
+ label="Content Performance Quadrant"
1472
+ )
1473
+ yt_engagement_plot = gr.Plot(
1474
+ label="Top Videos by Engagement Rate"
1475
+ )
1476
+ with gr.Column(scale=1):
1477
+ gr.HTML("<div class='gr-card'><h3>Comment Activity & Word Cloud</h3></div>")
1478
+ yt_time_series_plot = gr.LinePlot(
1479
+ label="Comment Activity Over Time"
1480
+ )
1481
+ yt_wordcloud_plot = gr.Plot(
1482
+ label="Bengali Word Cloud from Comments"
1483
+ )
1484
+ yt_detailed_summary = gr.HTML()
1485
+ # --- EVENT HANDLERS ---
1486
def scraper_button_handler(search_keywords, sites, start_date, end_date, interval, max_pages, filter_keys):
    """Handle a click on the news-scraper button.

    Runs ``run_news_scraper_pipeline`` with the form values, writes the
    scraped frame to ``news_results.csv`` when it is non-empty, and builds the
    dashboard components with ``generate_scraper_dashboard``.

    Args:
        search_keywords: Value of the "Search Keywords" textbox.
        sites: Value of the "Target Sites" textbox.
        start_date: Value of the "Start Date" textbox.
        end_date: Value of the "End Date" textbox.
        interval: Value of the "Days per Interval" slider.
        max_pages: Value of the "Max Pages per Interval" slider.
        filter_keys: Value of the "Filter Keywords" textbox.

    Returns:
        An 8-tuple in the order of the ``outputs`` list wired to the button:
        results table, download-file update, three KPI HTML values, and the
        timeline, media and word-cloud plots. On failure the tuple holds
        empty/hidden values so stale results are cleared.
    """
    try:
        df, filtered_df = run_news_scraper_pipeline(
            search_keywords, sites, start_date, end_date,
            interval, max_pages, filter_keys
        )
        dashboard = generate_scraper_dashboard(df)

        if df.empty:
            download_update = gr.File(visible=False)
        else:
            csv_path = "news_results.csv"
            # NOTE(review): the CSV is written from the unfiltered frame while
            # the table shows `filtered_df` and the file component is labelled
            # "Download Filtered Results CSV" -- confirm which one is intended.
            df.to_csv(csv_path, index=False)
            download_update = gr.File(value=csv_path, visible=True)

        return (
            filtered_df,
            download_update,
            dashboard["kpi_total_articles"],
            dashboard["kpi_unique_media"],
            dashboard["kpi_date_range"],
            dashboard["dashboard_timeline_plot"],
            dashboard["dashboard_media_plot"],
            dashboard["dashboard_wordcloud_plot"]
        )
    except Exception as e:
        # Top-level UI boundary: log with traceback and tell the user.
        logger.exception("Error in scraper button handler: %s", e)
        # gr.Error only reaches the user when it is raised; merely constructing
        # it is a no-op. gr.Warning displays when called, which lets us notify
        # the user and still return values that clear the outputs.
        gr.Warning(f"An error occurred during scraping: {str(e)}")
        return (
            pd.DataFrame(),
            gr.File(visible=False),
            gr.HTML(""), gr.HTML(""), gr.HTML(""),
            None, None, None
        )
1520
+
1521
+ start_scraper_button.click(
1522
+ fn=scraper_button_handler,
1523
+ inputs=[
1524
+ search_keywords_textbox,
1525
+ sites_to_search_textbox,
1526
+ start_date_textbox,
1527
+ end_date_textbox,
1528
+ interval_days_slider,
1529
+ max_pages_slider,
1530
+ filter_keywords_textbox
1531
+ ],
1532
+ outputs=[
1533
+ scraper_results_df,
1534
+ scraper_download_file,
1535
+ kpi_total_articles,
1536
+ kpi_unique_media,
1537
+ kpi_date_range,
1538
+ dashboard_timeline_plot,
1539
+ dashboard_media_plot,
1540
+ dashboard_wordcloud_plot
1541
+ ]
1542
+ )
1543
+
1544
def youtube_button_handler(keywords, max_videos, num_comments_videos, max_comments, published_after):
    """Handle a click on the YouTube-analysis button.

    Runs ``run_youtube_analysis_pipeline``, writes the video and comment
    frames to CSV files when they are non-empty, and builds the dashboard
    components with ``generate_youtube_dashboard``.

    Args:
        keywords: Value of the "YouTube Search Keywords" textbox.
        max_videos: Value of the "Max Videos for Stats" slider.
        num_comments_videos: Value of the "Videos for Comments" slider.
        max_comments: Value of the "Max Comments per Video" slider.
        published_after: Value of the "Published After" textbox.

    Returns:
        A 16-tuple in the order of the ``outputs`` list wired to the button:
        videos table, videos download update, comments table, comments
        download update, summary HTML, three KPI HTML values, seven plots,
        and the detailed summary HTML. On failure the tuple holds
        empty/hidden values so stale results are cleared.
    """
    try:
        videos_df, comments_df, summary_html = run_youtube_analysis_pipeline(
            api_key=None,
            query=keywords,
            max_videos_for_stats=max_videos,
            num_videos_for_comments=num_comments_videos,
            max_comments_per_video=max_comments,
            published_after=published_after
        )

        yt_videos_csv = "youtube_videos.csv"
        yt_comments_csv = "youtube_comments.csv"

        if videos_df.empty:
            videos_download = gr.File(visible=False)
        else:
            videos_df.to_csv(yt_videos_csv, index=False)
            videos_download = gr.File(value=yt_videos_csv, visible=True)

        if comments_df.empty:
            comments_download = gr.File(visible=False)
        else:
            # Back-fill columns the comments frame may lack, using the video
            # frame as the lookup table for titles.
            if "video_title" not in comments_df.columns and "video_id" in comments_df.columns:
                title_map = videos_df.set_index("video_id")["video_title"].to_dict()
                comments_df["video_title"] = comments_df["video_id"].map(title_map)
            if "channel" not in comments_df.columns and "channel_title" in comments_df.columns:
                comments_df["channel"] = comments_df["channel_title"]
            comments_df.to_csv(yt_comments_csv, index=False)
            comments_download = gr.File(value=yt_comments_csv, visible=True)

        dashboard = generate_youtube_dashboard(videos_df, comments_df)
        return (
            videos_df,
            videos_download,
            comments_df,
            comments_download,
            summary_html,
            dashboard["kpi_yt_videos_found"],
            dashboard["kpi_yt_views_scanned"],
            dashboard["kpi_yt_comments_scraped"],
            dashboard["yt_channel_plot"],
            dashboard["yt_channel_dominance_plot"],
            dashboard["yt_time_series_plot"],
            dashboard["yt_top_videos_plot"],
            dashboard["yt_content_quadrant_plot"],
            dashboard["yt_engagement_plot"],
            dashboard["yt_wordcloud_plot"],
            dashboard["yt_detailed_summary"]
        )
    except Exception as e:
        # Top-level UI boundary: log with traceback and tell the user.
        logger.exception("Error in YouTube button handler: %s", e)
        # gr.Error only reaches the user when it is raised; merely constructing
        # it is a no-op. gr.Warning displays when called, which lets us notify
        # the user and still return values that clear the outputs.
        gr.Warning(f"An error occurred during YouTube analysis: {str(e)}")
        return (
            pd.DataFrame(),          # yt_results_df
            gr.File(visible=False),  # yt_videos_download_file
            pd.DataFrame(),          # yt_comments_df
            gr.File(visible=False),  # yt_comments_download_file
            gr.HTML(""),             # yt_dashboard_html
            gr.HTML(""),             # kpi_yt_videos_found
            gr.HTML(""),             # kpi_yt_views_scanned
            gr.HTML(""),             # kpi_yt_comments_scraped
            None,                    # yt_channel_plot
            None,                    # yt_channel_dominance_plot
            None,                    # yt_time_series_plot
            None,                    # yt_top_videos_plot
            None,                    # yt_content_quadrant_plot
            None,                    # yt_engagement_plot
            None,                    # yt_wordcloud_plot
            gr.HTML("")              # yt_detailed_summary
        )
1613
+
1614
+ start_youtube_analysis_button.click(
1615
+ fn=youtube_button_handler,
1616
+ inputs=[
1617
+ yt_search_keywords,
1618
+ yt_max_videos_slider,
1619
+ yt_num_videos_comments_slider,
1620
+ yt_max_comments_slider,
1621
+ yt_published_after
1622
+ ],
1623
+ outputs=[
1624
+ yt_results_df,
1625
+ yt_videos_download_file,
1626
+ yt_comments_df,
1627
+ yt_comments_download_file,
1628
+ yt_dashboard_html,
1629
+ kpi_yt_videos_found,
1630
+ kpi_yt_views_scanned,
1631
+ kpi_yt_comments_scraped,
1632
+ yt_channel_plot,
1633
+ yt_channel_dominance_plot,
1634
+ yt_time_series_plot,
1635
+ yt_top_videos_plot,
1636
+ yt_content_quadrant_plot,
1637
+ yt_engagement_plot,
1638
+ yt_wordcloud_plot,
1639
+ yt_detailed_summary
1640
+ ]
1641
+ )
1642
+ if __name__ == "__main__":
1643
+ app.launch(debug=True, share=True)