DivYonko committed on
Commit
cee5a19
·
1 Parent(s): 47614fa

Fix smart filter in both pages/comments.py and frontend/streamlit_app.py

Browse files
Files changed (1) hide show
  1. frontend/streamlit_app.py +1002 -214
frontend/streamlit_app.py CHANGED
@@ -25,9 +25,10 @@ st.set_page_config(
25
 
26
  r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, decode_responses=True)
27
 
28
- TOPIC_LABELS = ["Appreciation", "Question", "Promo", "Spam", "General", "MCQ Answer"]
29
  TOPIC_COLOR = {
30
  "Appreciation": "#f59e0b", "Question": "#3b82f6",
 
31
  "Promo": "#ec4899", "Spam": "#ef4444", "General": "#6b7280",
32
  "MCQ Answer": "#10b981"
33
  }
@@ -197,6 +198,18 @@ hr{border:none!important;border-top:1px solid var(--divider)!important;margin:1.
197
  .empty-icon{font-size:3.5rem;margin-bottom:16px;}
198
  .empty-title{font-size:1.1rem;color:var(--text-2);font-weight:700;}
199
  .empty-sub{font-size:0.84rem;color:var(--text-3);margin-top:6px;}
 
 
 
 
 
 
 
 
 
 
 
 
200
  </style>"""
201
 
202
  st.markdown(THEME_JS, unsafe_allow_html=True)
@@ -379,22 +392,31 @@ def compute_engagement(all_data_json: str, window: int = 50) -> dict:
379
 
380
  @st.cache_data(ttl=10, show_spinner=False)
381
  def compute_top_contributors(all_data_json: str, top_n: int = 10) -> list[dict]:
382
- """Return top N authors by message count with their sentiment breakdown."""
383
  import json as _j
384
  from collections import Counter
385
  msgs = _j.loads(all_data_json)
386
  if not msgs:
387
  return []
388
 
 
389
  author_data: dict[str, dict] = {}
390
  for m in msgs:
391
  a = m.get("author", "Unknown")
392
  if a not in author_data:
393
- author_data[a] = {"count": 0, "Positive": 0, "Neutral": 0, "Negative": 0}
 
 
 
 
394
  author_data[a]["count"] += 1
395
  s = m.get("sentiment", "Neutral")
396
- if s in author_data[a]:
397
  author_data[a][s] += 1
 
 
 
 
398
 
399
  sorted_authors = sorted(author_data.items(), key=lambda x: x[1]["count"], reverse=True)[:top_n]
400
  result = []
@@ -403,9 +425,16 @@ def compute_top_contributors(all_data_json: str, top_n: int = 10) -> list[dict]:
403
  result.append({
404
  "author": author,
405
  "count": d["count"],
406
- "pos_pct": round(d["Positive"] / total * 100),
407
- "neu_pct": round(d["Neutral"] / total * 100),
408
- "neg_pct": round(d["Negative"] / total * 100),
 
 
 
 
 
 
 
409
  })
410
  return result
411
 
@@ -459,6 +488,74 @@ def check_spam_alert(df_all: pd.DataFrame, threshold: float = 0.3, window: int =
459
  return None
460
 
461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  # ── SESSION STATE INIT ────────────────────────────────────────
463
  MAX_STREAMS = 5
464
  STREAM_COLORS = ["#7c3aed", "#10b981", "#f59e0b", "#3b82f6", "#ec4899"]
@@ -470,6 +567,8 @@ if "alert_dismissed" not in st.session_state:
470
  st.session_state.alert_dismissed = False
471
  if "last_alert_count" not in st.session_state:
472
  st.session_state.last_alert_count = 0
 
 
473
  # Multi-stream: list of dicts {video_id, redis_key, label, proc}
474
  if "streams" not in st.session_state:
475
  st.session_state.streams = [
@@ -488,8 +587,21 @@ with st.sidebar:
488
 
489
  # ── Display Settings ──
490
  st.markdown('<p style="font-size:0.68rem;font-weight:700;color:var(--accent);text-transform:uppercase;letter-spacing:0.1em;margin-bottom:8px;">Display Settings</p>', unsafe_allow_html=True)
491
- refresh_rate = st.slider("Refresh interval (s)", 5, 60, 15)
492
- msg_limit = st.slider("Message window", 10, 200, 50)
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  auto_refresh = st.toggle("Live auto-refresh", value=True)
494
  st.divider()
495
 
@@ -553,11 +665,12 @@ with st.sidebar:
553
  st.session_state.streams[idx]["proc"] = proc
554
  st.session_state.streams[idx]["video_id"] = vid
555
  st.session_state.streams[idx]["redis_key"] = rkey
556
- # Store title for stream A only (page header)
 
 
557
  if idx == 0:
558
  update_config_video_id(vid)
559
- title = fetch_video_title(vid)
560
- r.set("video_title", title) if title else r.delete("video_title")
561
  st.session_state.alert_dismissed = False
562
  st.success(f"Stream {label} started β†’ `{rkey}`")
563
  else:
@@ -613,6 +726,38 @@ with st.sidebar:
613
  st.rerun()
614
  st.divider()
615
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
  # ── Danger Zone ──
617
  st.markdown('<p style="font-size:0.68rem;font-weight:700;color:#ef4444;text-transform:uppercase;letter-spacing:0.1em;margin-bottom:8px;">Danger Zone</p>', unsafe_allow_html=True)
618
  if st.button("πŸ—‘ Clear all data", width='stretch'):
@@ -632,7 +777,43 @@ with st.sidebar:
632
 
633
  # ── PAGE HEADER ───────────────────────────────────────────────
634
  _video_title = r.get("video_title")
635
- _subtitle = f"β–Ά {_video_title}" if _video_title else "Real-time sentiment Β· topic classification Β· engagement insights"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
 
637
  col_title, col_live = st.columns([7, 1])
638
  with col_title:
@@ -640,6 +821,7 @@ with col_title:
640
  '<div style="padding:8px 0 4px;">'
641
  '<div style="font-size:2rem;font-weight:800;color:var(--text-1);letter-spacing:-0.04em;">YouTube Live Chat Analytics</div>'
642
  f'<div style="font-size:1.25rem;color:var(--accent-text);font-weight:600;margin-top:6px;">{_subtitle}</div>'
 
643
  '</div>', unsafe_allow_html=True
644
  )
645
  with col_live:
@@ -652,8 +834,35 @@ with col_live:
652
 
653
  st.divider()
654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  # ── DATA LOAD ─────────────────────────────────────────────────
656
- all_data = load_stream_data("chat_messages")
 
 
 
657
  data = all_data[-msg_limit:] if len(all_data) > msg_limit else all_data
658
 
659
  if not all_data:
@@ -677,7 +886,190 @@ df["topic"] = df["topic"].apply(clean_topic) if "topic" in df.columns el
677
  all_df["sentiment"] = all_df["sentiment"].apply(clean_sentiment)
678
  all_df["topic"] = all_df["topic"].apply(clean_topic) if "topic" in all_df.columns else "General"
679
 
680
- # ── ALERT BANNERS ─────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  if alert_enabled:
682
  alert = check_alert(all_df, threshold=alert_threshold, window=alert_window)
683
  total_now = len(all_df)
@@ -778,6 +1170,26 @@ with v5:
778
  unsafe_allow_html=True
779
  )
780
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
781
 
782
  # ── WINDOW METRICS ────────────────────────────────────────────
783
  st.divider()
@@ -797,19 +1209,19 @@ c2.metric("Positive", pos, f"{pos/total*100:.1f}%")
797
  c3.metric("Neutral", neu, f"{neu/total*100:.1f}%")
798
  c4.metric("Negative", neg, f"{neg/total*100:.1f}%")
799
 
800
- # ── SENTIMENT CHARTS ──────────────────────────────────────────
801
  st.divider()
802
- col_l, col_r = st.columns(2)
803
 
804
- with col_l:
805
  st.markdown('<div class="chart-wrap">', unsafe_allow_html=True)
806
- st.markdown('<div class="chart-title">Sentiment Distribution</div><div class="chart-sub">Message count by sentiment class</div>', unsafe_allow_html=True)
807
  fig_bar = go.Figure(go.Bar(
808
  x=["Positive", "Neutral", "Negative"],
809
- y=[pos, neu, neg],
810
  marker_color=["#22c55e", "#eab308", "#ef4444"],
811
  marker_line_width=0,
812
- text=[pos, neu, neg],
813
  textposition="outside",
814
  textfont=dict(size=12),
815
  hovertemplate="<b>%{x}</b><br>Count: %{y}<extra></extra>",
@@ -820,18 +1232,18 @@ with col_l:
820
  with bar_hdr:
821
  show_bar_data = st.checkbox("View data", key="show_bar")
822
  with bar_dl:
823
- bar_df = pd.DataFrame({"Sentiment": ["Positive", "Neutral", "Negative"], "Count": [pos, neu, neg]})
824
  csv_download(bar_df, "Download CSV", "sentiment_distribution.csv")
825
  if show_bar_data:
826
  st.dataframe(bar_df, width='stretch', hide_index=True)
827
  st.markdown('</div>', unsafe_allow_html=True)
828
 
829
- with col_r:
830
  st.markdown('<div class="chart-wrap">', unsafe_allow_html=True)
831
- st.markdown('<div class="chart-title">Sentiment Breakdown</div><div class="chart-sub">Proportional share per class</div>', unsafe_allow_html=True)
832
  fig_pie = go.Figure(go.Pie(
833
  labels=["Positive", "Neutral", "Negative"],
834
- values=[pos, neu, neg],
835
  marker_colors=["#22c55e", "#eab308", "#ef4444"],
836
  hole=0.58,
837
  textinfo="percent",
@@ -840,7 +1252,7 @@ with col_r:
840
  fig_pie.update_layout(
841
  **{**plotly_layout(260),
842
  "showlegend": True,
843
- "legend": dict(orientation="h", y=-0.08, font=dict(size=11))}
844
  )
845
  st.plotly_chart(fig_pie, width='stretch', config={"displayModeBar": False})
846
  pie_hdr, pie_dl = st.columns([1, 1])
@@ -849,14 +1261,54 @@ with col_r:
849
  with pie_dl:
850
  pie_df = pd.DataFrame({
851
  "Sentiment": ["Positive", "Neutral", "Negative"],
852
- "Count": [pos, neu, neg],
853
- "Percentage": [f"{pos/total*100:.1f}%", f"{neu/total*100:.1f}%", f"{neg/total*100:.1f}%"]
854
  })
855
  csv_download(pie_df, "Download CSV", "sentiment_breakdown.csv")
856
  if show_pie_data:
857
  st.dataframe(pie_df, width='stretch', hide_index=True)
858
  st.markdown('</div>', unsafe_allow_html=True)
859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
860
  # ── Confidence trend ──────────────────────────────────────────
861
  if "confidence" in df.columns:
862
  st.divider()
@@ -979,6 +1431,210 @@ if show_topic_data:
979
  st.dataframe(topic_df, width='stretch', hide_index=True)
980
  st.markdown('</div>', unsafe_allow_html=True)
981
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
982
 
983
  # ── ENGAGEMENT SCORE ─────────────────────────────────────────
984
  st.divider()
@@ -1020,65 +1676,202 @@ st.markdown(
1020
  unsafe_allow_html=True
1021
  )
1022
 
1023
- _contrib_json = json.dumps([{"author": m.get("author",""), "sentiment": m.get("sentiment","Neutral")} for m in all_data])
1024
  contributors = compute_top_contributors(_contrib_json)
1025
 
1026
  if contributors:
1027
  max_count = contributors[0]["count"]
1028
- lc1, lc2 = st.columns([3, 2])
1029
- with lc1:
1030
- rank_icons = {1: "πŸ₯‡", 2: "πŸ₯ˆ", 3: "πŸ₯‰"}
1031
- rank_classes = {1: "gold", 2: "silver", 3: "bronze"}
1032
- for rank, c in enumerate(contributors, 1):
1033
- bar_pct = int(c["count"] / max(max_count, 1) * 100)
1034
- rank_cls = rank_classes.get(rank, "")
1035
- rank_icon = rank_icons.get(rank, f"#{rank}")
1036
- author = c["author"]
1037
- count = c["count"]
1038
- pos_pct = c["pos_pct"]
1039
- neu_pct = c["neu_pct"]
1040
- neg_pct = c["neg_pct"]
1041
- html = (
1042
- f'<div class="leaderboard-row">'
1043
- f'<div class="lb-rank {rank_cls}">{rank_icon}</div>'
1044
- f'<div class="lb-author">{author}</div>'
1045
- f'<div class="lb-bar"><div class="lb-bar-fill" style="width:{bar_pct}%;background:var(--accent);"></div></div>'
1046
- f'<div class="lb-sent">'
1047
- f'<span class="lb-dot" style="background:#22c55e;" title="Positive {pos_pct}%"></span>'
1048
- f'<span class="lb-dot" style="background:#eab308;" title="Neutral {neu_pct}%"></span>'
1049
- f'<span class="lb-dot" style="background:#ef4444;" title="Negative {neg_pct}%"></span>'
1050
- f'</div>'
1051
- f'<div class="lb-count">{count} msgs</div>'
1052
- f'</div>'
1053
- )
1054
- st.markdown(html, unsafe_allow_html=True)
1055
- with lc2:
1056
- # Stacked bar of top 5 contributors
1057
- top5 = contributors[:5]
1058
- fig_lb = go.Figure()
1059
- for sent, color in [("pos_pct","#22c55e"),("neu_pct","#eab308"),("neg_pct","#ef4444")]:
1060
- fig_lb.add_trace(go.Bar(
1061
- y=[c["author"][:18] for c in top5],
1062
- x=[c[sent] for c in top5],
1063
- name=sent.replace("_pct","").capitalize(),
1064
- orientation="h",
1065
- marker_color=color,
1066
- hovertemplate="%{y}: %{x}%<extra></extra>",
1067
- ))
1068
- layout_lb = plotly_layout(260)
1069
- layout_lb["barmode"] = "stack"
1070
- layout_lb["showlegend"] = True
1071
- layout_lb["legend"] = dict(orientation="h", y=1.1, font=dict(size=10))
1072
- layout_lb["xaxis"]["range"] = [0, 100]
1073
- layout_lb["xaxis"]["ticksuffix"] = "%"
1074
- fig_lb.update_layout(**layout_lb)
1075
- st.plotly_chart(fig_lb, width='stretch', config={"displayModeBar": False})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1076
 
1077
  contrib_df = pd.DataFrame(contributors)
1078
  csv_download(contrib_df, "Download CSV", "top_contributors.csv")
1079
  else:
1080
  st.info("Not enough data yet.")
1081
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1082
  # ── WORD CLOUD ────────────────────────────────────────────────
1083
  st.divider()
1084
  st.markdown(
@@ -1154,81 +1947,154 @@ if len(active_streams) > 1:
1154
  unsafe_allow_html=True
1155
  )
1156
 
1157
- def stream_summary_chart(stream_df, color):
1158
- counts_s = stream_df["sentiment"].value_counts().to_dict()
1159
- p = counts_s.get("Positive", 0)
1160
- n = counts_s.get("Neutral", 0)
1161
- g = counts_s.get("Negative", 0)
1162
- t = max(p + n + g, 1)
1163
- fig = go.Figure(go.Bar(
1164
- x=["Positive", "Neutral", "Negative"],
1165
- y=[p, n, g],
1166
- marker_color=["#22c55e", "#eab308", "#ef4444"],
1167
- marker_line_width=0,
1168
- text=[p, n, g],
1169
- textposition="outside",
1170
- hovertemplate="<b>%{x}</b><br>%{y}<extra></extra>",
1171
- ))
1172
- fig.update_layout(**plotly_layout(200))
1173
- return fig, p, n, g, t
1174
-
1175
- # Render in rows of up to 3 columns
1176
- chunk_size = 3
1177
- for row_start in range(0, n_streams, chunk_size):
1178
- row_streams = active_streams[row_start:row_start + chunk_size]
1179
- cols = st.columns(len(row_streams))
1180
- for col, stream in zip(cols, row_streams):
1181
- sidx = st.session_state.streams.index(stream)
1182
- color = STREAM_COLORS[sidx]
1183
- slabel = STREAM_NAMES[sidx]
1184
- s_data = load_stream_data(stream["redis_key"])
1185
- if not s_data:
1186
- col.info(f"No data yet for Stream {slabel}")
1187
- continue
1188
- s_df = pd.DataFrame(s_data)
1189
- s_df["sentiment"] = s_df["sentiment"].apply(clean_sentiment)
1190
- s_df["topic"] = s_df["topic"].apply(clean_topic) if "topic" in s_df.columns else "General"
1191
- fig, p, n, g, t = stream_summary_chart(s_df, color)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1192
  with col:
1193
  st.markdown(
1194
  f'<span class="compare-label" style="background:{color}18;color:{color};border:1px solid {color}44;">'
1195
- f'Stream {slabel} β€” {stream["redis_key"]}</span>',
1196
  unsafe_allow_html=True
1197
  )
1198
- st.plotly_chart(fig, width='stretch', config={"displayModeBar": False})
1199
  st.markdown(
1200
- f'<div style="font-size:0.78rem;color:var(--text-3);margin-bottom:8px;">'
1201
- f'{t} msgs Β· <span style="color:#22c55e;">{p/t*100:.1f}% pos</span> Β· '
1202
- f'<span style="color:#ef4444;">{g/t*100:.1f}% neg</span></div>',
 
 
 
 
 
 
 
 
 
 
 
1203
  unsafe_allow_html=True
1204
  )
1205
-
1206
- # Overlay line chart β€” positive ratio over time for all streams
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1207
  st.markdown('<div class="chart-wrap" style="margin-top:14px;">', unsafe_allow_html=True)
1208
- st.markdown('<div class="chart-title">Positive Ratio Over Time</div><div class="chart-sub">Rolling positive % per stream</div>', unsafe_allow_html=True)
1209
  fig_overlay = go.Figure()
1210
- for stream in active_streams:
1211
- sidx = st.session_state.streams.index(stream)
1212
- color = STREAM_COLORS[sidx]
1213
- slabel = STREAM_NAMES[sidx]
1214
- s_data = load_stream_data(stream["redis_key"])
1215
- if not s_data:
1216
- continue
1217
- s_df = pd.DataFrame(s_data)
1218
- s_df["sentiment"] = s_df["sentiment"].apply(clean_sentiment)
1219
- s_df["is_pos"] = (s_df["sentiment"] == "Positive").astype(int)
1220
- s_df["rolling"] = s_df["is_pos"].rolling(10, min_periods=1).mean() * 100
1221
  fig_overlay.add_trace(go.Scatter(
1222
- x=list(range(len(s_df))),
1223
- y=s_df["rolling"],
1224
  mode="lines",
1225
- name=f"Stream {slabel}",
1226
  line=dict(color=color, width=2),
1227
  hovertemplate=f"Stream {slabel} msg %{{x}}: %{{y:.1f}}%<extra></extra>",
1228
  ))
1229
- layout_ov = plotly_layout(200)
1230
  layout_ov["showlegend"] = True
1231
- layout_ov["legend"] = dict(orientation="h", y=1.1, font=dict(size=11))
1232
  layout_ov["yaxis"]["range"] = [0, 100]
1233
  fig_overlay.update_layout(**layout_ov)
1234
  st.plotly_chart(fig_overlay, width='stretch', config={"displayModeBar": False})
@@ -1270,84 +2136,6 @@ if st.session_state.pinned_messages:
1270
  st.rerun()
1271
 
1272
 
1273
- # ── LIVE CHAT FEED ────────────────────────────────────────────
1274
- st.divider()
1275
- st.markdown('<div class="sec-hdr"><span class="sec-ttl">Live Chat Feed</span></div>', unsafe_allow_html=True)
1276
-
1277
- f1, f2, f3 = st.columns([1, 1, 2])
1278
- with f1:
1279
- sentiment_filter = st.selectbox("Sentiment", ["All", "Positive", "Neutral", "Negative"])
1280
- with f2:
1281
- topic_filter = st.selectbox("Topic", ["All"] + TOPIC_LABELS)
1282
- with f3:
1283
- search_term = st.text_input("Search messages", placeholder="Filter by keyword...")
1284
-
1285
- filtered = df.copy()
1286
- if sentiment_filter != "All":
1287
- filtered = filtered[filtered["sentiment"] == sentiment_filter]
1288
- if topic_filter != "All":
1289
- filtered = filtered[filtered["topic"] == topic_filter]
1290
- if search_term:
1291
- filtered = filtered[filtered["text"].str.contains(search_term, case=False, na=False)]
1292
-
1293
- feed_hdr, feed_dl = st.columns([3, 1])
1294
- with feed_hdr:
1295
- st.markdown(
1296
- f'<div style="font-size:0.78rem;color:var(--text-3);margin-bottom:12px;">Showing {len(filtered)} of {len(df)} messages</div>',
1297
- unsafe_allow_html=True
1298
- )
1299
- with feed_dl:
1300
- if not filtered.empty:
1301
- export_cols = [c for c in ["author", "text", "sentiment", "confidence", "topic", "time"] if c in filtered.columns]
1302
- csv_download(filtered[export_cols], "Download Feed CSV", "chat_feed.csv")
1303
-
1304
- SENT_ICON = {"Positive": "🟒", "Negative": "πŸ”΄", "Neutral": "🟑"}
1305
-
1306
- # Build a set of pinned texts for quick lookup
1307
- pinned_texts = {m.get("text", "") for m in st.session_state.pinned_messages}
1308
-
1309
- for i, (_, row) in enumerate(filtered.iloc[::-1].iterrows()):
1310
- s = row.get("sentiment", "Neutral")
1311
- conf_pct = int(row.get("confidence", 0) * 100)
1312
- topic = clean_topic(row.get("topic", "General"))
1313
- t_color = TOPIC_COLOR.get(topic, "#6b7280")
1314
- s_color = SENT_COLORS.get(s, "#6b7280")
1315
- s_icon = SENT_ICON.get(s, "βšͺ")
1316
- conf_color = "#22c55e" if conf_pct >= 70 else "#eab308" if conf_pct >= 40 else "#ef4444"
1317
- msg_text = row.get("text", "")
1318
- is_pinned = msg_text in pinned_texts
1319
-
1320
- card_class = f"chat-card chat-{s.lower()}" + (" chat-pinned" if is_pinned else "")
1321
-
1322
- msg_col, pin_col = st.columns([11, 1])
1323
- with msg_col:
1324
- st.markdown(
1325
- f'<div class="{card_class}">'
1326
- f'<div class="chat-author">{s_icon} {row.get("author", "Unknown")}'
1327
- + (' <span style="font-size:0.7rem;color:#eab308;">πŸ“Œ</span>' if is_pinned else '') +
1328
- f'</div>'
1329
- f'<div class="chat-text">{msg_text}</div>'
1330
- f'<div class="chat-badges">'
1331
- f'<span class="badge" style="color:{s_color};border-color:{s_color}33;">{s}</span>'
1332
- f'<span class="badge" style="color:{conf_color};">Confidence: {conf_pct}%</span>'
1333
- f'<span class="badge" style="color:{t_color};border-color:{t_color}33;">{topic}</span>'
1334
- f'</div></div>',
1335
- unsafe_allow_html=True
1336
- )
1337
- with pin_col:
1338
- if is_pinned:
1339
- if st.button("πŸ“Œ", key=f"unpin_feed_{i}", help="Unpin this message"):
1340
- st.session_state.pinned_messages = [
1341
- m for m in st.session_state.pinned_messages if m.get("text") != msg_text
1342
- ]
1343
- st.rerun()
1344
- else:
1345
- if st.button("πŸ“", key=f"pin_{i}", help="Pin this message"):
1346
- msg_dict = row.to_dict()
1347
- if msg_dict not in st.session_state.pinned_messages:
1348
- st.session_state.pinned_messages.append(msg_dict)
1349
- st.rerun()
1350
-
1351
  # ── AUTO REFRESH ──────────────────────────────────────────────
1352
  if auto_refresh:
1353
  time.sleep(refresh_rate)
 
25
 
26
  r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, decode_responses=True)
27
 
28
+ TOPIC_LABELS = ["Appreciation", "Question", "Request/Feedback", "Promo", "Spam", "General", "MCQ Answer"]
29
  TOPIC_COLOR = {
30
  "Appreciation": "#f59e0b", "Question": "#3b82f6",
31
+ "Request/Feedback": "#8b5cf6",
32
  "Promo": "#ec4899", "Spam": "#ef4444", "General": "#6b7280",
33
  "MCQ Answer": "#10b981"
34
  }
 
198
  .empty-icon{font-size:3.5rem;margin-bottom:16px;}
199
  .empty-title{font-size:1.1rem;color:var(--text-2);font-weight:700;}
200
  .empty-sub{font-size:0.84rem;color:var(--text-3);margin-top:6px;}
201
+
202
+ [data-testid="stSidebar"] [role="radiogroup"] { display:flex; flex-direction:row; flex-wrap:nowrap; gap:4px; }
203
+ [data-testid="stSidebar"] [role="radiogroup"] label { flex:1; display:flex; align-items:center; justify-content:center; background:var(--bg-card); border:1px solid var(--pill-border); border-radius:8px; padding:6px 2px; cursor:pointer; transition:background 0.15s,border 0.15s; }
204
+ [data-testid="stSidebar"] [role="radiogroup"] label:hover { background:var(--pill-bg); border-color:var(--accent); }
205
+ [data-testid="stSidebar"] [role="radiogroup"] label[data-checked="true"],
206
+ [data-testid="stSidebar"] [role="radiogroup"] label:has(input:checked) { background:linear-gradient(135deg,var(--accent),var(--accent2)); border-color:var(--accent); }
207
+ [data-testid="stSidebar"] [role="radiogroup"] label p,
208
+ [data-testid="stSidebar"] [role="radiogroup"] label span { font-size:0.82rem !important; font-weight:700 !important; color:var(--text-1) !important; white-space:nowrap !important; }
209
+ [data-testid="stSidebar"] [role="radiogroup"] label:has(input:checked) p,
210
+ [data-testid="stSidebar"] [role="radiogroup"] label:has(input:checked) span { color:#fff !important; }
211
+ [data-testid="stSidebar"] [role="radiogroup"] input[type="radio"] { display:none !important; }
212
+ [data-testid="stSidebar"] [data-testid="stWidgetLabel"]:has(+ [role="radiogroup"]) { color:var(--text-2) !important; font-size:0.75rem !important; margin-bottom:4px; }
213
  </style>"""
214
 
215
  st.markdown(THEME_JS, unsafe_allow_html=True)
 
392
 
393
  @st.cache_data(ttl=10, show_spinner=False)
394
  def compute_top_contributors(all_data_json: str, top_n: int = 10) -> list[dict]:
395
+ """Return top N authors by message count with sentiment + topic breakdown."""
396
  import json as _j
397
  from collections import Counter
398
  msgs = _j.loads(all_data_json)
399
  if not msgs:
400
  return []
401
 
402
+ TOPICS = ["Appreciation", "Question", "Request/Feedback", "Promo", "Spam", "General", "MCQ Answer"]
403
  author_data: dict[str, dict] = {}
404
  for m in msgs:
405
  a = m.get("author", "Unknown")
406
  if a not in author_data:
407
+ author_data[a] = {
408
+ "count": 0,
409
+ "Positive": 0, "Neutral": 0, "Negative": 0,
410
+ **{t: 0 for t in TOPICS},
411
+ }
412
  author_data[a]["count"] += 1
413
  s = m.get("sentiment", "Neutral")
414
+ if s in ("Positive", "Neutral", "Negative"):
415
  author_data[a][s] += 1
416
+ t = m.get("topic", "General")
417
+ if t not in TOPICS:
418
+ t = "General"
419
+ author_data[a][t] += 1
420
 
421
  sorted_authors = sorted(author_data.items(), key=lambda x: x[1]["count"], reverse=True)[:top_n]
422
  result = []
 
425
  result.append({
426
  "author": author,
427
  "count": d["count"],
428
+ "pos_pct": round(d["Positive"] / total * 100),
429
+ "neu_pct": round(d["Neutral"] / total * 100),
430
+ "neg_pct": round(d["Negative"] / total * 100),
431
+ "t_appr": round(d["Appreciation"] / total * 100),
432
+ "t_ques": round(d["Question"] / total * 100),
433
+ "t_rf": round(d["Request/Feedback"] / total * 100),
434
+ "t_promo": round(d["Promo"] / total * 100),
435
+ "t_spam": round(d["Spam"] / total * 100),
436
+ "t_gen": round(d["General"] / total * 100),
437
+ "t_mcq": round(d["MCQ Answer"] / total * 100),
438
  })
439
  return result
440
 
 
488
  return None
489
 
490
 
491
+ @st.cache_data(ttl=10, show_spinner=False)
492
+ def detect_repeat_spammers(all_data_json: str, window_sec: int = 15, min_repeats: int = 2) -> list[dict]:
493
+ """
494
+ Detect users who send the same (or near-identical) message multiple times
495
+ within `window_sec` seconds. Returns list of spam burst dicts sorted by
496
+ repeat count descending.
497
+ """
498
+ import json as _j
499
+ import re as _re
500
+
501
+ msgs = _j.loads(all_data_json)
502
+ if not msgs:
503
+ return []
504
+
505
+ def _normalize(t: str) -> str:
506
+ return _re.sub(r"[^\w]", "", t.lower().strip())
507
+
508
+ bursts: dict[tuple, dict] = {}
509
+ for m in msgs:
510
+ author = m.get("author", "Unknown")
511
+ text = m.get("text", "").strip()
512
+ if not text:
513
+ continue
514
+ norm = _normalize(text)
515
+ if len(norm) < 4:
516
+ continue
517
+ ts_str = m.get("time", "")
518
+ try:
519
+ ts = datetime.fromisoformat(ts_str)
520
+ except Exception:
521
+ continue
522
+ key = (author, norm)
523
+ if key not in bursts:
524
+ bursts[key] = {
525
+ "author": author,
526
+ "text": text,
527
+ "topic": m.get("topic", "General"),
528
+ "sentiment": m.get("sentiment", "Neutral"),
529
+ "timestamps": [],
530
+ }
531
+ bursts[key]["timestamps"].append(ts)
532
+
533
+ results = []
534
+ for key, burst in bursts.items():
535
+ times = sorted(burst["timestamps"])
536
+ max_in_window = 1
537
+ for i in range(len(times)):
538
+ count_in_window = sum(
539
+ 1 for t in times[i:]
540
+ if (t - times[i]).total_seconds() <= window_sec
541
+ )
542
+ max_in_window = max(max_in_window, count_in_window)
543
+
544
+ if max_in_window >= min_repeats:
545
+ results.append({
546
+ "author": burst["author"],
547
+ "text": burst["text"],
548
+ "topic": burst["topic"],
549
+ "sentiment": burst["sentiment"],
550
+ "count": len(times),
551
+ "max_burst": max_in_window,
552
+ "first_seen": times[0].strftime("%H:%M:%S"),
553
+ "last_seen": times[-1].strftime("%H:%M:%S"),
554
+ })
555
+
556
+ return sorted(results, key=lambda x: x["max_burst"], reverse=True)
557
+
558
+
559
  # ── SESSION STATE INIT ────────────────────────────────────────
560
  MAX_STREAMS = 5
561
  STREAM_COLORS = ["#7c3aed", "#10b981", "#f59e0b", "#3b82f6", "#ec4899"]
 
567
  st.session_state.alert_dismissed = False
568
  if "last_alert_count" not in st.session_state:
569
  st.session_state.last_alert_count = 0
570
+ if "last_view" not in st.session_state:
571
+ st.session_state.last_view = "πŸ’¬ Comments"
572
  # Multi-stream: list of dicts {video_id, redis_key, label, proc}
573
  if "streams" not in st.session_state:
574
  st.session_state.streams = [
 
587
 
588
  # ── Display Settings ──
589
  st.markdown('<p style="font-size:0.68rem;font-weight:700;color:var(--accent);text-transform:uppercase;letter-spacing:0.1em;margin-bottom:8px;">Display Settings</p>', unsafe_allow_html=True)
590
+ active_view = st.radio(
591
+ "View",
592
+ options=["πŸ’¬ Comments", "πŸ“Š Stats & Info"],
593
+ index=0,
594
+ horizontal=True,
595
+ key="active_view",
596
+ )
597
+ refresh_rate = st.radio(
598
+ "Refresh interval (s)",
599
+ options=[10, 20, 30, 40, 50, 60],
600
+ index=0,
601
+ horizontal=True,
602
+ key="refresh_rate",
603
+ )
604
+ msg_limit = st.slider("Message window", 10, 400, 50, step=10)
605
  auto_refresh = st.toggle("Live auto-refresh", value=True)
606
  st.divider()
607
 
 
665
  st.session_state.streams[idx]["proc"] = proc
666
  st.session_state.streams[idx]["video_id"] = vid
667
  st.session_state.streams[idx]["redis_key"] = rkey
668
+ # Fetch and store title for ALL streams (used in header pills)
669
+ _title = fetch_video_title(vid)
670
+ st.session_state.streams[idx]["video_title"] = _title or vid
671
  if idx == 0:
672
  update_config_video_id(vid)
673
+ r.set("video_title", _title) if _title else r.delete("video_title")
 
674
  st.session_state.alert_dismissed = False
675
  st.success(f"Stream {label} started β†’ `{rkey}`")
676
  else:
 
726
  st.rerun()
727
  st.divider()
728
 
729
+ # ── Download Data ──
730
+ st.markdown('<p style="font-size:0.68rem;font-weight:700;color:var(--accent);text-transform:uppercase;letter-spacing:0.1em;margin-bottom:8px;">Download Data</p>', unsafe_allow_html=True)
731
+ _active_streams = [s for s in st.session_state.streams if s.get("redis_key")]
732
+ if _active_streams:
733
+ for _s in _active_streams:
734
+ _rkey = _s["redis_key"]
735
+ _slabel = _s["label"]
736
+ _all_raws = r.lrange(_rkey, 0, -1)
737
+ _dl_rows = []
738
+ for _raw in _all_raws:
739
+ try:
740
+ _dl_rows.append(json.loads(_raw))
741
+ except Exception:
742
+ pass
743
+ if _dl_rows:
744
+ _dl_df = pd.DataFrame(_dl_rows)
745
+ _ts = datetime.now().strftime("%Y%m%d_%H%M%S")
746
+ _fname = f"livepulse_{_rkey}_{_ts}.csv"
747
+ _csv_bytes = _dl_df.to_csv(index=False).encode("utf-8")
748
+ st.download_button(
749
+ label=f"⬇ {_slabel} ({len(_dl_rows)} msgs)",
750
+ data=_csv_bytes,
751
+ file_name=_fname,
752
+ mime="text/csv",
753
+ key=f"dl_{_rkey}",
754
+ )
755
+ else:
756
+ st.markdown(f'<div style="font-size:0.72rem;color:var(--text-3);">{_slabel}: no data yet</div>', unsafe_allow_html=True)
757
+ else:
758
+ st.markdown('<div style="font-size:0.72rem;color:var(--text-3);">No active streams</div>', unsafe_allow_html=True)
759
+ st.divider()
760
+
761
  # ── Danger Zone ──
762
  st.markdown('<p style="font-size:0.68rem;font-weight:700;color:#ef4444;text-transform:uppercase;letter-spacing:0.1em;margin-bottom:8px;">Danger Zone</p>', unsafe_allow_html=True)
763
  if st.button("πŸ—‘ Clear all data", width='stretch'):
 
777
 
778
  # ── PAGE HEADER ───────────────────────────────────────────────
779
  _video_title = r.get("video_title")
780
+
781
# Build the header subtitle (titles of all active streams) and the coloured
# status pills in a single pass, so each stream's Redis list is probed at most
# once per rerun instead of once per loop.
import html as _html  # stdlib; stream titles end up in unsafe_allow_html markup

_all_titles = []
_pill_chunks = []
for _hi, _hs in enumerate(st.session_state.streams):
    _hkey = _hs.get("redis_key", "")
    _hproc = _hs.get("proc")
    # A stream counts as running while its worker process has not exited.
    _hrunning = _hproc is not None and _hproc.poll() is None
    # Only streams that are running or already hold messages are shown.
    if not (_hrunning or r.llen(_hkey) > 0):
        continue

    _st = _hs.get("video_title") or _hs.get("video_id")
    if _st:
        # Escape: the title is external text interpolated into raw HTML.
        _all_titles.append(f"▶ {_html.escape(str(_st))}")

    _hcolor = STREAM_COLORS[_hi]
    _hlabel = STREAM_NAMES[_hi]
    _htitle = _st or _hkey or f"Stream {_hlabel}"
    _hdot_color = "#22c55e" if _hrunning else "#ef4444"
    _hdot = (
        '<span style="display:inline-block;width:7px;height:7px;'
        f'background:{_hdot_color};border-radius:50%;margin-right:5px;'
        'vertical-align:middle;"></span>'
    )
    # Truncate first, then escape, so an HTML entity is never cut in half.
    _hshort = _html.escape(str(_htitle)[:22])
    _pill_chunks.append(
        f'<span style="display:inline-flex;align-items:center;background:{_hcolor}18;'
        f'border:1px solid {_hcolor}44;border-radius:20px;padding:3px 12px;'
        f'font-size:0.75rem;font-weight:700;color:{_hcolor};margin-right:8px;">'
        f'{_hdot}Stream {_hlabel} · {_hshort}</span>'
    )

# Fall back to the generic tagline when no stream is active.
_subtitle = (
    " · ".join(_all_titles)
    if _all_titles
    else "Real-time sentiment · topic classification · engagement insights"
)
_active_stream_pills = "".join(_pill_chunks)
817
 
818
  col_title, col_live = st.columns([7, 1])
819
  with col_title:
 
821
  '<div style="padding:8px 0 4px;">'
822
  '<div style="font-size:2rem;font-weight:800;color:var(--text-1);letter-spacing:-0.04em;">YouTube Live Chat Analytics</div>'
823
  f'<div style="font-size:1.25rem;color:var(--accent-text);font-weight:600;margin-top:6px;">{_subtitle}</div>'
824
+ + (f'<div style="margin-top:10px;">{_active_stream_pills}</div>' if _active_stream_pills else '') +
825
  '</div>', unsafe_allow_html=True
826
  )
827
  with col_live:
 
834
 
835
  st.divider()
836
 
837
# ── PRIMARY STREAM SELECTOR ───────────────────────────────────
# A stream is selectable when its worker is alive or it already holds messages.
# Positions are captured with enumerate() instead of list.index(), which
# returns the first *equal* dict and could label the wrong stream.
_indexed_active = []
for _psi_real, _pss in enumerate(st.session_state.streams):
    _ps_proc = _pss.get("proc")
    _ps_alive = _ps_proc is not None and _ps_proc.poll() is None
    if _ps_alive or r.llen(_pss.get("redis_key", "")) > 0:
        _indexed_active.append((_psi_real, _pss))
_streams_with_data = [_pss for _, _pss in _indexed_active]

if len(_indexed_active) > 1:
    # Several candidates: let the user choose which one feeds the dashboard.
    _ps_options = {}
    for _psi_real, _pss in _indexed_active:
        _pst = _pss.get("video_title") or _pss.get("video_id") or _pss.get("redis_key")
        _psl = f"Stream {STREAM_NAMES[_psi_real]} — {str(_pst)[:35]}"
        _ps_options[_psl] = _pss.get("redis_key", "")
    _ps_col, _ = st.columns([2, 3])
    with _ps_col:
        _selected_primary_label = st.selectbox(
            "📊 Dashboard data source",
            list(_ps_options),
            key="primary_stream_select",
            help="Switch which stream's data powers the main dashboard stats and charts",
        )
    _primary_key = _ps_options[_selected_primary_label]
elif _indexed_active:
    # Exactly one active stream: use it even when it is not the first slot.
    _primary_key = _indexed_active[0][1].get("redis_key", "")
else:
    # Nothing active yet: keep the original default of the first stream slot.
    _primary_key = st.session_state.streams[0]["redis_key"]
860
+
861
  # ── DATA LOAD ─────────────────────────────────────────────────
862
+ _current_len = r.llen(_primary_key)
863
+ # Cap cumulative load at 50k β€” enough for accurate stats, avoids 100k+ slowdowns
864
+ _CUMULATIVE_CAP = 50_000
865
+ all_data = load_stream_data(_primary_key, limit=_CUMULATIVE_CAP if _current_len > _CUMULATIVE_CAP else None)
866
  data = all_data[-msg_limit:] if len(all_data) > msg_limit else all_data
867
 
868
  if not all_data:
 
886
  all_df["sentiment"] = all_df["sentiment"].apply(clean_sentiment)
887
  all_df["topic"] = all_df["topic"].apply(clean_topic) if "topic" in all_df.columns else "General"
888
 
889
+ # ── VIEW ROUTING ──────────────────────────────────────────────
890
+ # Read directly from session state to get the current widget value
891
+ _active_view = st.session_state.get("active_view", "πŸ’¬ Comments")
892
+ _show_stats = _active_view == "πŸ“Š Stats & Info"
893
+ _show_comments = _active_view == "πŸ’¬ Comments"
894
+
895
+ if _show_comments:
896
+ st.markdown('<div class="sec-hdr"><span class="sec-ttl">Live Chat Feed</span></div>', unsafe_allow_html=True)
897
+
898
# Comments view (runs inside the `if _show_comments:` branch).
# Offer every stream that already has stored messages as a feed source.
_feed_stream_options = {}
for _fidx, _fs in enumerate(st.session_state.streams):
    _fkey = _fs.get("redis_key", "")
    if r.llen(_fkey) > 0:
        # `or` also covers a video_id that is present but None.
        _fname = str(_fs.get("video_id") or _fkey)[:20]
        _flabel = f"Stream {STREAM_NAMES[_fidx]} — {_fname}"
        _feed_stream_options[_flabel] = _fkey

_cf0, _cf1, _cf2, _cf3, _cf4 = st.columns([1, 1, 1, 1, 2])
with _cf0:
    if len(_feed_stream_options) > 1:
        _selected_stream_label = st.selectbox(
            "Stream", list(_feed_stream_options), key="feed_stream_select"
        )
        _feed_key = _feed_stream_options[_selected_stream_label]
    elif _feed_stream_options:
        # Single source: show its label and read from that same stream, so the
        # caption and the data cannot disagree.
        _only_label, _feed_key = next(iter(_feed_stream_options.items()))
        st.markdown(
            f'<div style="font-size:0.75rem;color:var(--text-2);padding-top:28px;">'
            f'{_only_label}</div>',
            unsafe_allow_html=True
        )
    else:
        # No stream has data yet; follow the dashboard's primary stream.
        _feed_key = _primary_key

if _feed_key == _primary_key:
    # NOTE(review): `df` is built outside this view; this assumes it is the
    # cleaned msg_limit window of the primary stream (`data`) — confirm.
    _feed_df = df.copy()
else:
    _feed_raw = load_stream_data(_feed_key, limit=msg_limit)
    _feed_df = pd.DataFrame(_feed_raw) if _feed_raw else pd.DataFrame()
    if not _feed_df.empty:
        _feed_df["sentiment"] = _feed_df["sentiment"].apply(clean_sentiment)
        _feed_df["topic"] = _feed_df["topic"].apply(clean_topic) if "topic" in _feed_df.columns else "General"
932
+
933
+ with _cf1:
934
+ _sentiment_filter = st.selectbox("Sentiment", ["All", "Positive", "Neutral", "Negative"])
935
+ with _cf2:
936
+ _topic_filter = st.selectbox("Topic", ["All"] + TOPIC_LABELS)
937
+ with _cf3:
938
+ _all_action_types = [
939
+ "General Appreciation", "Testimonials", "Faculty Request", "Faculty Feedback",
940
+ "Content requests", "Content Feedback", "Academic / Lecture / Concept Doubts",
941
+ "Academic requests", "Study Materials, Deliverables & Learning Resources",
942
+ "Access & Support", "Batch details / structure / offerings (incl faculty)",
943
+ "Schedule & logistics (Batch)", "Information- Exam", "Information- Post Exam",
944
+ "Eligibility & audience fit - Can I take this?", "Suitability & Sufficiency (Is this enough?)",
945
+ "Guidance- What should I take/do?", "Language Request", "Language medium",
946
+ "Pricing, discounts, scholarships, offer validity", "Fees + Financial Queries",
947
+ "Product/feature requests (non-content)", "Offline expansion & event-city requests",
948
+ "Offers + Events", "General Feedback", "Others", "N/A",
949
+ ]
950
+ _action_type_filter = st.selectbox("Action Type", ["All"] + _all_action_types)
951
+ with _cf4:
952
+ _search_term = st.text_input("Search messages", placeholder="Filter by keyword...")
953
+
954
# Comments view: apply the sentiment / topic / action-type / keyword filters.
_any_filter = (
    _sentiment_filter != "All"
    or _topic_filter != "All"
    or _action_type_filter != "All"
    or bool(_search_term)
)


def _apply_feed_filters(_frame):
    """Return the rows of ``_frame`` that satisfy every active feed filter."""
    if _sentiment_filter != "All":
        _frame = _frame[_frame["sentiment"] == _sentiment_filter]
    if _topic_filter != "All":
        _frame = _frame[_frame["topic"] == _topic_filter]
    # The action-type filter is skipped when the column does not exist.
    if _action_type_filter != "All" and "action_type" in _frame.columns:
        _frame = _frame[_frame["action_type"] == _action_type_filter]
    if _search_term:
        # regex=False: the keyword is user input, so "(" or "c++" must be
        # matched literally rather than compiled as a pattern.
        _frame = _frame[
            _frame["text"].str.contains(_search_term, case=False, na=False, regex=False)
        ]
    return _frame


if _any_filter:
    # Filters search the stream's whole stored history, not just the window.
    _full_raw = load_stream_data(_feed_key)
    _total_scanned = len(_full_raw) if _full_raw else 0
    if _full_raw:
        _full_df = pd.DataFrame(_full_raw)
        _full_df["sentiment"] = _full_df["sentiment"].apply(clean_sentiment)
        _full_df["topic"] = _full_df["topic"].apply(clean_topic) if "topic" in _full_df.columns else "General"
        _filtered = _apply_feed_filters(_full_df)
        # Keep only the newest msg_limit matches.
        if len(_filtered) > msg_limit:
            _filtered = _filtered.iloc[-msg_limit:]
    else:
        _filtered = pd.DataFrame()
else:
    # No filter is active, so the already-loaded window is shown unchanged.
    _filtered = _feed_df.copy() if not _feed_df.empty else pd.DataFrame()
    _total_scanned = len(_feed_df)
995
+
996
+ _feed_hdr, _feed_dl = st.columns([3, 1])
997
+ with _feed_hdr:
998
+ if _any_filter:
999
+ st.markdown(
1000
+ f'<div style="font-size:0.78rem;color:var(--text-3);margin-bottom:12px;">'
1001
+ f'Showing {len(_filtered)} matching (last {msg_limit} from {_total_scanned} total)</div>',
1002
+ unsafe_allow_html=True
1003
+ )
1004
+ else:
1005
+ st.markdown(
1006
+ f'<div style="font-size:0.78rem;color:var(--text-3);margin-bottom:12px;">'
1007
+ f'Showing {len(_filtered)} of {len(_feed_df)} messages</div>',
1008
+ unsafe_allow_html=True
1009
+ )
1010
+ with _feed_dl:
1011
+ if not _filtered.empty:
1012
+ _export_cols = [c for c in ["author", "text", "sentiment", "confidence", "topic", "time"] if c in _filtered.columns]
1013
+ csv_download(_filtered[_export_cols], "Download Feed CSV", "chat_feed.csv")
1014
+
1015
# Comments view: render the filtered messages newest-first as cards, each with
# a pin / unpin button.
import html as _html  # stdlib; chat authors and messages are untrusted input
import math as _math
import re as _re2

_SENT_ICON = {"Positive": "🟢", "Negative": "🔴", "Neutral": "🟡"}
# Strips :emoji_shortcode: tokens from the displayed text; compiled once.
_EMOJI_CODE_RE = _re2.compile(r":[a-zA-Z0-9_\-]+:")
_pinned_texts = {m.get("text", "") for m in st.session_state.pinned_messages}


def _feed_conf_pct(_value) -> int:
    """Scale a confidence value to an integer percent; 0 when missing or invalid."""
    try:
        _scaled = float(_value) * 100
    except (TypeError, ValueError):
        return 0
    # Rows without a confidence surface as NaN in the frame; int(NaN) raises.
    return int(_scaled) if _math.isfinite(_scaled) else 0


for _i, (_, _row) in enumerate(_filtered.iloc[::-1].iterrows()):
    _s = _row.get("sentiment", "Neutral")
    _conf_pct = _feed_conf_pct(_row.get("confidence", 0))
    _topic = clean_topic(_row.get("topic", "General"))
    _t_color = TOPIC_COLOR.get(_topic, "#6b7280")
    _s_color = SENT_COLORS.get(_s, "#6b7280")
    _s_icon = _SENT_ICON.get(_s, "⚪")
    _conf_color = "#22c55e" if _conf_pct >= 70 else "#eab308" if _conf_pct >= 40 else "#ef4444"

    # The raw value is kept for pin matching; only strings are displayed.
    _msg_text = _row.get("text", "")
    _text_str = _msg_text if isinstance(_msg_text, str) else ""
    _display_text = _EMOJI_CODE_RE.sub("", _text_str).strip() or _text_str
    _is_pinned = _msg_text in _pinned_texts

    # Missing / NaN action types collapse to "N/A" so no "nan" badge appears.
    _raw_action = _row.get("action_type", "N/A")
    _action_type = _raw_action if isinstance(_raw_action, str) and _raw_action else "N/A"
    _card_class = f"chat-card chat-{str(_s).lower()}" + (" chat-pinned" if _is_pinned else "")

    _msg_col, _pin_col = st.columns([11, 1])
    with _msg_col:
        _pin_mark = ' <span style="font-size:0.7rem;color:#eab308;">📌</span>' if _is_pinned else ''
        _ab = (
            f'<span class="badge" style="color:#a78bfa;border-color:#a78bfa33;">🏷 {_html.escape(_action_type)}</span>'
            if _action_type != "N/A" else ""
        )
        # Every piece of message-derived text is escaped before it reaches
        # unsafe_allow_html markup.
        _author_html = _html.escape(str(_row.get("author", "Unknown")))
        st.markdown(
            f'<div class="{_card_class}">'
            f'<div class="chat-author">{_s_icon} {_author_html}{_pin_mark}</div>'
            f'<div class="chat-text">{_html.escape(_display_text)}</div>'
            f'<div class="chat-badges">'
            f'<span class="badge" style="color:{_s_color};border-color:{_s_color}33;">{_html.escape(str(_s))}</span>'
            f'<span class="badge" style="color:{_conf_color};">Confidence: {_conf_pct}%</span>'
            f'<span class="badge" style="color:{_t_color};border-color:{_t_color}33;">{_html.escape(str(_topic))}</span>'
            f'{_ab}'
            f'</div></div>',
            unsafe_allow_html=True
        )
    with _pin_col:
        if _is_pinned:
            if st.button("📌", key=f"unpin_feed_{_i}", help="Unpin this message"):
                st.session_state.pinned_messages = [
                    m for m in st.session_state.pinned_messages if m.get("text") != _msg_text
                ]
                st.rerun()
        else:
            if st.button("📍", key=f"pin_{_i}", help="Pin this message"):
                _msg_dict = _row.to_dict()
                if _msg_dict not in st.session_state.pinned_messages:
                    st.session_state.pinned_messages.append(_msg_dict)
                st.rerun()
1066
+
1067
+ if auto_refresh:
1068
+ time.sleep(refresh_rate)
1069
+ st.rerun()
1070
+ st.stop()
1071
+
1072
+ # ── ALERT BANNERS (Stats view only) ───────────────────────────
1073
  if alert_enabled:
1074
  alert = check_alert(all_df, threshold=alert_threshold, window=alert_window)
1075
  total_now = len(all_df)
 
1170
  unsafe_allow_html=True
1171
  )
1172
 
1173
# ── CUMULATIVE TOPIC ──────────────────────────────────────────
# One stat card per topic label: message count and share of all messages.
st.divider()
st.markdown(
    '<div class="sec-hdr"><span class="sec-ttl">Cumulative Topic</span><span class="sec-pill">All Time</span></div>',
    unsafe_allow_html=True
)
# Colours are looked up in TOPIC_COLOR by label. A hard-coded positional list
# stops matching as soon as TOPIC_LABELS changes length or order, and zip()
# would then silently drop the trailing labels.
_topic_colors_list = [TOPIC_COLOR.get(_lbl, "#6b7280") for _lbl in TOPIC_LABELS]
_has_topic_col = "topic" in all_df.columns
_ct_cols = st.columns(len(TOPIC_LABELS))
for _ci, (_lbl, _clr) in enumerate(zip(TOPIC_LABELS, _topic_colors_list)):
    _cnt = int((all_df["topic"] == _lbl).sum()) if _has_topic_col else 0
    # max(..., 1) guards the percentage against an empty data set.
    _pct = _cnt / max(c_total, 1) * 100
    with _ct_cols[_ci]:
        st.markdown(
            f'<div class="stat-card"><div class="stat-accent" style="background:{_clr};"></div>'
            f'<div class="stat-number" style="color:{_clr};font-size:1.8rem;">{_cnt}</div>'
            f'<div class="stat-label">{_lbl}</div>'
            f'<div class="stat-sub">{_pct:.1f}% of msgs</div></div>',
            unsafe_allow_html=True
        )
1192
+
1193
 
1194
  # ── WINDOW METRICS ────────────────────────────────────────────
1195
  st.divider()
 
1209
  c3.metric("Neutral", neu, f"{neu/total*100:.1f}%")
1210
  c4.metric("Negative", neg, f"{neg/total*100:.1f}%")
1211
 
1212
+ # ── SENTIMENT + TOPIC CHARTS (ALL TIME) ──────────────────────
1213
  st.divider()
1214
+ col_s1, col_s2, col_t1, col_t2 = st.columns(4)
1215
 
1216
+ with col_s1:
1217
  st.markdown('<div class="chart-wrap">', unsafe_allow_html=True)
1218
+ st.markdown('<div class="chart-title">Sentiment Distribution</div><div class="chart-sub">All-time message count by sentiment class</div>', unsafe_allow_html=True)
1219
  fig_bar = go.Figure(go.Bar(
1220
  x=["Positive", "Neutral", "Negative"],
1221
+ y=[c_pos, c_neu, c_neg],
1222
  marker_color=["#22c55e", "#eab308", "#ef4444"],
1223
  marker_line_width=0,
1224
+ text=[c_pos, c_neu, c_neg],
1225
  textposition="outside",
1226
  textfont=dict(size=12),
1227
  hovertemplate="<b>%{x}</b><br>Count: %{y}<extra></extra>",
 
1232
  with bar_hdr:
1233
  show_bar_data = st.checkbox("View data", key="show_bar")
1234
  with bar_dl:
1235
+ bar_df = pd.DataFrame({"Sentiment": ["Positive", "Neutral", "Negative"], "Count": [c_pos, c_neu, c_neg]})
1236
  csv_download(bar_df, "Download CSV", "sentiment_distribution.csv")
1237
  if show_bar_data:
1238
  st.dataframe(bar_df, width='stretch', hide_index=True)
1239
  st.markdown('</div>', unsafe_allow_html=True)
1240
 
1241
+ with col_s2:
1242
  st.markdown('<div class="chart-wrap">', unsafe_allow_html=True)
1243
+ st.markdown('<div class="chart-title">Sentiment Donut</div><div class="chart-sub">All-time proportional share per class</div>', unsafe_allow_html=True)
1244
  fig_pie = go.Figure(go.Pie(
1245
  labels=["Positive", "Neutral", "Negative"],
1246
+ values=[c_pos, c_neu, c_neg],
1247
  marker_colors=["#22c55e", "#eab308", "#ef4444"],
1248
  hole=0.58,
1249
  textinfo="percent",
 
1252
  fig_pie.update_layout(
1253
  **{**plotly_layout(260),
1254
  "showlegend": True,
1255
+ "legend": dict(orientation="h", y=-0.08, font=dict(size=11, color="#f1f5f9"))}
1256
  )
1257
  st.plotly_chart(fig_pie, width='stretch', config={"displayModeBar": False})
1258
  pie_hdr, pie_dl = st.columns([1, 1])
 
1261
  with pie_dl:
1262
  pie_df = pd.DataFrame({
1263
  "Sentiment": ["Positive", "Neutral", "Negative"],
1264
+ "Count": [c_pos, c_neu, c_neg],
1265
+ "Percentage": [f"{c_pos/c_total*100:.1f}%", f"{c_neu/c_total*100:.1f}%", f"{c_neg/c_total*100:.1f}%"]
1266
  })
1267
  csv_download(pie_df, "Download CSV", "sentiment_breakdown.csv")
1268
  if show_pie_data:
1269
  st.dataframe(pie_df, width='stretch', hide_index=True)
1270
  st.markdown('</div>', unsafe_allow_html=True)
1271
 
1272
# Topic bar chart and donut share the same per-label counts and colours.
_has_topic_col = "topic" in all_df.columns
_tc_vals = [
    int((all_df["topic"] == _topic_name).sum()) if _has_topic_col else 0
    for _topic_name in TOPIC_LABELS
]
# Colours are derived from TOPIC_COLOR so they always line up with
# TOPIC_LABELS, whatever its length or order.
_tc_colors = [TOPIC_COLOR.get(_topic_name, "#6b7280") for _topic_name in TOPIC_LABELS]

with col_t1:
    st.markdown('<div class="chart-wrap">', unsafe_allow_html=True)
    st.markdown('<div class="chart-title">Topic Distribution</div><div class="chart-sub">Message count by topic class</div>', unsafe_allow_html=True)
    fig_tbar = go.Figure(go.Bar(
        x=TOPIC_LABELS,
        y=_tc_vals,
        marker_color=_tc_colors,
        marker_line_width=0,
        text=_tc_vals,
        textposition="outside",
        textfont=dict(size=11),
        hovertemplate="<b>%{x}</b><br>Count: %{y}<extra></extra>",
    ))
    _tbar_layout = plotly_layout(260)
    # Smaller tick font so all topic names fit under the narrow column.
    _tbar_layout["xaxis"]["tickfont"] = dict(size=9)
    fig_tbar.update_layout(**_tbar_layout)
    st.plotly_chart(fig_tbar, width='stretch', config={"displayModeBar": False})
    st.markdown('</div>', unsafe_allow_html=True)

with col_t2:
    st.markdown('<div class="chart-wrap">', unsafe_allow_html=True)
    st.markdown('<div class="chart-title">Topic Donut</div><div class="chart-sub">Proportional share per topic</div>', unsafe_allow_html=True)
    fig_tpie = go.Figure(go.Pie(
        labels=TOPIC_LABELS,
        values=_tc_vals,
        marker_colors=_tc_colors,
        hole=0.58,
        textinfo="percent",
        hovertemplate="<b>%{label}</b><br>%{value} messages (%{percent})<extra></extra>",
    ))
    fig_tpie.update_layout(
        **{**plotly_layout(260),
           "showlegend": True,
           "legend": dict(orientation="h", y=-0.08, font=dict(size=10, color="#f1f5f9"))}
    )
    st.plotly_chart(fig_tpie, width='stretch', config={"displayModeBar": False})
    st.markdown('</div>', unsafe_allow_html=True)
1311
+
1312
  # ── Confidence trend ──────────────────────────────────────────
1313
  if "confidence" in df.columns:
1314
  st.divider()
 
1431
  st.dataframe(topic_df, width='stretch', hide_index=True)
1432
  st.markdown('</div>', unsafe_allow_html=True)
1433
 
1434
# ── Sentiment mix per topic ────────────────────────────────────
# Stacked horizontal bars: one row per topic label, segments are the rounded
# percentage of negative / neutral / positive messages within that topic.
st.markdown('<div class="chart-wrap" style="margin-top:16px;">', unsafe_allow_html=True)
st.markdown('<div class="chart-title">Sentiment by Topic</div><div class="chart-sub">% positive / neutral / negative within each topic category</div>', unsafe_allow_html=True)

_topic_sent_data = []
for _lbl in TOPIC_LABELS:
    _mask = all_df["topic"] == _lbl
    _total = int(_mask.sum())
    # Topics without any message keep an all-zero row so they still appear.
    _entry = {"topic": _lbl, "pos": 0, "neu": 0, "neg": 0}
    if _total != 0:
        _sub = all_df[_mask]
        for _field, _sentiment_name in (("pos", "Positive"), ("neu", "Neutral"), ("neg", "Negative")):
            _entry[_field] = round((_sub["sentiment"] == _sentiment_name).sum() / _total * 100)
    _topic_sent_data.append(_entry)

fig_ts = go.Figure()
_ts_series = (("neg", "Neg", "#ef4444"), ("neu", "Neu", "#eab308"), ("pos", "Pos", "#22c55e"))
for _sk, _sl, _sc in _ts_series:
    _ts_trace = go.Bar(
        y=[_item["topic"] for _item in _topic_sent_data],
        x=[_item[_sk] for _item in _topic_sent_data],
        name=_sl,
        orientation="h",
        marker_color=_sc,
        hovertemplate="<b>%{y}</b><br>" + _sl + ": %{x}%<extra></extra>",
    )
    fig_ts.add_trace(_ts_trace)

_layout_ts = plotly_layout(260)
# Percent axis fixed to 0-100; y axis reversed so the first label is on top.
_layout_ts["xaxis"]["range"] = [0, 100]
_layout_ts["xaxis"]["ticksuffix"] = "%"
_layout_ts["yaxis"]["autorange"] = "reversed"
_layout_ts["barmode"] = "stack"
_layout_ts["showlegend"] = True
_layout_ts["legend"] = dict(orientation="h", y=1.08, x=0.35, font=dict(size=11))
fig_ts.update_layout(**_layout_ts)
st.plotly_chart(fig_ts, width='stretch', config={"displayModeBar": False})
st.markdown('</div>', unsafe_allow_html=True)
1471
+
1472
+
1473
+ # ── ACTION TYPE CHARTS ────────────────────────────────────────
1474
+ st.divider()
1475
+ st.markdown(
1476
+ '<div class="sec-hdr"><span class="sec-ttl">Action Type Analysis</span><span class="sec-pill">Last 100 msgs</span></div>',
1477
+ unsafe_allow_html=True
1478
+ )
1479
+
1480
+ # Category groupings
1481
+ _QUESTION_ACTIONS = [
1482
+ "Access & Support",
1483
+ "Academic / Lecture / Concept Doubts",
1484
+ "Study Materials, Deliverables & Learning Resources",
1485
+ "Batch details / structure / offerings (incl faculty)",
1486
+ "Schedule & logistics (Batch)",
1487
+ "Guidance- What should I take/do?",
1488
+ "Suitability & Sufficiency (Is this enough?)",
1489
+ "Eligibility & audience fit - Can I take this?",
1490
+ "Information- Exam",
1491
+ "Information- Post Exam",
1492
+ ]
1493
+ _REQUEST_ACTIONS = [
1494
+ "Content requests",
1495
+ "Content Feedback",
1496
+ "Faculty Request",
1497
+ "Faculty Feedback",
1498
+ "Academic requests",
1499
+ "Language Request",
1500
+ "Language medium",
1501
+ "Product/feature requests (non-content)",
1502
+ "Offline expansion & event-city requests",
1503
+ "General Feedback",
1504
+ "Others",
1505
+ ]
1506
+ _SHORT_ACTION = {
1507
+ "Access & Support": "Access & Support",
1508
+ "Academic / Lecture / Concept Doubts": "Academic Doubts",
1509
+ "Study Materials, Deliverables & Learning Resources": "Study Materials & Learning Resources",
1510
+ "Batch details / structure / offerings (incl faculty)": "Batch Details & Offerings",
1511
+ "Schedule & logistics (Batch)": "Batch Schedule & Logistics",
1512
+ "Guidance- What should I take/do?": "Guidance (What Should I Take/Do?)",
1513
+ "Suitability & Sufficiency (Is this enough?)": "Suitability & Sufficiency (Is This Enough?)",
1514
+ "Eligibility & audience fit - Can I take this?": "Eligibility (Can I Take This?)",
1515
+ "Information- Exam": "Exam Information",
1516
+ "Information- Post Exam": "Post Exam Information",
1517
+ "Content requests": "Content requests",
1518
+ "Content Feedback": "Content Feedback",
1519
+ "Faculty Request": "Faculty Request",
1520
+ "Faculty Feedback": "Faculty Feedback",
1521
+ "Academic requests": "Academic requests",
1522
+ "Language Request": "Language Request",
1523
+ "Language medium": "Language Medium",
1524
+ "Product/feature requests (non-content)": "Non Content Product Requests",
1525
+ "Offline expansion & event-city requests": "Offline Expansion & Event Requests",
1526
+ "General Feedback": "General Feedback",
1527
+ "Others": "Others",
1528
+ }
1529
+
1530
+ # Compute counts from last 100 messages
1531
+ _at_counts: dict[str, int] = {}
1532
+ if "action_type" in all_df.columns:
1533
+ for _at in _QUESTION_ACTIONS + _REQUEST_ACTIONS:
1534
+ _at_counts[_at] = int((all_df.tail(100)["action_type"] == _at).sum())
1535
+ else:
1536
+ _at_counts = {_at: 0 for _at in _QUESTION_ACTIONS + _REQUEST_ACTIONS}
1537
+
1538
+ _q_data = {k: _at_counts.get(k, 0) for k in _QUESTION_ACTIONS if _at_counts.get(k, 0) > 0}
1539
+ _rf_data = {k: _at_counts.get(k, 0) for k in _REQUEST_ACTIONS if _at_counts.get(k, 0) > 0}
1540
+ _q_total = sum(_q_data.values())
1541
+ _rf_total = sum(_rf_data.values())
1542
+
1543
+ _at_col1, _at_col2 = st.columns(2)
1544
+
1545
+ with _at_col1:
1546
+ st.markdown(
1547
+ f'<div class="chart-wrap"><div class="chart-title">Type of Questions</div>'
1548
+ f'<div class="chart-sub">({_q_total} comments)</div>',
1549
+ unsafe_allow_html=True
1550
+ )
1551
+ if _q_data:
1552
+ _q_sorted = sorted(_q_data.items(), key=lambda x: x[1], reverse=True)
1553
+ _q_labels = [_SHORT_ACTION.get(k, k) for k, _ in _q_sorted]
1554
+ _q_vals = [v for _, v in _q_sorted]
1555
+ fig_q = go.Figure(go.Bar(
1556
+ x=_q_labels, y=_q_vals,
1557
+ marker_color="#4a90d9",
1558
+ marker_line_width=0,
1559
+ text=_q_vals, textposition="outside",
1560
+ textfont=dict(size=11, color="#fff"),
1561
+ hovertemplate="<b>%{x}</b><br>Comments: %{y}<extra></extra>",
1562
+ ))
1563
+ fig_q.update_layout(**plotly_layout(280))
1564
+ st.plotly_chart(fig_q, width='stretch', config={"displayModeBar": False})
1565
+ else:
1566
+ st.markdown('<div style="text-align:center;padding:40px;color:var(--text-3);">No data yet</div>', unsafe_allow_html=True)
1567
+ st.markdown('</div>', unsafe_allow_html=True)
1568
+
1569
+ with _at_col2:
1570
+ st.markdown(
1571
+ f'<div class="chart-wrap"><div class="chart-title">Type of Requests &amp; Feedback</div>'
1572
+ f'<div class="chart-sub">({_rf_total} comments)</div>',
1573
+ unsafe_allow_html=True
1574
+ )
1575
+ if _rf_data:
1576
+ _rf_sorted = sorted(_rf_data.items(), key=lambda x: x[1], reverse=True)
1577
+ _rf_labels = [_SHORT_ACTION.get(k, k) for k, _ in _rf_sorted]
1578
+ _rf_vals = [v for _, v in _rf_sorted]
1579
+ fig_rf = go.Figure(go.Bar(
1580
+ x=_rf_labels, y=_rf_vals,
1581
+ marker_color="#f5a623",
1582
+ marker_line_width=0,
1583
+ text=_rf_vals, textposition="outside",
1584
+ textfont=dict(size=11, color="#fff"),
1585
+ hovertemplate="<b>%{x}</b><br>Comments: %{y}<extra></extra>",
1586
+ ))
1587
+ fig_rf.update_layout(**plotly_layout(280))
1588
+ st.plotly_chart(fig_rf, width='stretch', config={"displayModeBar": False})
1589
+ else:
1590
+ st.markdown('<div style="text-align:center;padding:40px;color:var(--text-3);">No data yet</div>', unsafe_allow_html=True)
1591
+ st.markdown('</div>', unsafe_allow_html=True)
1592
+
1593
+ # Top-5 horizontal bar panels
1594
+ _top5_col1, _top5_col2 = st.columns(2)
1595
+
1596
def _hbar_rows_html(data: dict, color: str, max_val: int) -> str:
    """Render up to five horizontal percentage bars, largest count first.

    Args:
        data: Mapping of action-type name to its count.
        color: CSS colour used for every bar's fill.
        max_val: Count that corresponds to a full-width bar; anything below 1
            is treated as 1 to avoid dividing by zero.

    Returns:
        The concatenated HTML of the rows, or an empty string for empty data.
    """
    scale = max(max_val, 1)
    ranked = sorted(data.items(), key=lambda item: item[1], reverse=True)
    rows = []
    for cat, count in ranked[:5]:
        pct = round(count / scale * 100)
        # Prefer the short display label; fall back to the raw category name.
        label = _SHORT_ACTION.get(cat, cat)
        rows.append(
            f'<div style="display:flex;align-items:center;gap:10px;margin-bottom:10px;">'
            f'<div style="width:170px;font-size:12px;text-align:right;opacity:0.85;line-height:1.3;">{label}</div>'
            f'<div style="flex:1;height:22px;border-radius:4px;background:rgba(255,255,255,0.06);overflow:hidden;">'
            f'<div style="width:{pct}%;height:100%;background:{color};border-radius:4px;'
            f'display:flex;align-items:center;justify-content:flex-end;padding-right:6px;'
            f'font-size:11px;font-weight:700;color:#fff;">{pct}%</div>'
            f'</div></div>'
        )
    return "".join(rows)
1611
+
1612
+ with _top5_col1:
1613
+ st.markdown(
1614
+ '<div class="chart-wrap">'
1615
+ '<div class="chart-title">Top 5 <span style="color:#60a5fa;">Questions</span> Students Ask</div>'
1616
+ '<div class="chart-sub">Type of action count for Questions across tagged videos.</div>',
1617
+ unsafe_allow_html=True
1618
+ )
1619
+ if _q_data:
1620
+ st.markdown(_hbar_rows_html(_q_data, "#f87171", max(_q_data.values(), default=1)), unsafe_allow_html=True)
1621
+ else:
1622
+ st.markdown('<div style="text-align:center;padding:20px;color:var(--text-3);">No data yet</div>', unsafe_allow_html=True)
1623
+ st.markdown('</div>', unsafe_allow_html=True)
1624
+
1625
+ with _top5_col2:
1626
+ st.markdown(
1627
+ '<div class="chart-wrap">'
1628
+ '<div class="chart-title">Top 5 Types of <span style="color:#f87171;">Requests &amp; Feedback</span> Students Give</div>'
1629
+ '<div class="chart-sub">Type of action count for Request/Feedback across tagged videos.</div>',
1630
+ unsafe_allow_html=True
1631
+ )
1632
+ if _rf_data:
1633
+ st.markdown(_hbar_rows_html(_rf_data, "#f87171", max(_rf_data.values(), default=1)), unsafe_allow_html=True)
1634
+ else:
1635
+ st.markdown('<div style="text-align:center;padding:20px;color:var(--text-3);">No data yet</div>', unsafe_allow_html=True)
1636
+ st.markdown('</div>', unsafe_allow_html=True)
1637
+
1638
 
1639
  # ── ENGAGEMENT SCORE ─────────────────────────────────────────
1640
  st.divider()
 
1676
  unsafe_allow_html=True
1677
  )
1678
 
1679
+ _contrib_json = json.dumps([{"author": m.get("author",""), "sentiment": m.get("sentiment","Neutral"), "topic": m.get("topic","General")} for m in all_data])
1680
  contributors = compute_top_contributors(_contrib_json)
1681
 
1682
  if contributors:
1683
  max_count = contributors[0]["count"]
1684
# Leaderboard: one row per top contributor with rank, name, a bar scaled
# against the busiest author, sentiment dots and the message count.
import html as _html  # stdlib; author names are untrusted chat input

rank_icons = {1: "🥇", 2: "🥈", 3: "🥉"}
rank_classes = {1: "gold", 2: "silver", 3: "bronze"}
for rank, c in enumerate(contributors, 1):
    # Bar width relative to the top contributor; max() avoids dividing by zero.
    bar_pct = int(c["count"] / max(max_count, 1) * 100)
    rank_cls = rank_classes.get(rank, "")
    rank_icon = rank_icons.get(rank, f"#{rank}")
    # Escaped because the row is rendered with unsafe_allow_html=True.
    author = _html.escape(str(c["author"]))
    count = c["count"]
    pos_pct = c["pos_pct"]
    neu_pct = c["neu_pct"]
    neg_pct = c["neg_pct"]
    # Named row_html so the stdlib `html` module name is not shadowed.
    row_html = (
        f'<div class="leaderboard-row">'
        f'<div class="lb-rank {rank_cls}">{rank_icon}</div>'
        f'<div class="lb-author">{author}</div>'
        f'<div class="lb-bar"><div class="lb-bar-fill" style="width:{bar_pct}%;background:var(--accent);"></div></div>'
        f'<div class="lb-sent">'
        f'<span class="lb-dot" style="background:#22c55e;" title="Positive {pos_pct}%"></span>'
        f'<span class="lb-dot" style="background:#eab308;" title="Neutral {neu_pct}%"></span>'
        f'<span class="lb-dot" style="background:#ef4444;" title="Negative {neg_pct}%"></span>'
        f'</div>'
        f'<div class="lb-count">{count} msgs</div>'
        f'</div>'
    )
    st.markdown(row_html, unsafe_allow_html=True)
1709
+
1710
+ # ── Combined Sentiment + Topic dual-bar chart ──────────────
1711
+ st.markdown('<div class="chart-wrap" style="margin-top:16px;">', unsafe_allow_html=True)
1712
+ st.markdown(
1713
+ '<div class="chart-title">Sentiment &amp; Topic Breakdown β€” Top Contributors</div>'
1714
+ '<div class="chart-sub">Top bar = sentiment (Neg/Neu/Pos) Β· Bottom bar = topic mix Β· right = message count</div>',
1715
+ unsafe_allow_html=True
1716
+ )
1717
+
1718
+ # Each user occupies 2 numeric slots: sentiment at i*2+0.3, topic at i*2-0.3
1719
+ # Tick label sits at i*2 (midpoint) showing the name once
1720
+ n = len(contributors)
1721
+ y_sent_num = [i * 2 + 0.3 for i in range(n)]
1722
+ y_topic_num = [i * 2 - 0.3 for i in range(n)]
1723
+ tick_vals = [i * 2 for i in range(n)]
1724
+ tick_text = [c["author"][:22] for c in contributors]
1725
+
1726
+ fig_combo = go.Figure()
1727
+
1728
+ # ── Sentiment traces ──
1729
+ for key, label, color in [
1730
+ ("neg_pct", "Neg", "#ef4444"),
1731
+ ("neu_pct", "Neu", "#eab308"),
1732
+ ("pos_pct", "Pos", "#22c55e"),
1733
+ ]:
1734
+ fig_combo.add_trace(go.Bar(
1735
+ name=label,
1736
+ y=y_sent_num,
1737
+ x=[c[key] for c in contributors],
1738
+ orientation="h",
1739
+ marker_color=color,
1740
+ legendgroup="sent",
1741
+ legendgrouptitle_text="Sentiment" if key == "neg_pct" else None,
1742
+ width=0.5,
1743
+ hovertemplate="<b>" + label + "</b>: %{x}%<extra></extra>",
1744
+ ))
1745
+
1746
+ # ── Topic traces ──
1747
+ for key, label, color in [
1748
+ ("t_appr", "Appreciation", "#f59e0b"),
1749
+ ("t_ques", "Question", "#3b82f6"),
1750
+ ("t_rf", "Request/Feedback","#8b5cf6"),
1751
+ ("t_promo", "Promo", "#ec4899"),
1752
+ ("t_spam", "Spam", "#ef4444"),
1753
+ ("t_gen", "General", "#6b7280"),
1754
+ ("t_mcq", "MCQ Answer", "#10b981"),
1755
+ ]:
1756
+ fig_combo.add_trace(go.Bar(
1757
+ name=label,
1758
+ y=y_topic_num,
1759
+ x=[c[key] for c in contributors],
1760
+ orientation="h",
1761
+ marker_color=color,
1762
+ legendgroup="topic",
1763
+ legendgrouptitle_text="Topic" if key == "t_appr" else None,
1764
+ width=0.5,
1765
+ hovertemplate="<b>" + label + "</b>: %{x}%<extra></extra>",
1766
+ ))
1767
+
1768
+ # ── Message count annotations (right of sentiment bar) ──
1769
+ annotations = []
1770
+ for i, c in enumerate(contributors):
1771
+ annotations.append(dict(
1772
+ x=102, y=y_sent_num[i],
1773
+ text=f"<b>{c['count']} msgs</b>",
1774
+ showarrow=False,
1775
+ xanchor="left",
1776
+ font=dict(size=10, color="#94a3b8"),
1777
+ xref="x", yref="y",
1778
+ ))
1779
+
1780
+ chart_h = max(400, n * 56)
1781
+ layout_combo = plotly_layout(chart_h)
1782
+ layout_combo["barmode"] = "stack"
1783
+ layout_combo["bargap"] = 0.1
1784
+ layout_combo["showlegend"] = True
1785
+ layout_combo["legend"] = dict(
1786
+ orientation="h", y=1.0, x=0,
1787
+ font=dict(size=12, color="#f1f5f9"),
1788
+ title_font=dict(size=12, color="#a78bfa"),
1789
+ groupclick="toggleitem",
1790
+ yanchor="bottom",
1791
+ xanchor="left",
1792
+ bgcolor="rgba(0,0,0,0)",
1793
+ )
1794
+ layout_combo["margin"] = dict(l=10, r=80, t=80, b=10)
1795
+ layout_combo["xaxis"]["range"] = [0, 115]
1796
+ layout_combo["xaxis"]["ticksuffix"] = "%"
1797
+ layout_combo["yaxis"] = dict(
1798
+ tickvals=tick_vals,
1799
+ ticktext=tick_text,
1800
+ tickfont=dict(size=10),
1801
+ autorange="reversed",
1802
+ showgrid=False,
1803
+ zeroline=False,
1804
+ showline=False,
1805
+ )
1806
+ layout_combo["annotations"] = annotations
1807
+ fig_combo.update_layout(**layout_combo)
1808
+
1809
+ st.plotly_chart(fig_combo, width='stretch', config={"displayModeBar": False})
1810
+ st.markdown('</div>', unsafe_allow_html=True)
1811
 
1812
  contrib_df = pd.DataFrame(contributors)
1813
  csv_download(contrib_df, "Download CSV", "top_contributors.csv")
1814
  else:
1815
  st.info("Not enough data yet.")
1816
 
1817
+ # ── REPEAT SPAMMERS ───────────────────────────────────────────
1818
+ st.divider()
1819
+ st.markdown(
1820
+ '<div class="sec-hdr"><span class="sec-ttl">Repeat Spammers</span><span class="sec-pill">All Time</span></div>',
1821
+ unsafe_allow_html=True
1822
+ )
1823
+ rs_col1, rs_col2 = st.columns([1, 1])
1824
+ with rs_col1:
1825
+ rs_window = st.slider("Time window (sec)", 5, 60, 15, key="rs_window")
1826
+ with rs_col2:
1827
+ rs_min = st.slider("Min repeats to flag", 2, 10, 2, key="rs_min")
1828
+
1829
+ _rs_json = json.dumps([{
1830
+ "author": m.get("author",""), "text": m.get("text",""),
1831
+ "topic": m.get("topic","General"), "sentiment": m.get("sentiment","Neutral"),
1832
+ "time": m.get("time","")
1833
+ } for m in all_data])
1834
+ repeat_spammers = detect_repeat_spammers(_rs_json, window_sec=rs_window, min_repeats=rs_min)
1835
+
1836
+ if repeat_spammers:
1837
+ st.markdown(
1838
+ f'<div style="font-size:0.78rem;color:var(--text-3);margin-bottom:12px;">'
1839
+ f'Found <b style="color:var(--text-1);">{len(repeat_spammers)}</b> users repeating the same message '
1840
+ f'≥{rs_min}× within {rs_window}s</div>',
1841
+ unsafe_allow_html=True
1842
+ )
1843
+ for rs in repeat_spammers:
1844
+ _t_color = TOPIC_COLOR.get(rs["topic"], "#6b7280")
1845
+ _s_color = SENT_COLORS.get(rs["sentiment"], "#6b7280")
1846
+ _burst = rs["max_burst"]
1847
+ _total = rs["count"]
1848
+ _severity = "#ef4444" if _burst >= 5 else "#eab308" if _burst >= 3 else "#f59e0b"
1849
+ st.markdown(
1850
+ f'<div class="chat-card" style="border-left:3px solid {_severity};">'
1851
+ f'<div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:6px;">'
1852
+ f'<div class="chat-author">⚠️ {rs["author"]}</div>'
1853
+ f'<div style="display:flex;gap:6px;">'
1854
+ f'<span class="badge" style="color:{_severity};border-color:{_severity}44;">'
1855
+ f'🔁 {_burst}× in {rs_window}s</span>'
1856
+ f'<span class="badge" style="color:var(--text-3);">{_total} total</span>'
1857
+ f'</div></div>'
1858
+ f'<div class="chat-text">"{rs["text"]}"</div>'
1859
+ f'<div class="chat-badges">'
1860
+ f'<span class="badge" style="color:{_s_color};border-color:{_s_color}33;">{rs["sentiment"]}</span>'
1861
+ f'<span class="badge" style="color:{_t_color};border-color:{_t_color}33;">{rs["topic"]}</span>'
1862
+ f'<span class="badge">First: {rs["first_seen"]}</span>'
1863
+ f'<span class="badge">Last: {rs["last_seen"]}</span>'
1864
+ f'</div></div>',
1865
+ unsafe_allow_html=True
1866
+ )
1867
+ rs_df = pd.DataFrame(repeat_spammers)
1868
+ csv_download(rs_df, "Download CSV", "repeat_spammers.csv")
1869
+ else:
1870
+ st.markdown(
1871
+ '<div style="font-size:0.84rem;color:var(--text-3);padding:12px 0;">No repeat spammers detected in current window.</div>',
1872
+ unsafe_allow_html=True
1873
+ )
1874
+
1875
  # ── WORD CLOUD ────────────────────────────────────────────────
1876
  st.divider()
1877
  st.markdown(
 
1947
  unsafe_allow_html=True
1948
  )
1949
 
1950
+ # ── Load all stream data ONCE (fix double-load) ───────────
1951
+ _stream_cache: dict[str, dict] = {}
1952
+ for _s in active_streams:
1953
+ _rkey = _s["redis_key"]
1954
+ _raw = load_stream_data(_rkey)
1955
+ if not _raw:
1956
+ continue
1957
+ _sdf = pd.DataFrame(_raw)
1958
+ _sdf["sentiment"] = _sdf["sentiment"].apply(clean_sentiment)
1959
+ _sdf["topic"] = _sdf["topic"].apply(clean_topic) if "topic" in _sdf.columns else "General"
1960
+ _sc = _sdf["sentiment"].value_counts().to_dict()
1961
+ _p = _sc.get("Positive", 0)
1962
+ _n = _sc.get("Neutral", 0)
1963
+ _g = _sc.get("Negative", 0)
1964
+ _t = max(_p + _n + _g, 1)
1965
+ _tc = {lbl: int((_sdf["topic"] == lbl).sum()) for lbl in TOPIC_LABELS}
1966
+ _top_topic = max(_tc, key=_tc.get)
1967
+ _eng_json = json.dumps([
1968
+ {"sentiment": m.get("sentiment","Neutral"),
1969
+ "topic": m.get("topic","General"),
1970
+ "time": m.get("time","")} for m in _raw
1971
+ ])
1972
+ _eng = compute_engagement(_eng_json)
1973
+ _title = _s.get("video_title") or _s.get("video_id") or _rkey
1974
+ _stream_cache[_rkey] = {
1975
+ "df": _sdf, "raw": _raw,
1976
+ "p": _p, "n": _n, "g": _g, "t": _t,
1977
+ "tc": _tc, "top_topic": _top_topic,
1978
+ "eng": _eng, "title": _title,
1979
+ "sidx": st.session_state.streams.index(_s),
1980
+ }
1981
+
1982
+ # ── Head-to-head comparison table ─────────────────────────
1983
+ st.markdown('<div class="chart-wrap" style="margin-bottom:16px;">', unsafe_allow_html=True)
1984
+ st.markdown('<div class="chart-title">Head-to-Head Summary</div><div class="chart-sub">All active streams at a glance</div>', unsafe_allow_html=True)
1985
+ _hth_rows = []
1986
+ for _s in active_streams:
1987
+ _rkey = _s["redis_key"]
1988
+ if _rkey not in _stream_cache:
1989
+ continue
1990
+ _c = _stream_cache[_rkey]
1991
+ _sidx = _c["sidx"]
1992
+ _hth_rows.append({
1993
+ "Stream": f"Stream {STREAM_NAMES[_sidx]}",
1994
+ "Title": _c["title"][:30],
1995
+ "Messages": _c["t"],
1996
+ "Positive %": f"{_c['p']/_c['t']*100:.1f}%",
1997
+ "Neutral %": f"{_c['n']/_c['t']*100:.1f}%",
1998
+ "Negative %": f"{_c['g']/_c['t']*100:.1f}%",
1999
+ "Top Topic": _c["top_topic"],
2000
+ "Engagement": f"{_c['eng']['score']}/100 {_c['eng']['grade']}",
2001
+ })
2002
+ if _hth_rows:
2003
+ st.dataframe(pd.DataFrame(_hth_rows), hide_index=True, use_container_width=True)
2004
+ st.markdown('</div>', unsafe_allow_html=True)
2005
+
2006
+ # ── Per-stream sentiment + topic + engagement cards ────────
2007
+ chunk_size = 2
2008
+ _cached_keys = [_s["redis_key"] for _s in active_streams if _s["redis_key"] in _stream_cache]
2009
+ for row_start in range(0, len(_cached_keys), chunk_size):
2010
+ row_keys = _cached_keys[row_start:row_start + chunk_size]
2011
+ cols = st.columns(len(row_keys))
2012
+ for col, _rkey in zip(cols, row_keys):
2013
+ _c = _stream_cache[_rkey]
2014
+ _sidx = _c["sidx"]
2015
+ color = STREAM_COLORS[_sidx]
2016
+ slabel = STREAM_NAMES[_sidx]
2017
+ _p, _n, _g, _t = _c["p"], _c["n"], _c["g"], _c["t"]
2018
+ _eng = _c["eng"]
2019
+ _tc = _c["tc"]
2020
  with col:
2021
  st.markdown(
2022
  f'<span class="compare-label" style="background:{color}18;color:{color};border:1px solid {color}44;">'
2023
+ f'Stream {slabel} · {_c["title"][:25]}</span>',
2024
  unsafe_allow_html=True
2025
  )
2026
+ _ec = "#22c55e" if _eng["score"] >= 70 else "#eab308" if _eng["score"] >= 40 else "#ef4444"
2027
  st.markdown(
2028
+ f'<div style="display:flex;gap:10px;margin:6px 0 10px;flex-wrap:wrap;">'
2029
+ f'<div style="background:var(--bg-card);border:1px solid {_ec}44;border-radius:12px;padding:8px 14px;">'
2030
+ f'<div style="font-size:1.4rem;font-weight:800;color:{_ec};">{_eng["score"]}</div>'
2031
+ f'<div style="font-size:0.68rem;color:var(--text-3);text-transform:uppercase;">Engagement</div>'
2032
+ f'</div>'
2033
+ f'<div style="background:var(--bg-card);border:1px solid var(--border);border-radius:12px;padding:8px 14px;">'
2034
+ f'<div style="font-size:1.4rem;font-weight:800;color:var(--text-1);">{_t}</div>'
2035
+ f'<div style="font-size:0.68rem;color:var(--text-3);text-transform:uppercase;">Messages</div>'
2036
+ f'</div>'
2037
+ f'<div style="background:var(--bg-card);border:1px solid var(--border);border-radius:12px;padding:8px 14px;">'
2038
+ f'<div style="font-size:1.4rem;font-weight:800;color:#22c55e;">{_p/_t*100:.0f}%</div>'
2039
+ f'<div style="font-size:0.68rem;color:var(--text-3);text-transform:uppercase;">Positive</div>'
2040
+ f'</div>'
2041
+ f'</div>',
2042
  unsafe_allow_html=True
2043
  )
2044
+ st.markdown('<div class="chart-wrap" style="margin-bottom:8px;">', unsafe_allow_html=True)
2045
+ st.markdown('<div class="chart-title" style="font-size:0.78rem;">Sentiment</div>', unsafe_allow_html=True)
2046
+ fig_s = go.Figure(go.Bar(
2047
+ x=["Pos", "Neu", "Neg"],
2048
+ y=[_p, _n, _g],
2049
+ marker_color=["#22c55e", "#eab308", "#ef4444"],
2050
+ marker_line_width=0,
2051
+ text=[_p, _n, _g],
2052
+ textposition="outside",
2053
+ hovertemplate="<b>%{x}</b>: %{y}<extra></extra>",
2054
+ ))
2055
+ fig_s.update_layout(**plotly_layout(180))
2056
+ st.plotly_chart(fig_s, width='stretch', config={"displayModeBar": False})
2057
+ st.markdown('</div>', unsafe_allow_html=True)
2058
+ st.markdown('<div class="chart-wrap">', unsafe_allow_html=True)
2059
+ st.markdown('<div class="chart-title" style="font-size:0.78rem;">Topic Breakdown</div>', unsafe_allow_html=True)
2060
+ _tc_colors = ["#f59e0b","#3b82f6","#ec4899","#ef4444","#6b7280","#10b981"]
2061
+ fig_t = go.Figure(go.Bar(
2062
+ x=TOPIC_LABELS,
2063
+ y=[_tc[l] for l in TOPIC_LABELS],
2064
+ marker_color=_tc_colors,
2065
+ marker_line_width=0,
2066
+ text=[_tc[l] for l in TOPIC_LABELS],
2067
+ textposition="outside",
2068
+ hovertemplate="<b>%{x}</b>: %{y}<extra></extra>",
2069
+ ))
2070
+ _tl = plotly_layout(180)
2071
+ _tl["xaxis"]["tickfont"] = dict(size=8)
2072
+ fig_t.update_layout(**_tl)
2073
+ st.plotly_chart(fig_t, width='stretch', config={"displayModeBar": False})
2074
+ st.markdown('</div>', unsafe_allow_html=True)
2075
+
2076
+ # ── Overlay: positive ratio over time (all streams) ────────
2077
  st.markdown('<div class="chart-wrap" style="margin-top:14px;">', unsafe_allow_html=True)
2078
+ st.markdown('<div class="chart-title">Positive Ratio Over Time</div><div class="chart-sub">Rolling positive % per stream (synced refresh)</div>', unsafe_allow_html=True)
2079
  fig_overlay = go.Figure()
2080
+ for _rkey, _c in _stream_cache.items():
2081
+ _sidx = _c["sidx"]
2082
+ color = STREAM_COLORS[_sidx]
2083
+ slabel = STREAM_NAMES[_sidx]
2084
+ _sdf = _c["df"].copy()
2085
+ _sdf["is_pos"] = (_sdf["sentiment"] == "Positive").astype(int)
2086
+ _sdf["rolling"] = _sdf["is_pos"].rolling(10, min_periods=1).mean() * 100
 
 
 
 
2087
  fig_overlay.add_trace(go.Scatter(
2088
+ x=list(range(len(_sdf))),
2089
+ y=_sdf["rolling"],
2090
  mode="lines",
2091
+ name=f"Stream {slabel} · {_c['title'][:20]}",
2092
  line=dict(color=color, width=2),
2093
  hovertemplate=f"Stream {slabel} msg %{{x}}: %{{y:.1f}}%<extra></extra>",
2094
  ))
2095
+ layout_ov = plotly_layout(220)
2096
  layout_ov["showlegend"] = True
2097
+ layout_ov["legend"] = dict(orientation="h", y=1.08, font=dict(size=11, color="#f1f5f9"))
2098
  layout_ov["yaxis"]["range"] = [0, 100]
2099
  fig_overlay.update_layout(**layout_ov)
2100
  st.plotly_chart(fig_overlay, width='stretch', config={"displayModeBar": False})
 
2136
  st.rerun()
2137
 
2138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2139
  # ── AUTO REFRESH ──────────────────────────────────────────────
2140
  if auto_refresh:
2141
  time.sleep(refresh_rate)