# pages/stats.py """ Stats & Info view — all analytics charts, engagement, contributors, word cloud. Imports shared infrastructure from app.py via sys.path manipulation. All session state values are set by app.py before this page runs. """ import streamlit as st import json import pandas as pd import plotly.graph_objects as go import time import sys import os sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) from shared import ( store_llen, load_stream_data, clean_sentiment, clean_topic, csv_download, plotly_layout, compute_velocity, build_heatmap_data, check_alert, compute_engagement, compute_top_contributors, compute_word_freq, check_spam_alert, detect_repeat_spammers, TOPIC_LABELS, TOPIC_COLOR, SENT_COLORS, STREAM_NAMES, STREAM_COLORS, ) # -- Get shared state from session ---------------------------- auto_refresh = st.session_state.get("auto_refresh", True) refresh_rate = st.session_state.get("refresh_rate", 10) msg_limit = st.session_state.get("msg_limit", 50) alert_enabled = st.session_state.get("alert_enabled", True) alert_threshold = st.session_state.get("alert_threshold", 0.4) alert_window = st.session_state.get("alert_window", 15) spam_alert_on = st.session_state.get("spam_alert_on", True) spam_threshold = st.session_state.get("spam_threshold", 0.3) _primary_key = st.session_state.get("_primary_key", "chat_messages") # -- Load data ------------------------------------------------ _CUMULATIVE_CAP = 50_000 _current_len = store_llen(_primary_key) all_data = load_stream_data(_primary_key, limit=_CUMULATIVE_CAP if _current_len > _CUMULATIVE_CAP else None) data = all_data[-msg_limit:] if len(all_data) > msg_limit else all_data if not all_data: st.markdown( '

📭

' '

No messages yet

' '

Set a video ID in the sidebar, then click ▶ Start

' '

', unsafe_allow_html=True ) if auto_refresh: time.sleep(refresh_rate) st.rerun() st.stop() df = pd.DataFrame(data) all_df = pd.DataFrame(all_data) df["sentiment"] = df["sentiment"].apply(clean_sentiment) df["topic"] = df["topic"].apply(clean_topic) if "topic" in df.columns else "General" all_df["sentiment"] = all_df["sentiment"].apply(clean_sentiment) all_df["topic"] = all_df["topic"].apply(clean_topic) if "topic" in all_df.columns else "General" # -- ALERT BANNERS -------------------------------------------- if alert_enabled: alert = check_alert(all_df, threshold=alert_threshold, window=alert_window) total_now = len(all_df) if total_now != st.session_state.last_alert_count: st.session_state.last_alert_count = total_now if alert: st.session_state.alert_dismissed = False if alert and not st.session_state.alert_dismissed: a1, a2 = st.columns([8, 1]) with a1: st.markdown( f'', unsafe_allow_html=True ) with a2: if st.button("✕ Dismiss", key="dismiss_alert"): st.session_state.alert_dismissed = True st.rerun() if spam_alert_on: spam_alert = check_spam_alert(all_df, threshold=spam_threshold, window=alert_window) if spam_alert and not st.session_state.get("spam_dismissed", False): s1, s2 = st.columns([8, 1]) with s1: st.markdown( f'

' f'🛡️' f'

' f'

Spam surge detected — {spam_alert["spam_ratio"]*100:.0f}% spam in last {spam_alert["window"]} messages

' f'

{spam_alert["count"]} spam messages detected. Chat may be under flood attack.

' f'

', unsafe_allow_html=True ) with s2: if st.button("✕", key="dismiss_spam"): st.session_state.spam_dismissed = True st.rerun() elif not spam_alert: st.session_state.spam_dismissed = False # -- CUMULATIVE STATS ----------------------------------------- all_counts = all_df["sentiment"].value_counts().to_dict() c_pos = all_counts.get("Positive", 0) c_neu = all_counts.get("Neutral", 0) c_neg = all_counts.get("Negative", 0) c_total = max(c_pos + c_neu + c_neg, 1) velocity = compute_velocity(json.dumps([{"sentiment": m.get("sentiment","Neutral")} for m in all_data])) st.markdown( '

Cumulative SentimentAll Time

', unsafe_allow_html=True ) v1, v2, v3, v4, v5 = st.columns([1, 1, 1, 1, 1]) with v1: st.markdown( f'

' f'

{c_pos}

Positive

{c_pos/c_total*100:.1f}% of total

', unsafe_allow_html=True ) with v2: st.markdown( f'

' f'

{c_neu}

Neutral

{c_neu/c_total*100:.1f}% of total

', unsafe_allow_html=True ) with v3: st.markdown( f'

' f'

{c_neg}

Negative

{c_neg/c_total*100:.1f}% of total

', unsafe_allow_html=True ) with v4: st.markdown( f'

' f'

{c_total}

Total

all time

', unsafe_allow_html=True ) with v5: vc = velocity["color"] st.markdown( f'

' f'

{velocity["direction"]}

' f'

{velocity["label"]}

' f'

Sentiment Velocity
' f'{velocity["delta"]:+.0%} pos shift

' f'

', unsafe_allow_html=True ) # -- CUMULATIVE TOPIC ----------------------------------------- st.divider() st.markdown( '

Cumulative TopicAll Time

', unsafe_allow_html=True ) _topic_colors_list = ["#f59e0b", "#3b82f6", "#ec4899", "#ef4444", "#6b7280", "#10b981"] _ct_cols = st.columns(len(TOPIC_LABELS)) for _ci, (_lbl, _clr) in enumerate(zip(TOPIC_LABELS, _topic_colors_list)): _cnt = int((all_df["topic"] == _lbl).sum()) if "topic" in all_df.columns else 0 _pct = _cnt / max(c_total, 1) * 100 with _ct_cols[_ci]: st.markdown( f'

' f'

{_cnt}

' f'

{_lbl}

' f'

{_pct:.1f}% of msgs

', unsafe_allow_html=True ) # -- ENGAGEMENT SCORE (moved here — after topic, before window) ---- _eng_json = json.dumps([{"sentiment": m.get("sentiment","Neutral"), "topic": m.get("topic","General"), "time": m.get("time","")} for m in all_data]) eng = compute_engagement(_eng_json) st.divider() st.markdown( '

Engagement ScoreLive

', unsafe_allow_html=True ) ec1, ec2, ec3, ec4 = st.columns([2, 1, 1, 1]) with ec1: score_color = "#22c55e" if eng["score"] >= 70 else "#eab308" if eng["score"] >= 40 else "#ef4444" bar_w = eng["score"] st.markdown( f'

' f'

{eng["score"]}

' f'

Engagement Score / 100 \u2014 {eng["grade"]}

' f'

Msg rate {eng["rate"]}/min

' f'

Positive {eng["pos_ratio"]*100:.0f}%

' f'

Questions {eng["q_density"]*100:.0f}%

' f'

', unsafe_allow_html=True ) with ec2: st.metric("Msgs/min", f"{eng['rate']:.1f}") with ec3: st.metric("Positive ratio", f"{eng['pos_ratio']*100:.0f}%") with ec4: st.metric("Question density", f"{eng['q_density']*100:.0f}%") # -- WINDOW METRICS ------------------------------------------- st.divider() counts = df["sentiment"].value_counts().to_dict() pos = counts.get("Positive", 0) neu = counts.get("Neutral", 0) neg = counts.get("Negative", 0) total = max(pos + neu + neg, 1) st.markdown( f'

Window SnapshotLast {msg_limit} msgs

', unsafe_allow_html=True ) c1, c2, c3, c4 = st.columns(4) c1.metric("Messages", total) c2.metric("Positive", pos, f"{pos/total*100:.1f}%") c3.metric("Neutral", neu, f"{neu/total*100:.1f}%") c4.metric("Negative", neg, f"{neg/total*100:.1f}%") # -- SENTIMENT + TOPIC CHARTS (ALL TIME) ---------------------- st.divider() col_s1, col_s2, col_t1, col_t2 = st.columns(4) with col_s1: st.markdown('

', unsafe_allow_html=True) st.markdown('

Sentiment Distribution

All-time message count by sentiment class

', unsafe_allow_html=True) fig_bar = go.Figure(go.Bar( x=["Positive", "Neutral", "Negative"], y=[c_pos, c_neu, c_neg], marker_color=["#22c55e", "#eab308", "#ef4444"], marker_line_width=0, text=[c_pos, c_neu, c_neg], textposition="outside", textfont=dict(size=12), hovertemplate="%{x}
Count: %{y}", )) fig_bar.update_layout(**plotly_layout(260)) st.plotly_chart(fig_bar, config={"displayModeBar": False}) bar_hdr, bar_dl = st.columns([1, 1]) with bar_hdr: show_bar_data = st.checkbox("View data", key="show_bar") with bar_dl: bar_df = pd.DataFrame({"Sentiment": ["Positive", "Neutral", "Negative"], "Count": [c_pos, c_neu, c_neg]}) csv_download(bar_df, "Download CSV", "sentiment_distribution.csv") if show_bar_data: st.dataframe(bar_df, hide_index=True) st.markdown('

', unsafe_allow_html=True) with col_s2: st.markdown('

', unsafe_allow_html=True) st.markdown('

Sentiment Donut

All-time proportional share per class

', unsafe_allow_html=True) fig_pie = go.Figure(go.Pie( labels=["Positive", "Neutral", "Negative"], values=[c_pos, c_neu, c_neg], marker_colors=["#22c55e", "#eab308", "#ef4444"], hole=0.58, textinfo="percent", hovertemplate="%{label}
%{value} messages (%{percent})", )) fig_pie.update_layout( **{**plotly_layout(260), "showlegend": True, "legend": dict(orientation="h", y=-0.08, font=dict(size=11, color="#f1f5f9"))} ) st.plotly_chart(fig_pie, config={"displayModeBar": False}) pie_hdr, pie_dl = st.columns([1, 1]) with pie_hdr: show_pie_data = st.checkbox("View data", key="show_pie") with pie_dl: pie_df = pd.DataFrame({ "Sentiment": ["Positive", "Neutral", "Negative"], "Count": [c_pos, c_neu, c_neg], "Percentage": [f"{c_pos/c_total*100:.1f}%", f"{c_neu/c_total*100:.1f}%", f"{c_neg/c_total*100:.1f}%"] }) csv_download(pie_df, "Download CSV", "sentiment_breakdown.csv") if show_pie_data: st.dataframe(pie_df, hide_index=True) st.markdown('

', unsafe_allow_html=True) with col_t1: st.markdown('

', unsafe_allow_html=True) st.markdown('

Topic Distribution

All-time message count by topic class

', unsafe_allow_html=True) _tc_vals = [int((all_df["topic"] == l).sum()) if "topic" in all_df.columns else 0 for l in TOPIC_LABELS] _tc_colors = ["#f59e0b", "#3b82f6", "#ec4899", "#ef4444", "#6b7280", "#10b981"] fig_tbar = go.Figure(go.Bar( x=TOPIC_LABELS, y=_tc_vals, marker_color=_tc_colors, marker_line_width=0, text=_tc_vals, textposition="outside", textfont=dict(size=11), hovertemplate="%{x}
Count: %{y}", )) _tbar_layout = plotly_layout(260) _tbar_layout["xaxis"]["tickfont"] = dict(size=9) fig_tbar.update_layout(**_tbar_layout) st.plotly_chart(fig_tbar, config={"displayModeBar": False}) st.markdown('

', unsafe_allow_html=True) with col_t2: st.markdown('

', unsafe_allow_html=True) st.markdown('

Topic Donut

All-time proportional share per topic

', unsafe_allow_html=True) fig_tpie = go.Figure(go.Pie( labels=TOPIC_LABELS, values=_tc_vals, marker_colors=_tc_colors, hole=0.58, textinfo="percent", hovertemplate="%{label}
%{value} messages (%{percent})", )) fig_tpie.update_layout( **{**plotly_layout(260), "showlegend": True, "legend": dict(orientation="h", y=-0.08, font=dict(size=10, color="#f1f5f9"))} ) st.plotly_chart(fig_tpie, config={"displayModeBar": False}) st.markdown('

', unsafe_allow_html=True) # -- SENTIMENT HEATMAP OVER TIME ------------------------------ st.divider() st.markdown( '

Sentiment HeatmapOver Time

', unsafe_allow_html=True ) heatmap_data = build_heatmap_data(json.dumps([{"time": m.get("time",""), "sentiment": m.get("sentiment","Neutral")} for m in all_data]), bucket_minutes=1) if not heatmap_data.empty: st.markdown('

', unsafe_allow_html=True) st.markdown('

Sentiment Over Time

Message volume per sentiment per minute bucket

', unsafe_allow_html=True) fig_heat = go.Figure() for sent, color in [("Positive", "#22c55e"), ("Neutral", "#eab308"), ("Negative", "#ef4444")]: fig_heat.add_trace(go.Scatter( x=heatmap_data["bucket"], y=heatmap_data[sent], name=sent, mode="lines+markers", line=dict(color=color, width=2), marker=dict(size=4), fill="tozeroy" if sent == "Negative" else None, fillcolor=color.replace(")", ",0.08)").replace("rgb", "rgba") if sent == "Negative" else None, hovertemplate=f"{sent}
%{{x}}
Count: %{{y}}", )) layout = plotly_layout(220) layout["showlegend"] = True layout["legend"] = dict(orientation="h", y=1.08, font=dict(size=11)) layout["xaxis"]["tickformat"] = "%H:%M" fig_heat.update_layout(**layout) st.plotly_chart(fig_heat, config={"displayModeBar": False}) heat_hdr, heat_dl = st.columns([1, 1]) with heat_hdr: show_heat_data = st.checkbox("View data", key="show_heat") with heat_dl: csv_download(heatmap_data.rename(columns={"bucket": "time_bucket"}), "Download CSV", "sentiment_heatmap.csv") if show_heat_data: st.dataframe(heatmap_data.rename(columns={"bucket": "time_bucket"}), hide_index=True) st.markdown('

', unsafe_allow_html=True) else: st.info("Not enough timestamped data for heatmap yet.") # -- TOPIC DISTRIBUTION --------------------------------------- st.divider() st.markdown( '

Topic DistributionAll Time

', unsafe_allow_html=True ) topic_counts = { label: int((all_df["topic"] == label).sum()) for label in TOPIC_LABELS } pills = '

' for label in TOPIC_LABELS: color = TOPIC_COLOR[label] count = topic_counts[label] pills += ( f'

' f'

{count}

' f'

{label}

' f'

' ) pills += '

' st.markdown(pills, unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) st.markdown('

Topic Breakdown

All-time message count per topic category

', unsafe_allow_html=True) fig_topic = go.Figure(go.Bar( x=TOPIC_LABELS, y=[topic_counts[l] for l in TOPIC_LABELS], marker_color=[TOPIC_COLOR[l] for l in TOPIC_LABELS], marker_line_width=0, text=[topic_counts[l] for l in TOPIC_LABELS], textposition="outside", textfont=dict(size=11), hovertemplate="%{x}
Count: %{y}", )) fig_topic.update_layout(**plotly_layout(250)) st.plotly_chart(fig_topic, config={"displayModeBar": False}) topic_hdr, topic_dl = st.columns([1, 1]) with topic_hdr: show_topic_data = st.checkbox("View data", key="show_topic") with topic_dl: topic_df = pd.DataFrame({"Topic": TOPIC_LABELS, "Count": [topic_counts[l] for l in TOPIC_LABELS]}) csv_download(topic_df, "Download CSV", "topic_distribution.csv") if show_topic_data: st.dataframe(topic_df, hide_index=True) st.markdown('

', unsafe_allow_html=True) # -- Topic Sentiment breakdown -------------------------------- st.markdown('

', unsafe_allow_html=True) st.markdown('

Sentiment by Topic

% positive / neutral / negative within each topic category

', unsafe_allow_html=True) _topic_sent_data = [] for _lbl in TOPIC_LABELS: _mask = all_df["topic"] == _lbl _total = int(_mask.sum()) if _total == 0: _topic_sent_data.append({"topic": _lbl, "pos": 0, "neu": 0, "neg": 0}) continue _sub = all_df[_mask] _topic_sent_data.append({ "topic": _lbl, "pos": round((_sub["sentiment"] == "Positive").sum() / _total * 100), "neu": round((_sub["sentiment"] == "Neutral").sum() / _total * 100), "neg": round((_sub["sentiment"] == "Negative").sum() / _total * 100), }) fig_ts = go.Figure() for _sk, _sl, _sc in [("neg", "Neg", "#ef4444"), ("neu", "Neu", "#eab308"), ("pos", "Pos", "#22c55e")]: fig_ts.add_trace(go.Bar( y=[d["topic"] for d in _topic_sent_data], x=[d[_sk] for d in _topic_sent_data], name=_sl, orientation="h", marker_color=_sc, hovertemplate="%{y}
" + _sl + ": %{x}%", )) _layout_ts = plotly_layout(260) _layout_ts["barmode"] = "stack" _layout_ts["showlegend"] = True _layout_ts["legend"] = dict(orientation="h", y=1.08, x=0.35, font=dict(size=11)) _layout_ts["xaxis"]["range"] = [0, 100] _layout_ts["xaxis"]["ticksuffix"] = "%" _layout_ts["yaxis"]["autorange"] = "reversed" fig_ts.update_layout(**_layout_ts) st.plotly_chart(fig_ts, config={"displayModeBar": False}) st.markdown('

', unsafe_allow_html=True) # -- ACTION TYPE CHARTS --------------------------------------- st.divider() st.markdown( '

Action Type AnalysisLast 100 msgs

', unsafe_allow_html=True ) _QUESTION_ACTIONS_APP = [ "Access & Support", "Academic / Lecture / Concept Doubts", "Study Materials, Deliverables & Learning Resources", "Batch details / structure / offerings (incl faculty)", "Schedule & logistics (Batch)", "Guidance- What should I take/do?", "Suitability & Sufficiency (Is this enough?)", "Eligibility & audience fit - Can I take this?", "Information- Exam", "Information- Post Exam", ] _REQUEST_ACTIONS_APP = [ "Content requests", "Content Feedback", "Faculty Request", "Faculty Feedback", "Academic requests", "Language Request", "Language medium", "Product/feature requests (non-content)", "Offline expansion & event-city requests", "General Feedback", "Others", ] _SHORT_ACTION_APP = { "Access & Support": "Access & Support", "Academic / Lecture / Concept Doubts": "Academic Doubts", "Study Materials, Deliverables & Learning Resources": "Study Materials & Learning Resources", "Batch details / structure / offerings (incl faculty)": "Batch Details & Offerings", "Schedule & logistics (Batch)": "Batch Schedule & Logistics", "Guidance- What should I take/do?": "Guidance (What Should I Take/Do?)", "Suitability & Sufficiency (Is this enough?)": "Suitability & Sufficiency", "Eligibility & audience fit - Can I take this?": "Eligibility (Can I Take This?)", "Information- Exam": "Exam Information", "Information- Post Exam": "Post Exam Information", "Content requests": "Content requests", "Content Feedback": "Content Feedback", "Faculty Request": "Faculty Request", "Faculty Feedback": "Faculty Feedback", "Academic requests": "Academic requests", "Language Request": "Language Request", "Language medium": "Language Medium", "Product/feature requests (non-content)": "Non Content Product Requests", "Offline expansion & event-city requests": "Offline Expansion & Event Requests", "General Feedback": "General Feedback", "Others": "Others", } _at_counts_app: dict[str, int] = {} if "action_type" in all_df.columns: for _at in _QUESTION_ACTIONS_APP + _REQUEST_ACTIONS_APP: _at_counts_app[_at] = int((all_df.tail(100)["action_type"] == _at).sum()) else: _at_counts_app = {_at: 0 for _at in _QUESTION_ACTIONS_APP + _REQUEST_ACTIONS_APP} _q_data_app = {k: _at_counts_app.get(k, 0) for k in _QUESTION_ACTIONS_APP if _at_counts_app.get(k, 0) > 0} _rf_data_app = {k: _at_counts_app.get(k, 0) for k in _REQUEST_ACTIONS_APP if _at_counts_app.get(k, 0) > 0} _q_total_app = sum(_q_data_app.values()) _rf_total_app = sum(_rf_data_app.values()) _at_col1_app, _at_col2_app = st.columns(2) with _at_col1_app: st.markdown( f'

Type of Questions

' f'

({_q_total_app} comments)

', unsafe_allow_html=True ) if _q_data_app: _q_sorted_app = sorted(_q_data_app.items(), key=lambda x: x[1], reverse=True) fig_q_app = go.Figure(go.Bar( x=[_SHORT_ACTION_APP.get(k, k) for k, _ in _q_sorted_app], y=[v for _, v in _q_sorted_app], marker_color="#4a90d9", marker_line_width=0, text=[v for _, v in _q_sorted_app], textposition="outside", textfont=dict(size=11, color="#fff"), hovertemplate="%{x}
Comments: %{y}", )) fig_q_app.update_layout(**plotly_layout(280)) st.plotly_chart(fig_q_app, config={"displayModeBar": False}) else: st.markdown('

No data yet

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) with _at_col2_app: st.markdown( f'

Type of Requests & Feedback

' f'

({_rf_total_app} comments)

', unsafe_allow_html=True ) if _rf_data_app: _rf_sorted_app = sorted(_rf_data_app.items(), key=lambda x: x[1], reverse=True) fig_rf_app = go.Figure(go.Bar( x=[_SHORT_ACTION_APP.get(k, k) for k, _ in _rf_sorted_app], y=[v for _, v in _rf_sorted_app], marker_color="#f5a623", marker_line_width=0, text=[v for _, v in _rf_sorted_app], textposition="outside", textfont=dict(size=11, color="#fff"), hovertemplate="%{x}
Comments: %{y}", )) fig_rf_app.update_layout(**plotly_layout(280)) st.plotly_chart(fig_rf_app, config={"displayModeBar": False}) else: st.markdown('

No data yet

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) # Top-5 horizontal bar panels _top5_col1_app, _top5_col2_app = st.columns(2) def _hbar_rows_html_app(data: dict, color: str, max_val: int) -> str: html = "" for cat, count in sorted(data.items(), key=lambda x: x[1], reverse=True)[:5]: pct = round(count / max(max_val, 1) * 100) label = _SHORT_ACTION_APP.get(cat, cat) html += ( f'

' f'

{label}

' f'

{pct}%

' f'

' ) return html with _top5_col1_app: st.markdown( '

' '

Top 5 Questions Students Ask

' '

Type of action count for Questions across tagged videos.

', unsafe_allow_html=True ) if _q_data_app: st.markdown(_hbar_rows_html_app(_q_data_app, "#f87171", max(_q_data_app.values(), default=1)), unsafe_allow_html=True) else: st.markdown('

No data yet

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) with _top5_col2_app: st.markdown( '

' '

Top 5 Types of Requests & Feedback Students Give

' '

Type of action count for Request/Feedback across tagged videos.

', unsafe_allow_html=True ) if _rf_data_app: st.markdown(_hbar_rows_html_app(_rf_data_app, "#f87171", max(_rf_data_app.values(), default=1)), unsafe_allow_html=True) else: st.markdown('

No data yet

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) # -- TOP CONTRIBUTORS ----------------------------------------- st.divider() st.markdown( '

Top ContributorsAll Time

', unsafe_allow_html=True ) _contrib_json = json.dumps([{"author": m.get("author",""), "sentiment": m.get("sentiment","Neutral"), "topic": m.get("topic","General")} for m in all_data]) contributors = compute_top_contributors(_contrib_json) if contributors: max_count = contributors[0]["count"] rank_icons = {1: "🥇", 2: "🥈", 3: "🥉"} rank_classes = {1: "gold", 2: "silver", 3: "bronze"} for rank, c in enumerate(contributors, 1): bar_pct = int(c["count"] / max(max_count, 1) * 100) rank_cls = rank_classes.get(rank, "") rank_icon = rank_icons.get(rank, f"#{rank}") author = c["author"] count = c["count"] pos_pct = c["pos_pct"] neu_pct = c["neu_pct"] neg_pct = c["neg_pct"] html = ( f'

' f'

{rank_icon}

' f'

{author}

' f'

' f'' f'' f'' f'

' f'

{count} msgs

' f'

' ) st.markdown(html, unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) st.markdown( '

Sentiment & Topic Breakdown — Top Contributors

' '

Top bar = sentiment (Neg/Neu/Pos) · Bottom bar = topic mix · right = message count

', unsafe_allow_html=True ) n = len(contributors) y_sent_num = [i * 2 + 0.3 for i in range(n)] y_topic_num = [i * 2 - 0.3 for i in range(n)] tick_vals = [i * 2 for i in range(n)] tick_text = [c["author"][:22] for c in contributors] fig_combo = go.Figure() for key, label, color in [ ("neg_pct", "Neg", "#ef4444"), ("neu_pct", "Neu", "#eab308"), ("pos_pct", "Pos", "#22c55e"), ]: fig_combo.add_trace(go.Bar( name=label, y=y_sent_num, x=[c[key] for c in contributors], orientation="h", marker_color=color, legendgroup="sent", legendgrouptitle_text="Sentiment" if key == "neg_pct" else None, width=0.5, hovertemplate="" + label + ": %{x}%", )) for key, label, color in [ ("t_appr", "Appreciation", "#f59e0b"), ("t_ques", "Question", "#3b82f6"), ("t_rf", "Request/Feedback","#8b5cf6"), ("t_promo", "Promo", "#ec4899"), ("t_spam", "Spam", "#ef4444"), ("t_gen", "General", "#6b7280"), ("t_mcq", "MCQ Answer", "#10b981"), ]: fig_combo.add_trace(go.Bar( name=label, y=y_topic_num, x=[c[key] for c in contributors], orientation="h", marker_color=color, legendgroup="topic", legendgrouptitle_text="Topic" if key == "t_appr" else None, width=0.5, hovertemplate="" + label + ": %{x}%", )) annotations = [] for i, c in enumerate(contributors): annotations.append(dict( x=102, y=y_sent_num[i], text=f"{c['count']} msgs", showarrow=False, xanchor="left", font=dict(size=10, color="#94a3b8"), xref="x", yref="y", )) chart_h = max(400, n * 56) layout_combo = plotly_layout(chart_h) layout_combo["barmode"] = "stack" layout_combo["bargap"] = 0.1 layout_combo["showlegend"] = True layout_combo["legend"] = dict( orientation="h", y=1.0, x=0, font=dict(size=12, color="#f1f5f9"), title_font=dict(size=12, color="#a78bfa"), groupclick="toggleitem", yanchor="bottom", xanchor="left", bgcolor="rgba(0,0,0,0)", ) layout_combo["margin"] = dict(l=10, r=80, t=80, b=10) layout_combo["xaxis"]["range"] = [0, 115] layout_combo["xaxis"]["ticksuffix"] = "%" layout_combo["yaxis"] = dict( tickvals=tick_vals, ticktext=tick_text, tickfont=dict(size=10), autorange="reversed", showgrid=False, zeroline=False, showline=False, ) layout_combo["annotations"] = annotations fig_combo.update_layout(**layout_combo) st.plotly_chart(fig_combo, config={"displayModeBar": False}) st.markdown('

', unsafe_allow_html=True) contrib_df = pd.DataFrame(contributors) csv_download(contrib_df, "Download CSV", "top_contributors.csv") else: st.info("Not enough data yet.") # -- REPEAT SPAMMERS ----------------------------------------- st.divider() st.markdown( '

Repeat SpammersAll Time

', unsafe_allow_html=True ) rs_col1, rs_col2 = st.columns([1, 1]) with rs_col1: rs_window = st.slider("Time window (sec)", 5, 60, 15, key="rs_window") with rs_col2: rs_min = st.slider("Min repeats to flag", 2, 10, 2, key="rs_min") _rs_json = json.dumps([{ "author": m.get("author",""), "text": m.get("text",""), "topic": m.get("topic","General"), "sentiment": m.get("sentiment","Neutral"), "time": m.get("time","") } for m in all_data]) repeat_spammers = detect_repeat_spammers(_rs_json, window_sec=rs_window, min_repeats=rs_min) if repeat_spammers: st.markdown( f'

' f'Found {len(repeat_spammers)} users repeating the same message ' f'>=>{rs_min}x within {rs_window}s

', unsafe_allow_html=True ) for rs in repeat_spammers: _t_color = TOPIC_COLOR.get(rs["topic"], "#6b7280") _s_color = SENT_COLORS.get(rs["sentiment"], "#6b7280") _burst = rs["max_burst"] _total = rs["count"] _severity = "#ef4444" if _burst >= 5 else "#eab308" if _burst >= 3 else "#f59e0b" st.markdown( f'

' f'

⚠️ {rs["author"]}

' f'

' f'' f'🔁 {_burst}x in {rs_window}s' f'{_total} total' f'

' f'

"{rs["text"]}"

' f'

' f'{rs["sentiment"]}' f'{rs["topic"]}' f'First: {rs["first_seen"]}' f'Last: {rs["last_seen"]}' f'

', unsafe_allow_html=True ) rs_df = pd.DataFrame(repeat_spammers) csv_download(rs_df, "Download CSV", "repeat_spammers.csv") else: st.markdown( '

No repeat spammers detected in current window.

', unsafe_allow_html=True ) # -- MULTI-STREAM COMPARISON ---------------------------------- active_streams = [s for s in st.session_state.streams if store_llen(s["redis_key"]) > 0] if len(active_streams) > 1: st.divider() n_streams = len(active_streams) st.markdown( f'

Multi-Stream Comparison' f'{n_streams} streams

', unsafe_allow_html=True ) _stream_cache: dict[str, dict] = {} for _s in active_streams: _rkey = _s["redis_key"] _raw = load_stream_data(_rkey) if not _raw: continue _sdf = pd.DataFrame(_raw) _sdf["sentiment"] = _sdf["sentiment"].apply(clean_sentiment) _sdf["topic"] = _sdf["topic"].apply(clean_topic) if "topic" in _sdf.columns else "General" _sc = _sdf["sentiment"].value_counts().to_dict() _p = _sc.get("Positive", 0) _n = _sc.get("Neutral", 0) _g = _sc.get("Negative", 0) _t = max(_p + _n + _g, 1) _tc = {lbl: int((_sdf["topic"] == lbl).sum()) for lbl in TOPIC_LABELS} _top_topic = max(_tc, key=_tc.get) _eng_json = json.dumps([ {"sentiment": m.get("sentiment","Neutral"), "topic": m.get("topic","General"), "time": m.get("time","")} for m in _raw ]) _eng = compute_engagement(_eng_json) _title = _s.get("video_title") or _s.get("video_id") or _rkey _stream_cache[_rkey] = { "df": _sdf, "raw": _raw, "p": _p, "n": _n, "g": _g, "t": _t, "tc": _tc, "top_topic": _top_topic, "eng": _eng, "title": _title, "sidx": st.session_state.streams.index(_s), } st.markdown('

', unsafe_allow_html=True) st.markdown('

Head-to-Head Summary

All active streams at a glance

', unsafe_allow_html=True) _hth_rows = [] for _s in active_streams: _rkey = _s["redis_key"] if _rkey not in _stream_cache: continue _c = _stream_cache[_rkey] _sidx = _c["sidx"] _hth_rows.append({ "Stream": f"Stream {STREAM_NAMES[_sidx]}", "Title": _c["title"][:30], "Messages": _c["t"], "Positive %": f"{_c['p']/_c['t']*100:.1f}%", "Neutral %": f"{_c['n']/_c['t']*100:.1f}%", "Negative %": f"{_c['g']/_c['t']*100:.1f}%", "Top Topic": _c["top_topic"], "Engagement": f"{_c['eng']['score']}/100 {_c['eng']['grade']}", }) if _hth_rows: st.dataframe(pd.DataFrame(_hth_rows), hide_index=True, use_container_width=True) st.markdown('

', unsafe_allow_html=True) chunk_size = 2 _cached_keys = [_s["redis_key"] for _s in active_streams if _s["redis_key"] in _stream_cache] for row_start in range(0, len(_cached_keys), chunk_size): row_keys = _cached_keys[row_start:row_start + chunk_size] cols = st.columns(len(row_keys)) for col, _rkey in zip(cols, row_keys): _c = _stream_cache[_rkey] _sidx = _c["sidx"] color = STREAM_COLORS[_sidx] slabel = STREAM_NAMES[_sidx] _p, _n, _g, _t = _c["p"], _c["n"], _c["g"], _c["t"] _eng = _c["eng"] _tc = _c["tc"] with col: st.markdown( f'' f'Stream {slabel} · {_c["title"][:25]}', unsafe_allow_html=True ) _ec = "#22c55e" if _eng["score"] >= 70 else "#eab308" if _eng["score"] >= 40 else "#ef4444" st.markdown( f'

' f'

{_eng["score"]}

' f'

Engagement

' f'

{_t}

' f'

Messages

' f'

{_p/_t*100:.0f}%

' f'

Positive

' f'

', unsafe_allow_html=True ) st.markdown('

', unsafe_allow_html=True) st.markdown('

Sentiment

', unsafe_allow_html=True) fig_s = go.Figure(go.Bar( x=["Pos", "Neu", "Neg"], y=[_p, _n, _g], marker_color=["#22c55e", "#eab308", "#ef4444"], marker_line_width=0, text=[_p, _n, _g], textposition="outside", hovertemplate="%{x}: %{y}", )) fig_s.update_layout(**plotly_layout(180)) st.plotly_chart(fig_s, config={"displayModeBar": False}) st.markdown('

', unsafe_allow_html=True) st.markdown('

Topic Breakdown

', unsafe_allow_html=True) _tc_colors = ["#f59e0b","#3b82f6","#ec4899","#ef4444","#6b7280","#10b981"] fig_t = go.Figure(go.Bar( x=TOPIC_LABELS, y=[_tc[l] for l in TOPIC_LABELS], marker_color=_tc_colors, marker_line_width=0, text=[_tc[l] for l in TOPIC_LABELS], textposition="outside", hovertemplate="%{x}: %{y}", )) _tl = plotly_layout(180) _tl["xaxis"]["tickfont"] = dict(size=8) fig_t.update_layout(**_tl) st.plotly_chart(fig_t, config={"displayModeBar": False}) st.markdown('

', unsafe_allow_html=True) st.markdown('

Positive Ratio Over Time

Rolling positive % per stream (synced refresh)

', unsafe_allow_html=True) fig_overlay = go.Figure() for _rkey, _c in _stream_cache.items(): _sidx = _c["sidx"] color = STREAM_COLORS[_sidx] slabel = STREAM_NAMES[_sidx] _sdf = _c["df"].copy() _sdf["is_pos"] = (_sdf["sentiment"] == "Positive").astype(int) _sdf["rolling"] = _sdf["is_pos"].rolling(10, min_periods=1).mean() * 100 fig_overlay.add_trace(go.Scatter( x=list(range(len(_sdf))), y=_sdf["rolling"], mode="lines", name=f"Stream {slabel} · {_c['title'][:20]}", line=dict(color=color, width=2), hovertemplate=f"Stream {slabel} msg %{{x}}: %{{y:.1f}}%", )) layout_ov = plotly_layout(220) layout_ov["showlegend"] = True layout_ov["legend"] = dict(orientation="h", y=1.08, font=dict(size=11, color="#f1f5f9")) layout_ov["yaxis"]["range"] = [0, 100] fig_overlay.update_layout(**layout_ov) st.plotly_chart(fig_overlay, config={"displayModeBar": False}) st.markdown('

', unsafe_allow_html=True) elif len(st.session_state.streams) > 1: st.divider() st.info("Add video IDs to your extra stream slots and click ▶ Start to enable multi-stream comparison.") # -- AUTO REFRESH --------------------------------------------- if auto_refresh: time.sleep(refresh_rate) st.rerun()