# pages/stats.py
"""
Stats & Info view — all analytics charts, engagement, contributors, word cloud.
Imports shared infrastructure from the `shared` module; the parent of this file's directory is put on sys.path so that import resolves.
All session state values are set by app.py before this page runs.
"""
import streamlit as st
import json
import pandas as pd
import plotly.graph_objects as go
import time
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from shared import (
store_llen, load_stream_data,
clean_sentiment, clean_topic, csv_download, plotly_layout,
compute_velocity, build_heatmap_data, check_alert, compute_engagement,
compute_top_contributors, compute_word_freq, check_spam_alert, detect_repeat_spammers,
TOPIC_LABELS, TOPIC_COLOR, SENT_COLORS, STREAM_NAMES, STREAM_COLORS,
)
# -- Get shared state from session ----------------------------
# Settings are read from st.session_state; every lookup carries a fallback so
# a missing key yields the default instead of raising.
_session_get = st.session_state.get

# Refresh behaviour and the size of the "window" slice.
auto_refresh = _session_get("auto_refresh", True)
refresh_rate = _session_get("refresh_rate", 10)
msg_limit = _session_get("msg_limit", 50)

# Negative-sentiment alert settings.
alert_enabled = _session_get("alert_enabled", True)
alert_threshold = _session_get("alert_threshold", 0.4)
alert_window = _session_get("alert_window", 15)

# Spam alert settings.
spam_alert_on = _session_get("spam_alert_on", True)
spam_threshold = _session_get("spam_threshold", 0.3)

# Store key of the stream this page reports on.
_primary_key = _session_get("_primary_key", "chat_messages")
# -- Load data ------------------------------------------------
# Upper bound on how many stored messages feed the cumulative views.
_CUMULATIVE_CAP = 50_000
_current_len = store_llen(_primary_key)

# A limit is only passed once the store has grown past the cap; below that
# the loader is called with limit=None.
if _current_len > _CUMULATIVE_CAP:
    _load_limit = _CUMULATIVE_CAP
else:
    _load_limit = None
all_data = load_stream_data(_primary_key, limit=_load_limit)

# Window slice: the last msg_limit entries, or everything when there are
# no more than msg_limit of them.
if len(all_data) > msg_limit:
    data = all_data[-msg_limit:]
else:
    data = all_data
# Empty-state guard: when nothing was loaded, render a placeholder and end this
# script run before any DataFrame work below.
if not all_data:
st.markdown(
'
📭
'
'
No messages yet
'
'
Set a video ID in the sidebar, then click ▶ Start
'
'
', unsafe_allow_html=True
)
# With auto-refresh on, wait one refresh interval and request a rerun so newly
# stored messages are picked up; otherwise the run is halted by st.stop().
if auto_refresh:
time.sleep(refresh_rate)
st.rerun()
st.stop()
# Two frames back the page: df holds the window slice, all_df everything loaded.
df = pd.DataFrame(data)
all_df = pd.DataFrame(all_data)

# Normalise labels on both frames. The "topic" column is optional in the
# loaded rows; when it is absent every row is assigned "General".
for _frame in (df, all_df):
    _frame["sentiment"] = _frame["sentiment"].apply(clean_sentiment)
    if "topic" in _frame.columns:
        _frame["topic"] = _frame["topic"].apply(clean_topic)
    else:
        _frame["topic"] = "General"
# -- ALERT BANNERS --------------------------------------------
# Negative-sentiment banner: check_alert() is evaluated over the cumulative
# frame using the threshold and window read from session state.
if alert_enabled:
alert = check_alert(all_df, threshold=alert_threshold, window=alert_window)
total_now = len(all_df)
# A change in the cumulative message count re-arms a dismissed banner, but only
# while an alert is active.
# NOTE(review): last_alert_count and alert_dismissed are read as attributes with
# no default; this relies on app.py having initialised them (see module docstring).
if total_now != st.session_state.last_alert_count:
st.session_state.last_alert_count = total_now
if alert:
st.session_state.alert_dismissed = False
# The banner text reads alert["neg_ratio"], alert["window"] and alert["count"].
if alert and not st.session_state.alert_dismissed:
a1, a2 = st.columns([8, 1])
with a1:
st.markdown(
f''
f'
🚨'
f'
'
f'
Negative sentiment spike — {alert["neg_ratio"]*100:.0f}% negative in last {alert["window"]} messages
'
f'
{alert["count"]} of {alert["window"]} messages are negative. Consider moderating.
'
f'
',
unsafe_allow_html=True
)
with a2:
# Dismissing stores the flag and reruns the script.
if st.button("✕ Dismiss", key="dismiss_alert"):
st.session_state.alert_dismissed = True
st.rerun()
# Spam banner: shares alert_window with the sentiment alert. Unlike the
# sentiment banner, its dismissed flag is cleared whenever check_spam_alert()
# returns a falsy value, independent of the message count.
if spam_alert_on:
spam_alert = check_spam_alert(all_df, threshold=spam_threshold, window=alert_window)
if spam_alert and not st.session_state.get("spam_dismissed", False):
s1, s2 = st.columns([8, 1])
with s1:
st.markdown(
f''
f'
🛡️'
f'
'
f'
Spam surge detected — {spam_alert["spam_ratio"]*100:.0f}% spam in last {spam_alert["window"]} messages
'
f'
{spam_alert["count"]} spam messages detected. Chat may be under flood attack.
'
f'
',
unsafe_allow_html=True
)
with s2:
if st.button("✕", key="dismiss_spam"):
st.session_state.spam_dismissed = True
st.rerun()
elif not spam_alert:
st.session_state.spam_dismissed = False
# -- CUMULATIVE STATS -----------------------------------------
# Tally every loaded message by its cleaned sentiment label.
all_counts = all_df["sentiment"].value_counts().to_dict()
c_pos, c_neu, c_neg = (
    all_counts.get(_label, 0) for _label in ("Positive", "Neutral", "Negative")
)
# Floored at 1 so the percentage maths further down never divides by zero.
c_total = max(c_pos + c_neu + c_neg, 1)

# compute_velocity is handed a JSON string; only the sentiment field is sent,
# with "Neutral" substituted when a message has none.
_velocity_payload = json.dumps(
    [{"sentiment": m.get("sentiment", "Neutral")} for m in all_data]
)
velocity = compute_velocity(_velocity_payload)
# Section heading for the cumulative sentiment cards.
st.markdown(
'Cumulative SentimentAll Time
',
unsafe_allow_html=True
)
# Five equal-width columns: positive, neutral, negative, total, velocity.
# Each percentage divides by c_total.
v1, v2, v3, v4, v5 = st.columns([1, 1, 1, 1, 1])
with v1:
st.markdown(
f''
f'
{c_pos}
Positive
{c_pos/c_total*100:.1f}% of total
',
unsafe_allow_html=True
)
with v2:
st.markdown(
f''
f'
{c_neu}
Neutral
{c_neu/c_total*100:.1f}% of total
',
unsafe_allow_html=True
)
with v3:
st.markdown(
f''
f'
{c_neg}
Negative
{c_neg/c_total*100:.1f}% of total
',
unsafe_allow_html=True
)
with v4:
st.markdown(
f''
f'
{c_total}
Total
all time
',
unsafe_allow_html=True
)
# Velocity card: reads velocity["color"], ["direction"], ["label"] and
# ["delta"]; delta is formatted as a signed whole-number percentage.
with v5:
vc = velocity["color"]
st.markdown(
f''
f'
{velocity["direction"]}
'
f'
'
f'
{velocity["label"]}
'
f'
Sentiment Velocity
'
f'{velocity["delta"]:+.0%} pos shift
'
f'
',
unsafe_allow_html=True
)
# -- CUMULATIVE TOPIC -----------------------------------------
# Divider plus section heading for the per-topic cards that follow.
st.divider()
st.markdown(
'Cumulative TopicAll Time
',
unsafe_allow_html=True
)
# One colour per topic label, taken from the shared TOPIC_COLOR mapping instead
# of a hand-maintained literal list. A literal list makes the zip() over
# TOPIC_LABELS silently drop cards whenever the two lengths differ, and it can
# drift from the colours the topic pills and Topic Breakdown chart already
# read from TOPIC_COLOR.
_topic_colors_list = [TOPIC_COLOR[_lbl] for _lbl in TOPIC_LABELS]
# One column per topic label.
_ct_cols = st.columns(len(TOPIC_LABELS))
# One card per (label, colour) pair; zip() stops at the shorter of TOPIC_LABELS
# and _topic_colors_list.
# The "topic" column check is always true at this point because the column is
# assigned unconditionally when all_df is prepared.
# Share is relative to c_total (the sum of the three sentiment classes).
for _ci, (_lbl, _clr) in enumerate(zip(TOPIC_LABELS, _topic_colors_list)):
_cnt = int((all_df["topic"] == _lbl).sum()) if "topic" in all_df.columns else 0
_pct = _cnt / max(c_total, 1) * 100
with _ct_cols[_ci]:
st.markdown(
f''
f'
{_cnt}
'
f'
{_lbl}
'
f'
{_pct:.1f}% of msgs
',
unsafe_allow_html=True
)
# -- ENGAGEMENT SCORE (moved here — after topic, before window) ----
# compute_engagement is handed a JSON string with sentiment/topic/time per
# message; missing keys fall back to "Neutral", "General" and "".
_eng_json = json.dumps([{"sentiment": m.get("sentiment","Neutral"), "topic": m.get("topic","General"), "time": m.get("time","")} for m in all_data])
eng = compute_engagement(_eng_json)
st.divider()
st.markdown(
'Engagement ScoreLive
',
unsafe_allow_html=True
)
# Wide score card on the left, three compact metrics on the right.
ec1, ec2, ec3, ec4 = st.columns([2, 1, 1, 1])
with ec1:
# Score colour: green at >= 70, amber at >= 40, red below.
score_color = "#22c55e" if eng["score"] >= 70 else "#eab308" if eng["score"] >= 40 else "#ef4444"
# NOTE(review): bar_w mirrors the score; presumably the bar width used by the
# card markup — confirm.
bar_w = eng["score"]
st.markdown(
f''
f'
{eng["score"]}
'
f'
Engagement Score / 100 \u2014 {eng["grade"]}
'
f'
'
f'
'
f'
Msg rate {eng["rate"]}/min
'
f'
Positive {eng["pos_ratio"]*100:.0f}%
'
f'
Questions {eng["q_density"]*100:.0f}%
'
f'
',
unsafe_allow_html=True
)
# The same three inputs repeated as standalone metrics.
with ec2:
st.metric("Msgs/min", f"{eng['rate']:.1f}")
with ec3:
st.metric("Positive ratio", f"{eng['pos_ratio']*100:.0f}%")
with ec4:
st.metric("Question density", f"{eng['q_density']*100:.0f}%")
# -- WINDOW METRICS -------------------------------------------
# Same tally as the cumulative section, but over df, which is built from the
# last msg_limit loaded messages.
st.divider()
counts = df["sentiment"].value_counts().to_dict()
pos = counts.get("Positive", 0)
neu = counts.get("Neutral", 0)
neg = counts.get("Negative", 0)
# NOTE(review): total is floored at 1, so the "Messages" metric below shows 1
# even when no row carries one of the three labels.
total = max(pos + neu + neg, 1)
st.markdown(
f'Window SnapshotLast {msg_limit} msgs
',
unsafe_allow_html=True
)
# Count plus share-of-window as the metric delta text.
c1, c2, c3, c4 = st.columns(4)
c1.metric("Messages", total)
c2.metric("Positive", pos, f"{pos/total*100:.1f}%")
c3.metric("Neutral", neu, f"{neu/total*100:.1f}%")
c4.metric("Negative", neg, f"{neg/total*100:.1f}%")
# -- SENTIMENT + TOPIC CHARTS (ALL TIME) ----------------------
# Four side-by-side charts built from the cumulative counts: sentiment bar,
# sentiment donut, topic bar, topic donut.
st.divider()
col_s1, col_s2, col_t1, col_t2 = st.columns(4)
# Sentiment bar chart with an optional data table and CSV download.
with col_s1:
st.markdown('', unsafe_allow_html=True)
st.markdown('
Sentiment Distribution
All-time message count by sentiment class
', unsafe_allow_html=True)
fig_bar = go.Figure(go.Bar(
x=["Positive", "Neutral", "Negative"],
y=[c_pos, c_neu, c_neg],
marker_color=["#22c55e", "#eab308", "#ef4444"],
marker_line_width=0,
text=[c_pos, c_neu, c_neg],
textposition="outside",
textfont=dict(size=12),
hovertemplate="
%{x}Count: %{y}
",
))
fig_bar.update_layout(**plotly_layout(260))
st.plotly_chart(fig_bar, config={"displayModeBar": False})
bar_hdr, bar_dl = st.columns([1, 1])
with bar_hdr:
show_bar_data = st.checkbox("View data", key="show_bar")
with bar_dl:
bar_df = pd.DataFrame({"Sentiment": ["Positive", "Neutral", "Negative"], "Count": [c_pos, c_neu, c_neg]})
csv_download(bar_df, "Download CSV", "sentiment_distribution.csv")
if show_bar_data:
st.dataframe(bar_df, hide_index=True)
st.markdown('
', unsafe_allow_html=True)
# Sentiment donut (hole=0.58) with a horizontal legend; the exported table also
# carries a pre-formatted percentage column based on c_total.
with col_s2:
st.markdown('', unsafe_allow_html=True)
st.markdown('
Sentiment Donut
All-time proportional share per class
', unsafe_allow_html=True)
fig_pie = go.Figure(go.Pie(
labels=["Positive", "Neutral", "Negative"],
values=[c_pos, c_neu, c_neg],
marker_colors=["#22c55e", "#eab308", "#ef4444"],
hole=0.58,
textinfo="percent",
hovertemplate="
%{label}%{value} messages (%{percent})
",
))
fig_pie.update_layout(
**{**plotly_layout(260),
"showlegend": True,
"legend": dict(orientation="h", y=-0.08, font=dict(size=11, color="#f1f5f9"))}
)
st.plotly_chart(fig_pie, config={"displayModeBar": False})
pie_hdr, pie_dl = st.columns([1, 1])
with pie_hdr:
show_pie_data = st.checkbox("View data", key="show_pie")
with pie_dl:
pie_df = pd.DataFrame({
"Sentiment": ["Positive", "Neutral", "Negative"],
"Count": [c_pos, c_neu, c_neg],
"Percentage": [f"{c_pos/c_total*100:.1f}%", f"{c_neu/c_total*100:.1f}%", f"{c_neg/c_total*100:.1f}%"]
})
csv_download(pie_df, "Download CSV", "sentiment_breakdown.csv")
if show_pie_data:
st.dataframe(pie_df, hide_index=True)
st.markdown('
', unsafe_allow_html=True)
# Topic bar chart. _tc_vals and _tc_colors are defined here and reused by the
# topic donut in the next column.
# NOTE(review): _tc_colors is a literal list of six colours while the bars are
# driven by TOPIC_LABELS; TOPIC_COLOR is indexed per label elsewhere in this
# file — consider deriving the colours from it so the two cannot drift.
with col_t1:
st.markdown('', unsafe_allow_html=True)
st.markdown('
Topic Distribution
All-time message count by topic class
', unsafe_allow_html=True)
_tc_vals = [int((all_df["topic"] == l).sum()) if "topic" in all_df.columns else 0 for l in TOPIC_LABELS]
_tc_colors = ["#f59e0b", "#3b82f6", "#ec4899", "#ef4444", "#6b7280", "#10b981"]
fig_tbar = go.Figure(go.Bar(
x=TOPIC_LABELS,
y=_tc_vals,
marker_color=_tc_colors,
marker_line_width=0,
text=_tc_vals,
textposition="outside",
textfont=dict(size=11),
hovertemplate="
%{x}Count: %{y}
",
))
# plotly_layout() is indexed as a dict with a nested "xaxis" dict; the tick
# font is shrunk on that returned layout before it is applied.
_tbar_layout = plotly_layout(260)
_tbar_layout["xaxis"]["tickfont"] = dict(size=9)
fig_tbar.update_layout(**_tbar_layout)
st.plotly_chart(fig_tbar, config={"displayModeBar": False})
st.markdown('
', unsafe_allow_html=True)
# Topic donut, same values and colours as the topic bar chart.
with col_t2:
st.markdown('', unsafe_allow_html=True)
st.markdown('
Topic Donut
All-time proportional share per topic
', unsafe_allow_html=True)
fig_tpie = go.Figure(go.Pie(
labels=TOPIC_LABELS,
values=_tc_vals,
marker_colors=_tc_colors,
hole=0.58,
textinfo="percent",
hovertemplate="
%{label}%{value} messages (%{percent})
",
))
fig_tpie.update_layout(
**{**plotly_layout(260),
"showlegend": True,
"legend": dict(orientation="h", y=-0.08, font=dict(size=10, color="#f1f5f9"))}
)
st.plotly_chart(fig_tpie, config={"displayModeBar": False})
st.markdown('
', unsafe_allow_html=True)
# -- SENTIMENT HEATMAP OVER TIME ------------------------------
st.divider()
st.markdown(
'Sentiment HeatmapOver Time
',
unsafe_allow_html=True
)
# build_heatmap_data is handed time/sentiment pairs as a JSON string with
# bucket_minutes=1. The result is used as a DataFrame below (.empty, a "bucket"
# column and one column per sentiment label).
heatmap_data = build_heatmap_data(json.dumps([{"time": m.get("time",""), "sentiment": m.get("sentiment","Neutral")} for m in all_data]), bucket_minutes=1)
if not heatmap_data.empty:
st.markdown('', unsafe_allow_html=True)
st.markdown('
Sentiment Over Time
Message volume per sentiment per minute bucket
', unsafe_allow_html=True)
fig_heat = go.Figure()
# One line per sentiment; only the Negative trace is filled down to zero.
# NOTE(review): color is a hex string here (e.g. "#ef4444"), so the .replace()
# chain on fillcolor matches neither ")" nor "rgb" and passes the hex colour
# through unchanged, i.e. fully opaque. A translucent rgba(...) value was
# probably intended — confirm.
for sent, color in [("Positive", "#22c55e"), ("Neutral", "#eab308"), ("Negative", "#ef4444")]:
fig_heat.add_trace(go.Scatter(
x=heatmap_data["bucket"],
y=heatmap_data[sent],
name=sent,
mode="lines+markers",
line=dict(color=color, width=2),
marker=dict(size=4),
fill="tozeroy" if sent == "Negative" else None,
fillcolor=color.replace(")", ",0.08)").replace("rgb", "rgba") if sent == "Negative" else None,
hovertemplate=f"
{sent}%{{x}}
Count: %{{y}}
",
))
# Legend above the plot; x ticks formatted as hour:minute.
layout = plotly_layout(220)
layout["showlegend"] = True
layout["legend"] = dict(orientation="h", y=1.08, font=dict(size=11))
layout["xaxis"]["tickformat"] = "%H:%M"
fig_heat.update_layout(**layout)
st.plotly_chart(fig_heat, config={"displayModeBar": False})
heat_hdr, heat_dl = st.columns([1, 1])
with heat_hdr:
show_heat_data = st.checkbox("View data", key="show_heat")
with heat_dl:
# The "bucket" column is renamed to "time_bucket" for export and display.
csv_download(heatmap_data.rename(columns={"bucket": "time_bucket"}), "Download CSV", "sentiment_heatmap.csv")
if show_heat_data:
st.dataframe(heatmap_data.rename(columns={"bucket": "time_bucket"}), hide_index=True)
st.markdown('
', unsafe_allow_html=True)
else:
st.info("Not enough timestamped data for heatmap yet.")
# -- TOPIC DISTRIBUTION ---------------------------------------
st.divider()
st.markdown(
'Topic DistributionAll Time
',
unsafe_allow_html=True
)
# Cumulative message count per topic label.
topic_counts = {
label: int((all_df["topic"] == label).sum())
for label in TOPIC_LABELS
}
# Pill strip: one fragment per label, accumulated into a single string and
# rendered with one st.markdown call. TOPIC_COLOR is indexed directly, so it
# must contain every entry of TOPIC_LABELS.
pills = ''
for label in TOPIC_LABELS:
color = TOPIC_COLOR[label]
count = topic_counts[label]
pills += (
f'
'
f'
{count}
'
f'
{label}
'
f'
'
)
pills += '
'
st.markdown(pills, unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
st.markdown('
Topic Breakdown
All-time message count per topic category
', unsafe_allow_html=True)
# Bar chart over the same counts, coloured from TOPIC_COLOR.
fig_topic = go.Figure(go.Bar(
x=TOPIC_LABELS,
y=[topic_counts[l] for l in TOPIC_LABELS],
marker_color=[TOPIC_COLOR[l] for l in TOPIC_LABELS],
marker_line_width=0,
text=[topic_counts[l] for l in TOPIC_LABELS],
textposition="outside",
textfont=dict(size=11),
hovertemplate="
%{x}Count: %{y}
",
))
fig_topic.update_layout(**plotly_layout(250))
st.plotly_chart(fig_topic, config={"displayModeBar": False})
# Optional data table and CSV export of the counts.
topic_hdr, topic_dl = st.columns([1, 1])
with topic_hdr:
show_topic_data = st.checkbox("View data", key="show_topic")
with topic_dl:
topic_df = pd.DataFrame({"Topic": TOPIC_LABELS, "Count": [topic_counts[l] for l in TOPIC_LABELS]})
csv_download(topic_df, "Download CSV", "topic_distribution.csv")
if show_topic_data:
st.dataframe(topic_df, hide_index=True)
st.markdown('
', unsafe_allow_html=True)
# -- Topic Sentiment breakdown --------------------------------
st.markdown('', unsafe_allow_html=True)
st.markdown('
Sentiment by Topic
% positive / neutral / negative within each topic category
', unsafe_allow_html=True)
# For each topic label, the share of its messages in each sentiment class as a
# whole-number percentage. Topics with no messages get an all-zero row.
# NOTE(review): the three shares are rounded independently, so a row can sum to
# 99 or 101 while the x axis below is fixed to [0, 100].
_topic_sent_data = []
for _lbl in TOPIC_LABELS:
_mask = all_df["topic"] == _lbl
_total = int(_mask.sum())
if _total == 0:
_topic_sent_data.append({"topic": _lbl, "pos": 0, "neu": 0, "neg": 0})
continue
_sub = all_df[_mask]
_topic_sent_data.append({
"topic": _lbl,
"pos": round((_sub["sentiment"] == "Positive").sum() / _total * 100),
"neu": round((_sub["sentiment"] == "Neutral").sum() / _total * 100),
"neg": round((_sub["sentiment"] == "Negative").sum() / _total * 100),
})
# Horizontal stacked bars; traces are added in Neg, Neu, Pos order.
fig_ts = go.Figure()
for _sk, _sl, _sc in [("neg", "Neg", "#ef4444"), ("neu", "Neu", "#eab308"), ("pos", "Pos", "#22c55e")]:
fig_ts.add_trace(go.Bar(
y=[d["topic"] for d in _topic_sent_data],
x=[d[_sk] for d in _topic_sent_data],
name=_sl,
orientation="h",
marker_color=_sc,
hovertemplate="
%{y}" + _sl + ": %{x}%
",
))
# Stacked mode, percent axis, and a reversed y axis so the first label in
# TOPIC_LABELS is drawn at the top.
_layout_ts = plotly_layout(260)
_layout_ts["barmode"] = "stack"
_layout_ts["showlegend"] = True
_layout_ts["legend"] = dict(orientation="h", y=1.08, x=0.35, font=dict(size=11))
_layout_ts["xaxis"]["range"] = [0, 100]
_layout_ts["xaxis"]["ticksuffix"] = "%"
_layout_ts["yaxis"]["autorange"] = "reversed"
fig_ts.update_layout(**_layout_ts)
st.plotly_chart(fig_ts, config={"displayModeBar": False})
st.markdown('
', unsafe_allow_html=True)
# -- ACTION TYPE CHARTS ---------------------------------------
# Divider plus section heading; the heading text states a 100-message window,
# matching the tail(100) slice used for the counts in this section.
st.divider()
st.markdown(
'Action Type AnalysisLast 100 msgs
',
unsafe_allow_html=True
)
# action_type values charted under "Type of Questions".
_QUESTION_ACTIONS_APP = [
    "Access & Support",
    "Academic / Lecture / Concept Doubts",
    "Study Materials, Deliverables & Learning Resources",
    "Batch details / structure / offerings (incl faculty)",
    "Schedule & logistics (Batch)",
    "Guidance- What should I take/do?",
    "Suitability & Sufficiency (Is this enough?)",
    "Eligibility & audience fit - Can I take this?",
    "Information- Exam",
    "Information- Post Exam",
]
# action_type values charted under "Type of Requests & Feedback".
_REQUEST_ACTIONS_APP = [
    "Content requests",
    "Content Feedback",
    "Faculty Request",
    "Faculty Feedback",
    "Academic requests",
    "Language Request",
    "Language medium",
    "Product/feature requests (non-content)",
    "Offline expansion & event-city requests",
    "General Feedback",
    "Others",
]
# Display label for each action_type value; lookups use .get(k, k), so an
# unmapped value is shown as-is.
_SHORT_ACTION_APP = {
    "Access & Support": "Access & Support",
    "Academic / Lecture / Concept Doubts": "Academic Doubts",
    "Study Materials, Deliverables & Learning Resources": "Study Materials & Learning Resources",
    "Batch details / structure / offerings (incl faculty)": "Batch Details & Offerings",
    "Schedule & logistics (Batch)": "Batch Schedule & Logistics",
    "Guidance- What should I take/do?": "Guidance (What Should I Take/Do?)",
    "Suitability & Sufficiency (Is this enough?)": "Suitability & Sufficiency",
    "Eligibility & audience fit - Can I take this?": "Eligibility (Can I Take This?)",
    "Information- Exam": "Exam Information",
    "Information- Post Exam": "Post Exam Information",
    "Content requests": "Content requests",
    "Content Feedback": "Content Feedback",
    "Faculty Request": "Faculty Request",
    "Faculty Feedback": "Faculty Feedback",
    "Academic requests": "Academic requests",
    "Language Request": "Language Request",
    "Language medium": "Language Medium",
    "Product/feature requests (non-content)": "Non Content Product Requests",
    "Offline expansion & event-city requests": "Offline Expansion & Event Requests",
    "General Feedback": "General Feedback",
    "Others": "Others",
}

_all_actions_app = _QUESTION_ACTIONS_APP + _REQUEST_ACTIONS_APP
if "action_type" in all_df.columns:
    # Slice the last 100 rows once instead of re-slicing the frame for every
    # label; the per-label comparison itself is unchanged.
    _recent_actions_app = all_df.tail(100)["action_type"]
    _at_counts_app: dict[str, int] = {
        _at: int((_recent_actions_app == _at).sum()) for _at in _all_actions_app
    }
else:
    # No action_type column in the loaded rows: every category counts zero.
    _at_counts_app = dict.fromkeys(_all_actions_app, 0)

# Keep only categories that actually occurred, preserving list order. Every
# label is a key of _at_counts_app, so direct indexing is safe.
_q_data_app = {k: _at_counts_app[k] for k in _QUESTION_ACTIONS_APP if _at_counts_app[k] > 0}
_rf_data_app = {k: _at_counts_app[k] for k in _REQUEST_ACTIONS_APP if _at_counts_app[k] > 0}
_q_total_app = sum(_q_data_app.values())
_rf_total_app = sum(_rf_data_app.values())
# Two charts side by side: question categories on the left, request/feedback
# categories on the right. Each shows only categories with a non-zero count,
# sorted by count descending, with the category total in its heading.
_at_col1_app, _at_col2_app = st.columns(2)
with _at_col1_app:
st.markdown(
f'Type of Questions
'
f'
({_q_total_app} comments)
',
unsafe_allow_html=True
)
if _q_data_app:
_q_sorted_app = sorted(_q_data_app.items(), key=lambda x: x[1], reverse=True)
# Tick labels use the short display names, falling back to the raw label.
fig_q_app = go.Figure(go.Bar(
x=[_SHORT_ACTION_APP.get(k, k) for k, _ in _q_sorted_app],
y=[v for _, v in _q_sorted_app],
marker_color="#4a90d9", marker_line_width=0,
text=[v for _, v in _q_sorted_app], textposition="outside",
textfont=dict(size=11, color="#fff"),
hovertemplate="
%{x}Comments: %{y}
",
))
fig_q_app.update_layout(**plotly_layout(280))
st.plotly_chart(fig_q_app, config={"displayModeBar": False})
else:
st.markdown('
No data yet
', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
with _at_col2_app:
st.markdown(
f'Type of Requests & Feedback
'
f'
({_rf_total_app} comments)
',
unsafe_allow_html=True
)
if _rf_data_app:
_rf_sorted_app = sorted(_rf_data_app.items(), key=lambda x: x[1], reverse=True)
fig_rf_app = go.Figure(go.Bar(
x=[_SHORT_ACTION_APP.get(k, k) for k, _ in _rf_sorted_app],
y=[v for _, v in _rf_sorted_app],
marker_color="#f5a623", marker_line_width=0,
text=[v for _, v in _rf_sorted_app], textposition="outside",
textfont=dict(size=11, color="#fff"),
hovertemplate="
%{x}Comments: %{y}
",
))
fig_rf_app.update_layout(**plotly_layout(280))
st.plotly_chart(fig_rf_app, config={"displayModeBar": False})
else:
st.markdown('
No data yet
', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
# Top-5 horizontal bar panels
_top5_col1_app, _top5_col2_app = st.columns(2)
# Build the row markup for one "Top 5" panel and return it as a single string.
#   data    - mapping of action-type label -> count
#   color   - colour string supplied by the caller
#   max_val - count that corresponds to 100%; floored at 1 before dividing
# Entries are taken in descending count order and limited to the first five.
# For each entry, pct is the rounded percentage of max_val and label is the
# display name from _SHORT_ACTION_APP (falling back to the raw label).
# NOTE(review): the per-row f-string below is empty as written, so pct, label
# and color are never used and the function returns "" — confirm the row
# template has not been lost.
def _hbar_rows_html_app(data: dict, color: str, max_val: int) -> str:
html = ""
for cat, count in sorted(data.items(), key=lambda x: x[1], reverse=True)[:5]:
pct = round(count / max(max_val, 1) * 100)
label = _SHORT_ACTION_APP.get(cat, cat)
html += (
f''
)
return html
# Top-5 panel for question categories. The largest count in the filtered dict
# is passed as max_val, so _hbar_rows_html_app computes pct relative to the
# biggest category.
with _top5_col1_app:
st.markdown(
''
'
Top 5 Questions Students Ask
'
'
Type of action count for Questions across tagged videos.
',
unsafe_allow_html=True
)
if _q_data_app:
st.markdown(_hbar_rows_html_app(_q_data_app, "#f87171", max(_q_data_app.values(), default=1)), unsafe_allow_html=True)
else:
st.markdown('
No data yet
', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
# Top-5 panel for request/feedback categories; same colour argument as the
# questions panel.
with _top5_col2_app:
st.markdown(
''
'
Top 5 Types of Requests & Feedback Students Give
'
'
Type of action count for Request/Feedback across tagged videos.
',
unsafe_allow_html=True
)
if _rf_data_app:
st.markdown(_hbar_rows_html_app(_rf_data_app, "#f87171", max(_rf_data_app.values(), default=1)), unsafe_allow_html=True)
else:
st.markdown('
No data yet
', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
# -- TOP CONTRIBUTORS -----------------------------------------
st.divider()
st.markdown(
'Top ContributorsAll Time
',
unsafe_allow_html=True
)
# compute_top_contributors is handed author/sentiment/topic per message as a
# JSON string. Its result is used below as a list of dicts with "author",
# "count", "pos_pct"/"neu_pct"/"neg_pct" and the t_* topic keys.
_contrib_json = json.dumps([{"author": m.get("author",""), "sentiment": m.get("sentiment","Neutral"), "topic": m.get("topic","General")} for m in all_data])
contributors = compute_top_contributors(_contrib_json)
if contributors:
# The first entry's count is the 100% reference for the per-row bars.
# NOTE(review): assumes contributors is sorted by count descending — confirm.
max_count = contributors[0]["count"]
rank_icons = {1: "🥇", 2: "🥈", 3: "🥉"}
rank_classes = {1: "gold", 2: "silver", 3: "bronze"}
# Leaderboard rows: medal icon for ranks 1-3, "#<rank>" for the rest.
# NOTE(review): author originates from message data and is interpolated into
# markup rendered with unsafe_allow_html=True; consider html.escape() unless
# authors are sanitised upstream — confirm.
for rank, c in enumerate(contributors, 1):
bar_pct = int(c["count"] / max(max_count, 1) * 100)
rank_cls = rank_classes.get(rank, "")
rank_icon = rank_icons.get(rank, f"#{rank}")
author = c["author"]
count = c["count"]
pos_pct = c["pos_pct"]
neu_pct = c["neu_pct"]
neg_pct = c["neg_pct"]
html = (
f''
f'
{rank_icon}
'
f'
{author}
'
f'
'
f'
'
f''
f''
f''
f'
'
f'
{count} msgs
'
f'
'
)
st.markdown(html, unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
st.markdown(
'
Sentiment & Topic Breakdown — Top Contributors
'
'
Top bar = sentiment (Neg/Neu/Pos) · Bottom bar = topic mix · right = message count
',
unsafe_allow_html=True
)
# Combined chart: each contributor occupies a band centred on y = 2*i, with the
# sentiment bar at +0.3 and the topic bar at -0.3. Author names (cut to 22
# characters) label the band centres.
n = len(contributors)
y_sent_num = [i * 2 + 0.3 for i in range(n)]
y_topic_num = [i * 2 - 0.3 for i in range(n)]
tick_vals = [i * 2 for i in range(n)]
tick_text = [c["author"][:22] for c in contributors]
fig_combo = go.Figure()
# Sentiment traces share the "sent" legend group; the group title is attached
# to the first trace only.
for key, label, color in [
("neg_pct", "Neg", "#ef4444"),
("neu_pct", "Neu", "#eab308"),
("pos_pct", "Pos", "#22c55e"),
]:
fig_combo.add_trace(go.Bar(
name=label,
y=y_sent_num,
x=[c[key] for c in contributors],
orientation="h",
marker_color=color,
legendgroup="sent",
legendgrouptitle_text="Sentiment" if key == "neg_pct" else None,
width=0.5,
hovertemplate="
" + label + ": %{x}%
",
))
# Topic traces share the "topic" legend group; seven topic keys are charted.
for key, label, color in [
("t_appr", "Appreciation", "#f59e0b"),
("t_ques", "Question", "#3b82f6"),
("t_rf", "Request/Feedback","#8b5cf6"),
("t_promo", "Promo", "#ec4899"),
("t_spam", "Spam", "#ef4444"),
("t_gen", "General", "#6b7280"),
("t_mcq", "MCQ Answer", "#10b981"),
]:
fig_combo.add_trace(go.Bar(
name=label,
y=y_topic_num,
x=[c[key] for c in contributors],
orientation="h",
marker_color=color,
legendgroup="topic",
legendgrouptitle_text="Topic" if key == "t_appr" else None,
width=0.5,
hovertemplate="
" + label + ": %{x}%
",
))
# Message-count labels are placed at x=102, next to each sentiment bar; the
# x range below is widened to 115 to leave room for them.
annotations = []
for i, c in enumerate(contributors):
annotations.append(dict(
x=102, y=y_sent_num[i],
text=f"
{c['count']} msgs",
showarrow=False,
xanchor="left",
font=dict(size=10, color="#94a3b8"),
xref="x", yref="y",
))
# Height grows with the number of contributors (56 px each, minimum 400).
chart_h = max(400, n * 56)
layout_combo = plotly_layout(chart_h)
layout_combo["barmode"] = "stack"
layout_combo["bargap"] = 0.1
layout_combo["showlegend"] = True
layout_combo["legend"] = dict(
orientation="h", y=1.0, x=0,
font=dict(size=12, color="#f1f5f9"),
title_font=dict(size=12, color="#a78bfa"),
groupclick="toggleitem",
yanchor="bottom",
xanchor="left",
bgcolor="rgba(0,0,0,0)",
)
layout_combo["margin"] = dict(l=10, r=80, t=80, b=10)
layout_combo["xaxis"]["range"] = [0, 115]
layout_combo["xaxis"]["ticksuffix"] = "%"
# The y axis is replaced wholesale: custom ticks, reversed so the first
# contributor is on top, no grid or axis lines.
layout_combo["yaxis"] = dict(
tickvals=tick_vals,
ticktext=tick_text,
tickfont=dict(size=10),
autorange="reversed",
showgrid=False,
zeroline=False,
showline=False,
)
layout_combo["annotations"] = annotations
fig_combo.update_layout(**layout_combo)
st.plotly_chart(fig_combo, config={"displayModeBar": False})
st.markdown('
', unsafe_allow_html=True)
# CSV export of the contributor records as returned.
contrib_df = pd.DataFrame(contributors)
csv_download(contrib_df, "Download CSV", "top_contributors.csv")
else:
st.info("Not enough data yet.")
# -- REPEAT SPAMMERS -----------------------------------------
st.divider()
st.markdown(
'Repeat SpammersAll Time
',
unsafe_allow_html=True
)
# Two tuning sliders: time window 5-60 s (default 15) and minimum repeat
# count 2-10 (default 2).
rs_col1, rs_col2 = st.columns([1, 1])
with rs_col1:
rs_window = st.slider("Time window (sec)", 5, 60, 15, key="rs_window")
with rs_col2:
rs_min = st.slider("Min repeats to flag", 2, 10, 2, key="rs_min")
# detect_repeat_spammers is handed author/text/topic/sentiment/time per message
# as a JSON string plus the two slider values. Its result is used below as a
# list of dicts with "author", "text", "topic", "sentiment", "max_burst",
# "count", "first_seen" and "last_seen".
_rs_json = json.dumps([{
"author": m.get("author",""), "text": m.get("text",""),
"topic": m.get("topic","General"), "sentiment": m.get("sentiment","Neutral"),
"time": m.get("time","")
} for m in all_data])
repeat_spammers = detect_repeat_spammers(_rs_json, window_sec=rs_window, min_repeats=rs_min)
if repeat_spammers:
# NOTE(review): the summary text below contains ">=>" before the repeat count,
# which looks like a leftover fragment — confirm the intended output.
st.markdown(
f''
f'Found {len(repeat_spammers)} users repeating the same message '
f'>=>{rs_min}x within {rs_window}s
',
unsafe_allow_html=True
)
# One card per flagged user. Severity colour depends on the largest burst:
# >= 5 red, >= 3 amber, otherwise orange. Topic and sentiment colours fall
# back to grey for unknown labels.
# NOTE(review): rs["author"] and rs["text"] are message-supplied values and are
# interpolated into markup rendered with unsafe_allow_html=True; consider
# html.escape() unless they are sanitised upstream — confirm.
for rs in repeat_spammers:
_t_color = TOPIC_COLOR.get(rs["topic"], "#6b7280")
_s_color = SENT_COLORS.get(rs["sentiment"], "#6b7280")
_burst = rs["max_burst"]
_total = rs["count"]
_severity = "#ef4444" if _burst >= 5 else "#eab308" if _burst >= 3 else "#f59e0b"
st.markdown(
f''
f'
'
f'
⚠️ {rs["author"]}
'
f'
'
f''
f'🔁 {_burst}x in {rs_window}s'
f'{_total} total'
f'
'
f'
"{rs["text"]}"
'
f'
'
f'{rs["sentiment"]}'
f'{rs["topic"]}'
f'First: {rs["first_seen"]}'
f'Last: {rs["last_seen"]}'
f'
',
unsafe_allow_html=True
)
# CSV export of the flagged records as returned.
rs_df = pd.DataFrame(repeat_spammers)
csv_download(rs_df, "Download CSV", "repeat_spammers.csv")
else:
st.markdown(
'No repeat spammers detected in current window.
',
unsafe_allow_html=True
)
# -- MULTI-STREAM COMPARISON ----------------------------------
# A stream counts as active when store_llen() reports a non-zero length for
# its redis_key. The comparison renders only with more than one active stream.
active_streams = [s for s in st.session_state.streams if store_llen(s["redis_key"]) > 0]
if len(active_streams) > 1:
st.divider()
n_streams = len(active_streams)
st.markdown(
f'Multi-Stream Comparison'
f'{n_streams} streams
',
unsafe_allow_html=True
)
# Per-stream aggregates are computed once here and reused by the summary
# table, the per-stream panels and the overlay chart. Streams whose load comes
# back empty are left out of the cache and skipped everywhere below.
# NOTE(review): load_stream_data() is called without the limit argument used
# for the primary stream load — confirm whether the cumulative cap should
# apply here too.
_stream_cache: dict[str, dict] = {}
for _s in active_streams:
_rkey = _s["redis_key"]
_raw = load_stream_data(_rkey)
if not _raw:
continue
_sdf = pd.DataFrame(_raw)
_sdf["sentiment"] = _sdf["sentiment"].apply(clean_sentiment)
_sdf["topic"] = _sdf["topic"].apply(clean_topic) if "topic" in _sdf.columns else "General"
_sc = _sdf["sentiment"].value_counts().to_dict()
_p = _sc.get("Positive", 0)
_n = _sc.get("Neutral", 0)
_g = _sc.get("Negative", 0)
# Floored at 1 so the percentage maths below never divides by zero.
_t = max(_p + _n + _g, 1)
_tc = {lbl: int((_sdf["topic"] == lbl).sum()) for lbl in TOPIC_LABELS}
# Label with the highest count; on a tie the first such label in TOPIC_LABELS
# order wins.
_top_topic = max(_tc, key=_tc.get)
_eng_json = json.dumps([
{"sentiment": m.get("sentiment","Neutral"),
"topic": m.get("topic","General"),
"time": m.get("time","")} for m in _raw
])
_eng = compute_engagement(_eng_json)
# Display title: video_title, else video_id, else the store key.
_title = _s.get("video_title") or _s.get("video_id") or _rkey
# "sidx" is the stream's position in the full stream list; it indexes
# STREAM_NAMES and STREAM_COLORS below.
_stream_cache[_rkey] = {
"df": _sdf, "raw": _raw,
"p": _p, "n": _n, "g": _g, "t": _t,
"tc": _tc, "top_topic": _top_topic,
"eng": _eng, "title": _title,
"sidx": st.session_state.streams.index(_s),
}
st.markdown('', unsafe_allow_html=True)
st.markdown('
Head-to-Head Summary
All active streams at a glance
', unsafe_allow_html=True)
# Summary table: one row per cached stream, titles cut to 30 characters.
_hth_rows = []
for _s in active_streams:
_rkey = _s["redis_key"]
if _rkey not in _stream_cache:
continue
_c = _stream_cache[_rkey]
_sidx = _c["sidx"]
_hth_rows.append({
"Stream": f"Stream {STREAM_NAMES[_sidx]}",
"Title": _c["title"][:30],
"Messages": _c["t"],
"Positive %": f"{_c['p']/_c['t']*100:.1f}%",
"Neutral %": f"{_c['n']/_c['t']*100:.1f}%",
"Negative %": f"{_c['g']/_c['t']*100:.1f}%",
"Top Topic": _c["top_topic"],
"Engagement": f"{_c['eng']['score']}/100 {_c['eng']['grade']}",
})
if _hth_rows:
st.dataframe(pd.DataFrame(_hth_rows), hide_index=True, use_container_width=True)
st.markdown('
', unsafe_allow_html=True)
# Per-stream panels, laid out two per row.
chunk_size = 2
_cached_keys = [_s["redis_key"] for _s in active_streams if _s["redis_key"] in _stream_cache]
for row_start in range(0, len(_cached_keys), chunk_size):
row_keys = _cached_keys[row_start:row_start + chunk_size]
cols = st.columns(len(row_keys))
for col, _rkey in zip(cols, row_keys):
_c = _stream_cache[_rkey]
_sidx = _c["sidx"]
color = STREAM_COLORS[_sidx]
slabel = STREAM_NAMES[_sidx]
_p, _n, _g, _t = _c["p"], _c["n"], _c["g"], _c["t"]
_eng = _c["eng"]
_tc = _c["tc"]
with col:
st.markdown(
f''
f'Stream {slabel} · {_c["title"][:25]}',
unsafe_allow_html=True
)
# Engagement colour uses the same 70 / 40 thresholds as the main score card.
_ec = "#22c55e" if _eng["score"] >= 70 else "#eab308" if _eng["score"] >= 40 else "#ef4444"
st.markdown(
f''
f'
'
f'
{_eng["score"]}
'
f'
Engagement
'
f'
'
f'
'
f'
'
f'
{_p/_t*100:.0f}%
'
f'
Positive
'
f'
'
f'
',
unsafe_allow_html=True
)
st.markdown('', unsafe_allow_html=True)
st.markdown('
Sentiment
', unsafe_allow_html=True)
fig_s = go.Figure(go.Bar(
x=["Pos", "Neu", "Neg"],
y=[_p, _n, _g],
marker_color=["#22c55e", "#eab308", "#ef4444"],
marker_line_width=0,
text=[_p, _n, _g],
textposition="outside",
hovertemplate="
%{x}: %{y}
",
))
fig_s.update_layout(**plotly_layout(180))
st.plotly_chart(fig_s, config={"displayModeBar": False})
st.markdown('
', unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
st.markdown('
Topic Breakdown
', unsafe_allow_html=True)
# NOTE(review): another literal list of six colours for bars driven by
# TOPIC_LABELS; TOPIC_COLOR is indexed per label elsewhere in this file —
# consider deriving the colours from it.
_tc_colors = ["#f59e0b","#3b82f6","#ec4899","#ef4444","#6b7280","#10b981"]
fig_t = go.Figure(go.Bar(
x=TOPIC_LABELS,
y=[_tc[l] for l in TOPIC_LABELS],
marker_color=_tc_colors,
marker_line_width=0,
text=[_tc[l] for l in TOPIC_LABELS],
textposition="outside",
hovertemplate="
%{x}: %{y}
",
))
_tl = plotly_layout(180)
_tl["xaxis"]["tickfont"] = dict(size=8)
fig_t.update_layout(**_tl)
st.plotly_chart(fig_t, config={"displayModeBar": False})
st.markdown('
', unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
st.markdown('
Positive Ratio Over Time
Rolling positive % per stream (synced refresh)
', unsafe_allow_html=True)
# Overlay chart: one line per cached stream. y is the rolling mean of the
# "is positive" flag over 10 messages (min_periods=1) as a percentage; x is the
# message index within the stream, not a timestamp. The cached frame is copied
# so the helper columns do not leak into the cache.
fig_overlay = go.Figure()
for _rkey, _c in _stream_cache.items():
_sidx = _c["sidx"]
color = STREAM_COLORS[_sidx]
slabel = STREAM_NAMES[_sidx]
_sdf = _c["df"].copy()
_sdf["is_pos"] = (_sdf["sentiment"] == "Positive").astype(int)
_sdf["rolling"] = _sdf["is_pos"].rolling(10, min_periods=1).mean() * 100
fig_overlay.add_trace(go.Scatter(
x=list(range(len(_sdf))),
y=_sdf["rolling"],
mode="lines",
name=f"Stream {slabel} · {_c['title'][:20]}",
line=dict(color=color, width=2),
hovertemplate=f"Stream {slabel} msg %{{x}}: %{{y:.1f}}%
",
))
layout_ov = plotly_layout(220)
layout_ov["showlegend"] = True
layout_ov["legend"] = dict(orientation="h", y=1.08, font=dict(size=11, color="#f1f5f9"))
layout_ov["yaxis"]["range"] = [0, 100]
fig_overlay.update_layout(**layout_ov)
st.plotly_chart(fig_overlay, config={"displayModeBar": False})
st.markdown('
', unsafe_allow_html=True)
# More than one stream slot exists, but fewer than two have data: show a hint.
elif len(st.session_state.streams) > 1:
st.divider()
st.info("Add video IDs to your extra stream slots and click ▶ Start to enable multi-stream comparison.")
# -- AUTO REFRESH ---------------------------------------------
# Last step of the page: when auto-refresh is enabled, block for refresh_rate
# seconds and then request a rerun. The sleep happens after everything above
# has been rendered.
if auto_refresh:
time.sleep(refresh_rate)
st.rerun()