# -*- coding: utf-8 -*-
import streamlit as st
import redis
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import time
import re
import sys
import os
import subprocess
from datetime import datetime, timedelta
from collections import defaultdict
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'backend'))
from config import REDIS_HOST, REDIS_PORT, REDIS_DB
st.set_page_config(
page_title="LivePulse",
layout="wide",
    page_icon="📡",
initial_sidebar_state="expanded"
)
r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, decode_responses=True)
TOPIC_LABELS = ["Appreciation", "Question", "Request/Feedback", "Promo", "Spam", "General", "MCQ Answer"]
TOPIC_COLOR = {
"Appreciation": "#f59e0b", "Question": "#3b82f6",
"Request/Feedback": "#8b5cf6",
"Promo": "#ec4899", "Spam": "#ef4444", "General": "#6b7280",
"MCQ Answer": "#10b981"
}
SENT_COLORS = {"Positive": "#22c55e", "Neutral": "#eab308", "Negative": "#ef4444"}
# ── JS: detect Streamlit's live theme and set data-livepulse attribute ──
THEME_JS = """"""
CSS = """"""
st.markdown(THEME_JS, unsafe_allow_html=True)
st.markdown(CSS, unsafe_allow_html=True)
# ── HELPERS ──────────────────────────────────────────────
def extract_video_id(url_or_id):
url_or_id = url_or_id.strip()
match = re.search(r"(?:v=|/live/|youtu\.be/)([A-Za-z0-9_-]{11})", url_or_id)
if match:
return match.group(1)
if re.match(r"^[A-Za-z0-9_-]{11}$", url_or_id):
return url_or_id
return url_or_id
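# Usage sketch (hypothetical URLs): "https://youtu.be/eFSK2-QRB0A" and
# "https://www.youtube.com/watch?v=eFSK2-QRB0A" both yield "eFSK2-QRB0A";
# anything that is neither a known URL form nor an 11-char ID falls through unchanged.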
def update_config_video_id(video_id):
config_path = os.path.join(os.path.dirname(__file__), '..', 'backend', 'config.py')
with open(config_path, 'r') as f:
content = f.read()
content = re.sub(r'VIDEO_ID\s*=\s*".*?"', f'VIDEO_ID = "{video_id}"', content)
with open(config_path, 'w') as f:
f.write(content)
def fetch_video_title(video_id):
try:
import urllib.request
url = f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={video_id}&format=json"
with urllib.request.urlopen(url, timeout=5) as resp:
return json.loads(resp.read())["title"]
except Exception:
return None
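# Note: the oEmbed endpoint needs no API key; on success it returns JSON whose
# "title" field is used here, and any network or parsing error yields None.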
def clean_topic(val):
if pd.isna(val) or str(val).strip() == "" or str(val).strip().lower() == "nan":
return "General"
return str(val).strip()
def clean_sentiment(val):
if str(val).strip() in ("Positive", "Negative", "Neutral"):
return str(val).strip()
return "Neutral"
def plotly_layout(height=280):
return dict(
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="rgba(0,0,0,0)",
height=height,
margin=dict(l=10, r=10, t=10, b=10),
font=dict(family="Space Grotesk"),
xaxis=dict(showgrid=False, zeroline=False, showline=False,
tickfont=dict(size=11), title=None),
yaxis=dict(showgrid=True, gridcolor="rgba(128,128,128,0.12)",
zeroline=False, showline=False, tickfont=dict(size=11), title=None),
showlegend=False,
hoverlabel=dict(font_family="Space Grotesk", font_size=12),
)
def csv_download(df_export, label, filename):
csv = df_export.to_csv(index=False).encode("utf-8")
    st.download_button(label=f"⬇ {label}", data=csv,
file_name=filename, mime="text/csv", key=filename)
@st.cache_data(ttl=5, show_spinner=False)
def load_stream_data(redis_key: str, limit: int | None = None):
"""Load and parse messages from a Redis key. Cached for 5s to avoid redundant reads."""
if limit:
raws = r.lrange(redis_key, -limit, -1)
else:
raws = r.lrange(redis_key, 0, -1)
data = []
for raw in raws:
try:
data.append(json.loads(raw))
except Exception:
pass
return data
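# lrange(-limit, -1) keeps only the newest `limit` entries, assuming the scraper
# RPUSHes messages in arrival order; lrange(0, -1) loads the whole list.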
@st.cache_data(ttl=10, show_spinner=False)
def compute_velocity(df_all_json: str, window: int = 20) -> dict:
"""
Compute sentiment velocity. Accepts JSON string for cache key compatibility.
"""
import json as _json
sentiments = [m.get("sentiment", "Neutral") for m in _json.loads(df_all_json)]
n = len(sentiments)
if n < window * 2:
return {"direction": "โ", "delta": 0.0, "label": "Stable", "color": "#eab308"}
recent = sentiments[-window:]
prev = sentiments[-window*2:-window]
r_pos = sum(1 for s in recent if s == "Positive") / window
p_pos = sum(1 for s in prev if s == "Positive") / window
delta = r_pos - p_pos
    if delta > 0.08:
        return {"direction": "↑", "delta": delta, "label": "Rising", "color": "#22c55e"}
    elif delta < -0.08:
        return {"direction": "↓", "delta": delta, "label": "Falling", "color": "#ef4444"}
    return {"direction": "→", "delta": delta, "label": "Stable", "color": "#eab308"}
@st.cache_data(ttl=10, show_spinner=False)
def build_heatmap_data(df_all_json: str, bucket_minutes: int = 1) -> pd.DataFrame:
"""
Bucket messages into time intervals. Accepts JSON string for cache key compatibility.
"""
import json as _json
records = _json.loads(df_all_json)
if not records:
return pd.DataFrame()
df_t = pd.DataFrame(records)
if "time" not in df_t.columns:
return pd.DataFrame()
df_t["time"] = pd.to_datetime(df_t["time"], errors="coerce")
df_t = df_t.dropna(subset=["time"])
if df_t.empty:
return pd.DataFrame()
df_t["bucket"] = df_t["time"].dt.floor(f"{bucket_minutes}min")
grouped = df_t.groupby(["bucket", "sentiment"]).size().unstack(fill_value=0)
for col in ["Positive", "Neutral", "Negative"]:
if col not in grouped.columns:
grouped[col] = 0
grouped = grouped.reset_index()
grouped.columns.name = None
return grouped[["bucket", "Positive", "Neutral", "Negative"]]
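# Output shape (illustrative row): one row per minute bucket, e.g.
#   bucket=2024-01-01 10:03:00, Positive=4, Neutral=7, Negative=1,
# which the heatmap section below plots as one line per sentiment.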
def check_alert(df_all: pd.DataFrame, threshold: float = 0.4, window: int = 15) -> dict | None:
"""Return alert info if negative ratio in last `window` messages exceeds threshold."""
if len(df_all) < window:
return None
recent = df_all.iloc[-window:]
neg_ratio = (recent["sentiment"] == "Negative").mean()
if neg_ratio >= threshold:
return {
"neg_ratio": neg_ratio,
"count": int((recent["sentiment"] == "Negative").sum()),
"window": window,
}
return None
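# Example (hypothetical): window=15, threshold=0.4 and 7 negative messages in the
# last 15 gives neg_ratio ≈ 0.47, so an alert dict is returned; 5/15 would not.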
@st.cache_data(ttl=10, show_spinner=False)
def compute_engagement(all_data_json: str, window: int = 50) -> dict:
"""
Engagement score (0โ100) = weighted combo of:
- message rate (msgs per minute, last window)
- positive ratio (last window)
- question density (last window)
"""
import json as _j
msgs = _j.loads(all_data_json)
if not msgs:
return {"score": 0, "rate": 0.0, "pos_ratio": 0.0, "q_density": 0.0, "grade": "โ"}
recent = msgs[-window:]
n = len(recent)
# Message rate: msgs per minute using timestamps
rate = 0.0
try:
t0 = datetime.fromisoformat(recent[0]["time"])
t1 = datetime.fromisoformat(recent[-1]["time"])
elapsed = max((t1 - t0).total_seconds() / 60, 0.1)
rate = round(n / elapsed, 1)
except Exception:
rate = float(n)
pos_ratio = sum(1 for m in recent if m.get("sentiment") == "Positive") / max(n, 1)
q_density = sum(1 for m in recent if m.get("topic") == "Question") / max(n, 1)
# Normalise rate: cap at 60 msgs/min = 100%
rate_norm = min(rate / 60, 1.0)
score = round((rate_norm * 0.4 + pos_ratio * 0.4 + q_density * 0.2) * 100)
    if score >= 70: grade = "🔥 High"
    elif score >= 40: grade = "⚡ Medium"
    else: grade = "💤 Low"
return {"score": score, "rate": rate, "pos_ratio": pos_ratio, "q_density": q_density, "grade": grade}
@st.cache_data(ttl=10, show_spinner=False)
def compute_top_contributors(all_data_json: str, top_n: int = 10) -> list[dict]:
"""Return top N authors by message count with sentiment + topic breakdown."""
import json as _j
from collections import Counter
msgs = _j.loads(all_data_json)
if not msgs:
return []
TOPICS = ["Appreciation", "Question", "Request/Feedback", "Promo", "Spam", "General", "MCQ Answer"]
author_data: dict[str, dict] = {}
for m in msgs:
a = m.get("author", "Unknown")
if a not in author_data:
author_data[a] = {
"count": 0,
"Positive": 0, "Neutral": 0, "Negative": 0,
**{t: 0 for t in TOPICS},
}
author_data[a]["count"] += 1
s = m.get("sentiment", "Neutral")
if s in ("Positive", "Neutral", "Negative"):
author_data[a][s] += 1
t = m.get("topic", "General")
if t not in TOPICS:
t = "General"
author_data[a][t] += 1
sorted_authors = sorted(author_data.items(), key=lambda x: x[1]["count"], reverse=True)[:top_n]
result = []
for author, d in sorted_authors:
total = max(d["count"], 1)
result.append({
"author": author,
"count": d["count"],
"pos_pct": round(d["Positive"] / total * 100),
"neu_pct": round(d["Neutral"] / total * 100),
"neg_pct": round(d["Negative"] / total * 100),
"t_appr": round(d["Appreciation"] / total * 100),
"t_ques": round(d["Question"] / total * 100),
"t_rf": round(d["Request/Feedback"] / total * 100),
"t_promo": round(d["Promo"] / total * 100),
"t_spam": round(d["Spam"] / total * 100),
"t_gen": round(d["General"] / total * 100),
"t_mcq": round(d["MCQ Answer"] / total * 100),
})
return result
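# Each entry is shaped like {"author": ..., "count": 12, "pos_pct": 50, "t_ques": 25, ...}
# where the percentages are relative to that author's own message count.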
@st.cache_data(ttl=10, show_spinner=False)
def compute_word_freq(all_data_json: str, sentiment_filter: str = "All",
topic_filter: str = "All", top_n: int = 60) -> list[tuple[str, int]]:
"""Return top N (word, count) pairs after filtering stopwords."""
import json as _j
from collections import Counter
STOPWORDS = {
"the","a","an","is","it","in","on","at","to","of","and","or","but","for",
"with","this","that","are","was","be","as","by","from","have","has","had",
"not","no","so","if","do","did","will","can","just","i","you","he","she",
"we","they","my","your","his","her","our","their","me","him","us","them",
"what","how","why","when","where","who","which","there","here","been",
"would","could","should","may","might","shall","than","then","now","also",
"more","very","too","up","out","about","into","over","after","before",
"yaar","bhi","hai","hain","ho","kar","ke","ki","ka","ko","se","ne","ye",
"vo","woh","aur","nahi","nhi","toh","toh","koi","kuch","ab","ek","hi",
}
msgs = _j.loads(all_data_json)
words: list[str] = []
for m in msgs:
if sentiment_filter != "All" and m.get("sentiment") != sentiment_filter:
continue
if topic_filter != "All" and m.get("topic") != topic_filter:
continue
text = re.sub(r"[^\w\s]", " ", m.get("text", "").lower())
for w in text.split():
if len(w) > 2 and w not in STOPWORDS and not w.isdigit():
words.append(w)
return Counter(words).most_common(top_n)
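# Example call (hypothetical data): compute_word_freq(json_str, sentiment_filter="Negative",
# top_n=20) returns pairs such as [("audio", 14), ("lagging", 9), ...] after dropping
# stopwords, tokens of length <= 2 and pure digits.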
def check_spam_alert(df_all: pd.DataFrame, threshold: float = 0.3, window: int = 20) -> dict | None:
"""Return alert if spam ratio in last `window` messages exceeds threshold."""
if "topic" not in df_all.columns or len(df_all) < window:
return None
recent = df_all.iloc[-window:]
spam_ratio = (recent["topic"] == "Spam").mean()
if spam_ratio >= threshold:
return {
"spam_ratio": spam_ratio,
"count": int((recent["topic"] == "Spam").sum()),
"window": window,
}
return None
@st.cache_data(ttl=10, show_spinner=False)
def detect_repeat_spammers(all_data_json: str, window_sec: int = 15, min_repeats: int = 2) -> list[dict]:
"""
Detect users who send the same (or near-identical) message multiple times
within `window_sec` seconds. Returns list of spam burst dicts sorted by
repeat count descending.
"""
import json as _j
import re as _re
msgs = _j.loads(all_data_json)
if not msgs:
return []
def _normalize(t: str) -> str:
return _re.sub(r"[^\w]", "", t.lower().strip())
bursts: dict[tuple, dict] = {}
for m in msgs:
author = m.get("author", "Unknown")
text = m.get("text", "").strip()
if not text:
continue
norm = _normalize(text)
if len(norm) < 4:
continue
ts_str = m.get("time", "")
try:
ts = datetime.fromisoformat(ts_str)
except Exception:
continue
key = (author, norm)
if key not in bursts:
bursts[key] = {
"author": author,
"text": text,
"topic": m.get("topic", "General"),
"sentiment": m.get("sentiment", "Neutral"),
"timestamps": [],
}
bursts[key]["timestamps"].append(ts)
results = []
for key, burst in bursts.items():
times = sorted(burst["timestamps"])
max_in_window = 1
for i in range(len(times)):
count_in_window = sum(
1 for t in times[i:]
if (t - times[i]).total_seconds() <= window_sec
)
max_in_window = max(max_in_window, count_in_window)
if max_in_window >= min_repeats:
results.append({
"author": burst["author"],
"text": burst["text"],
"topic": burst["topic"],
"sentiment": burst["sentiment"],
"count": len(times),
"max_burst": max_in_window,
"first_seen": times[0].strftime("%H:%M:%S"),
"last_seen": times[-1].strftime("%H:%M:%S"),
})
return sorted(results, key=lambda x: x["max_burst"], reverse=True)
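# Example (hypothetical): a user posting "join my channel" 4 times within 10 seconds
# is reported with count=4 and max_burst=4 under the defaults window_sec=15, min_repeats=2.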
# ── SESSION STATE INIT ────────────────────────────────────
MAX_STREAMS = 5
STREAM_COLORS = ["#7c3aed", "#10b981", "#f59e0b", "#3b82f6", "#ec4899"]
STREAM_NAMES = ["A", "B", "C", "D", "E"]
if "pinned_messages" not in st.session_state:
st.session_state.pinned_messages = []
if "alert_dismissed" not in st.session_state:
st.session_state.alert_dismissed = False
if "last_alert_count" not in st.session_state:
st.session_state.last_alert_count = 0
if "last_view" not in st.session_state:
    st.session_state.last_view = "💬 Comments"
# Multi-stream: list of dicts {video_id, redis_key, label, proc}
if "streams" not in st.session_state:
st.session_state.streams = [
{"video_id": "", "redis_key": "chat_messages", "label": "Stream A", "proc": None}
]
# ── SIDEBAR ──────────────────────────────────────────────
with st.sidebar:
    st.markdown(
        '📡 LivePulse  \nYouTube Chat Analytics',
        unsafe_allow_html=True
    )
st.divider()
    # ── Display Settings ──
    st.markdown('Display Settings', unsafe_allow_html=True)
active_view = st.radio(
"View",
options=["๐ฌ Comments", "๐ Stats & Info"],
index=0,
horizontal=True,
key="active_view",
)
refresh_rate = st.radio(
"Refresh interval (s)",
options=[10, 20, 30, 40, 50, 60],
index=0,
horizontal=True,
key="refresh_rate",
)
msg_limit = st.slider("Message window", 10, 400, 50, step=10)
auto_refresh = st.toggle("Live auto-refresh", value=True)
st.divider()
    # ── Alert Settings ──
    st.markdown('Alert Settings', unsafe_allow_html=True)
alert_enabled = st.toggle("Negative spike alerts", value=True)
alert_threshold = st.slider("Neg alert threshold (%)", 20, 80, 40) / 100
alert_window = st.slider("Alert window (msgs)", 5, 30, 15)
spam_alert_on = st.toggle("Spam rate alerts", value=True)
spam_threshold = st.slider("Spam alert threshold (%)", 10, 60, 30) / 100
st.divider()
    # ── Multi-Stream Scraper Control ──
    st.markdown('Stream Control', unsafe_allow_html=True)
import importlib
import config as _cfg
importlib.reload(_cfg)
# Pre-fill Stream A video_id from config on first load
if st.session_state.streams[0]["video_id"] == "":
st.session_state.streams[0]["video_id"] = _cfg.VIDEO_ID
for idx, stream in enumerate(st.session_state.streams):
color = STREAM_COLORS[idx]
label = STREAM_NAMES[idx]
        st.markdown(
            f'Stream {label}',
            unsafe_allow_html=True
        )
# Use widget key as the source of truth โ never override with value= after first set
vid_skey = f"vid_{idx}"
rkey_skey = f"rkey_{idx}"
if vid_skey not in st.session_state:
st.session_state[vid_skey] = stream["video_id"]
if rkey_skey not in st.session_state:
st.session_state[rkey_skey] = stream["redis_key"]
st.text_input("Video ID / URL", placeholder="e.g. eFSK2-QRB0A", key=vid_skey)
st.text_input("Redis key", placeholder=f"chat_messages_{label.lower()}", key=rkey_skey)
sc1, sc2 = st.columns(2)
with sc1:
if st.button("โถ Start", key=f"start_{idx}", width='stretch'):
vid = extract_video_id(st.session_state[vid_skey])
rkey = st.session_state[rkey_skey].strip() or f"chat_messages_{label.lower()}"
if vid:
# Stop existing proc for this slot
old_proc = st.session_state.streams[idx].get("proc")
if old_proc and old_proc.poll() is None:
old_proc.terminate()
proc = subprocess.Popen(
[sys.executable, "-m", "backend.scraper",
"--video_id", vid, "--redis_key", rkey],
cwd=os.path.abspath(os.path.join(os.path.dirname(__file__), "..")),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
st.session_state.streams[idx]["proc"] = proc
st.session_state.streams[idx]["video_id"] = vid
st.session_state.streams[idx]["redis_key"] = rkey
# Fetch and store title for ALL streams (used in header pills)
_title = fetch_video_title(vid)
st.session_state.streams[idx]["video_title"] = _title or vid
if idx == 0:
update_config_video_id(vid)
r.set("video_title", _title) if _title else r.delete("video_title")
st.session_state.alert_dismissed = False
st.success(f"Stream {label} started โ `{rkey}`")
else:
st.error("Invalid video ID")
with sc2:
if st.button("โน Stop", key=f"stop_{idx}", width='stretch'):
proc = st.session_state.streams[idx].get("proc")
if proc and proc.poll() is None:
proc.terminate()
st.session_state.streams[idx]["proc"] = None
st.success(f"Stream {label} stopped")
else:
st.warning("Not running")
proc = st.session_state.streams[idx].get("proc")
running = proc is not None and proc.poll() is None
dot_color = "#22c55e" if running else "#ef4444"
status = "running" if running else "stopped"
        st.markdown(
            f'<span style="color:{dot_color}">●</span> {status}',
            unsafe_allow_html=True
        )
st.divider()
    # ── Add / Remove stream slots ──
add_col, rem_col = st.columns(2)
with add_col:
if len(st.session_state.streams) < MAX_STREAMS:
if st.button("๏ผ Add stream", width='stretch'):
n = len(st.session_state.streams)
st.session_state.streams.append({
"video_id": "",
"redis_key": f"chat_messages_{STREAM_NAMES[n].lower()}",
"label": f"Stream {STREAM_NAMES[n]}",
"proc": None,
})
st.rerun()
with rem_col:
if len(st.session_state.streams) > 1:
if st.button("๏ผ Remove last", width='stretch'):
removed = st.session_state.streams.pop()
proc = removed.get("proc")
if proc and proc.poll() is None:
proc.terminate()
st.rerun()
st.divider()
    # ── Pinned Messages ──
    st.markdown('Pinned Messages', unsafe_allow_html=True)
pin_count = len(st.session_state.pinned_messages)
    st.markdown(f'{pin_count} message{"s" if pin_count != 1 else ""} pinned', unsafe_allow_html=True)
    if pin_count > 0 and st.button("🗑 Clear pins", width='stretch'):
st.session_state.pinned_messages = []
st.rerun()
st.divider()
    # ── Download Data ──
    st.markdown('Download Data', unsafe_allow_html=True)
_active_streams = [s for s in st.session_state.streams if s.get("redis_key")]
if _active_streams:
for _s in _active_streams:
_rkey = _s["redis_key"]
_slabel = _s["label"]
_all_raws = r.lrange(_rkey, 0, -1)
_dl_rows = []
for _raw in _all_raws:
try:
_dl_rows.append(json.loads(_raw))
except Exception:
pass
if _dl_rows:
_dl_df = pd.DataFrame(_dl_rows)
_ts = datetime.now().strftime("%Y%m%d_%H%M%S")
_fname = f"livepulse_{_rkey}_{_ts}.csv"
_csv_bytes = _dl_df.to_csv(index=False).encode("utf-8")
st.download_button(
label=f"โฌ {_slabel} ({len(_dl_rows)} msgs)",
data=_csv_bytes,
file_name=_fname,
mime="text/csv",
key=f"dl_{_rkey}",
)
                # PDF button removed – use the Export button on the Stats page instead
else:
                st.markdown(f'{_slabel}: no data yet', unsafe_allow_html=True)
else:
        st.markdown('No active streams', unsafe_allow_html=True)
st.divider()
    # ── Export ──
    st.markdown('Export', unsafe_allow_html=True)
    st.markdown(
        '\u26a0\ufe0f Go to Stats & Info tab first, then click.',
        unsafe_allow_html=True
    )
import streamlit.components.v1 as _comp2
_comp2.html("""
""", height=75)
st.divider()
    # ── Danger Zone ──
    st.markdown('Danger Zone', unsafe_allow_html=True)
if st.button("๐ Clear all data", width='stretch'):
for s in st.session_state.streams:
r.delete(s["redis_key"])
st.session_state.pinned_messages = []
st.session_state.alert_dismissed = False
st.success("All stream data cleared.")
st.divider()
    st.markdown(
        'Theme follows Streamlit settings '
        '(☰ → Settings → Theme)',
        unsafe_allow_html=True
    )
# ── PAGE HEADER ───────────────────────────────────────────
_video_title = r.get("video_title")
# Build subtitle showing ALL active stream titles
_all_titles = []
for _si, _ss in enumerate(st.session_state.streams):
_st = _ss.get("video_title") or _ss.get("video_id")
_sk = _ss.get("redis_key", "")
_sp = _ss.get("proc")
_sr = _sp is not None and _sp.poll() is None
if _st and (r.llen(_sk) > 0 or _sr):
        _all_titles.append(f"▶ {_st}")
if _all_titles:
_subtitle = " ยท ".join(_all_titles)
else:
_subtitle = "Real-time sentiment ยท topic classification ยท engagement insights"
# Build active stream pills for header
_active_stream_pills = ""
for _hi, _hs in enumerate(st.session_state.streams):
_hkey = _hs.get("redis_key", "")
_hproc = _hs.get("proc")
_hrunning = _hproc is not None and _hproc.poll() is None
if r.llen(_hkey) > 0 or _hrunning:
_hcolor = STREAM_COLORS[_hi]
_hlabel = STREAM_NAMES[_hi]
_htitle = (
_hs.get("video_title")
or _hs.get("video_id")
or _hkey
or f"Stream {_hlabel}"
)
        _hdot = f'<span style="color:{_hcolor}">●</span>'
        _active_stream_pills += (
            f'{_hdot} Stream {_hlabel} · {str(_htitle)[:22]}  '
        )
col_title, col_live = st.columns([7, 1])
with col_title:
    st.markdown(
        'YouTube Live Chat Analytics  \n'
        f'{_subtitle}'
        + (f'  \n{_active_stream_pills}' if _active_stream_pills else ''),
        unsafe_allow_html=True
    )
with col_live:
    st.markdown(
        '● LIVE',
        unsafe_allow_html=True
    )
st.divider()
# ── PRIMARY STREAM SELECTOR ───────────────────────────────
_streams_with_data = [
s for s in st.session_state.streams
if r.llen(s.get("redis_key", "")) > 0 or (s.get("proc") is not None and s.get("proc").poll() is None)
]
if len(_streams_with_data) > 1:
_ps_options = {}
for _pss in _streams_with_data:
_psi_real = st.session_state.streams.index(_pss)
_pst = _pss.get("video_title") or _pss.get("video_id") or _pss.get("redis_key")
_psl = f"Stream {STREAM_NAMES[_psi_real]} โ {str(_pst)[:35]}"
_ps_options[_psl] = _pss["redis_key"]
_ps_col, _ = st.columns([2, 3])
with _ps_col:
_selected_primary_label = st.selectbox(
"๐ Dashboard data source",
list(_ps_options.keys()),
key="primary_stream_select",
help="Switch which stream's data powers the main dashboard stats and charts"
)
_primary_key = _ps_options[_selected_primary_label]
else:
_primary_key = st.session_state.streams[0]["redis_key"]
# ── DATA LOAD ─────────────────────────────────────────────
_current_len = r.llen(_primary_key)
# Cap cumulative load at 50k - enough for accurate stats, avoids 100k+ slowdowns
_CUMULATIVE_CAP = 50_000
all_data = load_stream_data(_primary_key, limit=_CUMULATIVE_CAP if _current_len > _CUMULATIVE_CAP else None)
data = all_data[-msg_limit:] if len(all_data) > msg_limit else all_data
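# `all_data` feeds the cumulative/all-time stats below; `data` (and the `df` built from it)
# is the sidebar-controlled window used for the "Window Snapshot" metrics.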
if not all_data:
    st.markdown(
        '📭 No messages yet  \n'
        'Set a video ID in the sidebar, then click ▶ Start',
        unsafe_allow_html=True
    )
if auto_refresh:
time.sleep(refresh_rate)
st.rerun()
st.stop()
df = pd.DataFrame(data)
all_df = pd.DataFrame(all_data)
df["sentiment"] = df["sentiment"].apply(clean_sentiment)
df["topic"] = df["topic"].apply(clean_topic) if "topic" in df.columns else "General"
all_df["sentiment"] = all_df["sentiment"].apply(clean_sentiment)
all_df["topic"] = all_df["topic"].apply(clean_topic) if "topic" in all_df.columns else "General"
# ── VIEW ROUTING ──────────────────────────────────────────
# Read directly from session state to get the current widget value
_active_view = st.session_state.get("active_view", "💬 Comments")
_show_stats = _active_view == "📊 Stats & Info"
_show_comments = _active_view == "💬 Comments"
if _show_comments:
    st.markdown('Live Chat Feed', unsafe_allow_html=True)
    # ── PINNED MESSAGES (shown above the feed) ──────────────
if st.session_state.pinned_messages:
        st.markdown(
            f'📌 Pinned Messages ({len(st.session_state.pinned_messages)} pinned)',
            unsafe_allow_html=True
        )
for _pidx, _pmsg in enumerate(st.session_state.pinned_messages):
_ps = _pmsg.get("sentiment", "Neutral")
_ps_color = SENT_COLORS.get(_ps, "#6b7280")
_pt_color = TOPIC_COLOR.get(_pmsg.get("topic", "General"), "#6b7280")
_pcol1, _pcol2 = st.columns([10, 1])
with _pcol1:
                st.markdown(
                    f'📌 {_pmsg.get("author", "Unknown")}: {_pmsg.get("text", "")}  \n'
                    f'Pinned · {_ps} · {_pmsg.get("topic", "General")} · {_pmsg.get("time", "")[:19]}',
                    unsafe_allow_html=True
                )
with _pcol2:
if st.button("\u2715", key=f"unpin_top_{_pidx}", width='stretch'):
st.session_state.pinned_messages.pop(_pidx)
st.rerun()
st.divider()
# Build stream options
_feed_stream_options = {}
for _fs in st.session_state.streams:
_fkey = _fs.get("redis_key", "")
_flen = r.llen(_fkey)
if _flen > 0:
_fidx = st.session_state.streams.index(_fs)
_flabel = f"Stream {STREAM_NAMES[_fidx]} โ {_fs.get('video_id', _fkey)[:20]}"
_feed_stream_options[_flabel] = _fkey
_cf0, _cf1, _cf2, _cf3, _cf4 = st.columns([1, 1, 1, 1, 2])
with _cf0:
if len(_feed_stream_options) > 1:
_selected_stream_label = st.selectbox(
"Stream", list(_feed_stream_options.keys()), key="feed_stream_select"
)
_feed_key = _feed_stream_options[_selected_stream_label]
else:
_feed_key = st.session_state.streams[0]["redis_key"]
if _feed_stream_options:
                st.markdown(
                    f'{list(_feed_stream_options.keys())[0]}',
                    unsafe_allow_html=True
                )
if _feed_key == st.session_state.streams[0]["redis_key"]:
_feed_df = df.copy()
else:
_feed_raw = load_stream_data(_feed_key, limit=msg_limit)
_feed_df = pd.DataFrame(_feed_raw) if _feed_raw else pd.DataFrame()
if not _feed_df.empty:
_feed_df["sentiment"] = _feed_df["sentiment"].apply(clean_sentiment)
_feed_df["topic"] = _feed_df["topic"].apply(clean_topic) if "topic" in _feed_df.columns else "General"
with _cf1:
_sentiment_filter = st.selectbox("Sentiment", ["All", "Positive", "Neutral", "Negative"])
with _cf2:
_topic_filter = st.selectbox("Topic", ["All"] + TOPIC_LABELS)
with _cf3:
_all_action_types = [
"General Appreciation", "Testimonials", "Faculty Request", "Faculty Feedback",
"Content requests", "Content Feedback", "Academic / Lecture / Concept Doubts",
"Academic requests", "Study Materials, Deliverables & Learning Resources",
"Access & Support", "Batch details / structure / offerings (incl faculty)",
"Schedule & logistics (Batch)", "Information- Exam", "Information- Post Exam",
"Eligibility & audience fit - Can I take this?", "Suitability & Sufficiency (Is this enough?)",
"Guidance- What should I take/do?", "Language Request", "Language medium",
"Pricing, discounts, scholarships, offer validity", "Fees + Financial Queries",
"Product/feature requests (non-content)", "Offline expansion & event-city requests",
"Offers + Events", "General Feedback", "Others", "N/A",
]
_action_type_filter = st.selectbox("Action Type", ["All"] + _all_action_types)
with _cf4:
_search_term = st.text_input("Search messages", placeholder="Filter by keyword...")
_filtered = _feed_df.copy() if not _feed_df.empty else pd.DataFrame()
_any_filter = (
_sentiment_filter != "All"
or _topic_filter != "All"
or _action_type_filter != "All"
or bool(_search_term)
)
if _any_filter:
_full_raw = load_stream_data(_feed_key)
if _full_raw:
_full_df = pd.DataFrame(_full_raw)
_full_df["sentiment"] = _full_df["sentiment"].apply(clean_sentiment)
_full_df["topic"] = _full_df["topic"].apply(clean_topic) if "topic" in _full_df.columns else "General"
_filtered = _full_df.copy()
if _sentiment_filter != "All":
_filtered = _filtered[_filtered["sentiment"] == _sentiment_filter]
if _topic_filter != "All":
_filtered = _filtered[_filtered["topic"] == _topic_filter]
if _action_type_filter != "All":
if "action_type" in _filtered.columns:
_filtered = _filtered[_filtered["action_type"] == _action_type_filter]
if _search_term:
_filtered = _filtered[_filtered["text"].str.contains(_search_term, case=False, na=False)]
if len(_filtered) > msg_limit:
_filtered = _filtered.iloc[-msg_limit:]
else:
_filtered = pd.DataFrame()
_total_scanned = len(_full_raw) if _full_raw else 0
else:
if not _filtered.empty:
if _sentiment_filter != "All":
_filtered = _filtered[_filtered["sentiment"] == _sentiment_filter]
if _topic_filter != "All":
_filtered = _filtered[_filtered["topic"] == _topic_filter]
if _action_type_filter != "All":
if "action_type" in _filtered.columns:
_filtered = _filtered[_filtered["action_type"] == _action_type_filter]
if _search_term:
_filtered = _filtered[_filtered["text"].str.contains(_search_term, case=False, na=False)]
_total_scanned = len(_feed_df)
_feed_hdr, _feed_dl = st.columns([3, 1])
with _feed_hdr:
if _any_filter:
            st.markdown(
                f'Showing {len(_filtered)} matching messages '
                f'(scanned all {_total_scanned}, capped at {msg_limit})',
                unsafe_allow_html=True
            )
else:
            st.markdown(
                f'Showing {len(_filtered)} of {len(_feed_df)} messages',
                unsafe_allow_html=True
            )
with _feed_dl:
if not _filtered.empty:
_export_cols = [c for c in ["author", "text", "sentiment", "confidence", "topic", "time"] if c in _filtered.columns]
csv_download(_filtered[_export_cols], "Download Feed CSV", "chat_feed.csv")
    _SENT_ICON = {"Positive": "🟢", "Negative": "🔴", "Neutral": "🟡"}
_pinned_texts = {m.get("text", "") for m in st.session_state.pinned_messages}
for _i, (_, _row) in enumerate(_filtered.iloc[::-1].iterrows()):
_s = _row.get("sentiment", "Neutral")
_conf_pct = int(_row.get("confidence", 0) * 100)
_topic = clean_topic(_row.get("topic", "General"))
_t_color = TOPIC_COLOR.get(_topic, "#6b7280")
_s_color = SENT_COLORS.get(_s, "#6b7280")
        _s_icon = _SENT_ICON.get(_s, "⚪")
_conf_color = "#22c55e" if _conf_pct >= 70 else "#eab308" if _conf_pct >= 40 else "#ef4444"
_msg_text = _row.get("text", "")
import re as _re2
_display_text = _re2.sub(r":[a-zA-Z0-9_\-]+:", "", _msg_text).strip() or _msg_text
_is_pinned = _msg_text in _pinned_texts
_action_type = _row.get("action_type", "N/A") or "N/A"
_card_class = f"chat-card chat-{_s.lower()}" + (" chat-pinned" if _is_pinned else "")
_msg_col, _pin_col = st.columns([11, 1])
with _msg_col:
_ab = (
                f'🏷 {_action_type}'
if _action_type not in ("N/A", "", None) else ""
)
            st.markdown(
                f'{_s_icon} {_row.get("author", "Unknown")}'
                + (' 📌' if _is_pinned else '')
                + f'  \n{_display_text}  \n'
                + f'{_s} · Confidence: {_conf_pct}% · {_topic} {_ab}',
                unsafe_allow_html=True
            )
with _pin_col:
if _is_pinned:
if st.button("๐", key=f"unpin_feed_{_i}", help="Unpin this message"):
st.session_state.pinned_messages = [
m for m in st.session_state.pinned_messages if m.get("text") != _msg_text
]
st.rerun()
else:
if st.button("๐", key=f"pin_{_i}", help="Pin this message"):
_msg_dict = _row.to_dict()
if _msg_dict not in st.session_state.pinned_messages:
st.session_state.pinned_messages.append(_msg_dict)
st.rerun()
if auto_refresh:
time.sleep(refresh_rate)
st.rerun()
st.stop()
# ── ALERT BANNERS (Stats view only) ───────────────────────
if alert_enabled:
alert = check_alert(all_df, threshold=alert_threshold, window=alert_window)
total_now = len(all_df)
if total_now != st.session_state.last_alert_count:
st.session_state.last_alert_count = total_now
if alert:
st.session_state.alert_dismissed = False
if alert and not st.session_state.alert_dismissed:
a1, a2 = st.columns([8, 1])
with a1:
            st.markdown(
                f'🚨 Negative sentiment spike – {alert["neg_ratio"]*100:.0f}% negative '
                f'in last {alert["window"]} messages  \n'
                f'{alert["count"]} of {alert["window"]} messages are negative. Consider moderating.',
                unsafe_allow_html=True
            )
with a2:
if st.button("โ Dismiss", key="dismiss_alert"):
st.session_state.alert_dismissed = True
st.rerun()
if spam_alert_on:
spam_alert = check_spam_alert(all_df, threshold=spam_threshold, window=alert_window)
if spam_alert and not st.session_state.get("spam_dismissed", False):
s1, s2 = st.columns([8, 1])
with s1:
            st.markdown(
                f'🛡️ Spam surge detected – {spam_alert["spam_ratio"]*100:.0f}% spam '
                f'in last {spam_alert["window"]} messages  \n'
                f'{spam_alert["count"]} spam messages detected. Chat may be under flood attack.',
                unsafe_allow_html=True
            )
with s2:
if st.button("โ", key="dismiss_spam"):
st.session_state.spam_dismissed = True
st.rerun()
elif not spam_alert:
st.session_state.spam_dismissed = False
# ── CUMULATIVE STATS ──────────────────────────────────────
all_counts = all_df["sentiment"].value_counts().to_dict()
c_pos = all_counts.get("Positive", 0)
c_neu = all_counts.get("Neutral", 0)
c_neg = all_counts.get("Negative", 0)
c_total = max(c_pos + c_neu + c_neg, 1)
# Sentiment velocity
velocity = compute_velocity(json.dumps([{"sentiment": m.get("sentiment","Neutral")} for m in all_data]))
st.markdown(
    'Cumulative Sentiment · All Time',
    unsafe_allow_html=True
)
v1, v2, v3, v4, v5 = st.columns([1, 1, 1, 1, 1])
with v1:
    st.markdown(
        f'{c_pos} Positive  \n{c_pos/c_total*100:.1f}% of total',
        unsafe_allow_html=True
    )
with v2:
    st.markdown(
        f'{c_neu} Neutral  \n{c_neu/c_total*100:.1f}% of total',
        unsafe_allow_html=True
    )
with v3:
    st.markdown(
        f'{c_neg} Negative  \n{c_neg/c_total*100:.1f}% of total',
        unsafe_allow_html=True
    )
with v4:
    st.markdown(
        f'{c_total} Total  \nall time',
        unsafe_allow_html=True
    )
with v5:
# Sentiment velocity card
vc = velocity["color"]
    st.markdown(
        f'{velocity["direction"]} {velocity["label"]}  \n'
        f'Sentiment Velocity  \n'
        f'{velocity["delta"]:+.0%} pos shift',
        unsafe_allow_html=True
    )
# ── CUMULATIVE TOPIC ──────────────────────────────────────
st.divider()
st.markdown(
    'Cumulative Topic · All Time',
    unsafe_allow_html=True
)
_topic_colors_list = [TOPIC_COLOR[l] for l in TOPIC_LABELS]
_ct_cols = st.columns(len(TOPIC_LABELS))
for _ci, (_lbl, _clr) in enumerate(zip(TOPIC_LABELS, _topic_colors_list)):
_cnt = int((all_df["topic"] == _lbl).sum()) if "topic" in all_df.columns else 0
_pct = _cnt / max(c_total, 1) * 100
with _ct_cols[_ci]:
        st.markdown(
            f'{_cnt}  \n{_lbl}  \n{_pct:.1f}% of msgs',
            unsafe_allow_html=True
        )
# ── ENGAGEMENT SCORE (moved here – after topic, before window) ──
_eng_json = json.dumps([{"sentiment": m.get("sentiment","Neutral"), "topic": m.get("topic","General"), "time": m.get("time","")} for m in all_data])
eng = compute_engagement(_eng_json)
st.divider()
st.markdown(
    'Engagement Score · Live',
    unsafe_allow_html=True
)
ec1, ec2, ec3, ec4 = st.columns([2, 1, 1, 1])
with ec1:
score_color = "#22c55e" if eng["score"] >= 70 else "#eab308" if eng["score"] >= 40 else "#ef4444"
bar_w = eng["score"]
    st.markdown(
        f'{eng["score"]}  \nEngagement Score / 100 \u2014 {eng["grade"]}  \n'
        f'Msg rate {eng["rate"]}/min · Positive {eng["pos_ratio"]*100:.0f}% · '
        f'Questions {eng["q_density"]*100:.0f}%',
        unsafe_allow_html=True
    )
with ec2:
st.metric("Msgs/min", f"{eng['rate']:.1f}")
with ec3:
st.metric("Positive ratio", f"{eng['pos_ratio']*100:.0f}%")
with ec4:
st.metric("Question density", f"{eng['q_density']*100:.0f}%")
# ── WINDOW METRICS ────────────────────────────────────────
st.divider()
counts = df["sentiment"].value_counts().to_dict()
pos = counts.get("Positive", 0)
neu = counts.get("Neutral", 0)
neg = counts.get("Negative", 0)
total = max(pos + neu + neg, 1)
st.markdown(
    f'Window Snapshot · Last {msg_limit} msgs',
    unsafe_allow_html=True
)
c1, c2, c3, c4 = st.columns(4)
c1.metric("Messages", total)
c2.metric("Positive", pos, f"{pos/total*100:.1f}%")
c3.metric("Neutral", neu, f"{neu/total*100:.1f}%")
c4.metric("Negative", neg, f"{neg/total*100:.1f}%")
# ── SENTIMENT + TOPIC CHARTS (ALL TIME) ───────────────────
st.divider()
col_s1, col_s2, col_t1, col_t2 = st.columns(4)
with col_s1:
    st.markdown(
        'Sentiment Distribution – All-time message count by sentiment class',
        unsafe_allow_html=True
    )
fig_bar = go.Figure(go.Bar(
x=["Positive", "Neutral", "Negative"],
y=[c_pos, c_neu, c_neg],
marker_color=["#22c55e", "#eab308", "#ef4444"],
marker_line_width=0,
text=[c_pos, c_neu, c_neg],
textposition="outside",
textfont=dict(size=12),
hovertemplate="
%{x}Count: %{y}
",
))
fig_bar.update_layout(**plotly_layout(260))
st.plotly_chart(fig_bar, width='stretch', config={"displayModeBar": False})
bar_hdr, bar_dl = st.columns([1, 1])
with bar_hdr:
show_bar_data = st.checkbox("View data", key="show_bar")
with bar_dl:
bar_df = pd.DataFrame({"Sentiment": ["Positive", "Neutral", "Negative"], "Count": [c_pos, c_neu, c_neg]})
csv_download(bar_df, "Download CSV", "sentiment_distribution.csv")
if show_bar_data:
st.dataframe(bar_df, width='stretch', hide_index=True)
with col_s2:
    st.markdown(
        'Sentiment Donut – All-time proportional share per class',
        unsafe_allow_html=True
    )
fig_pie = go.Figure(go.Pie(
labels=["Positive", "Neutral", "Negative"],
values=[c_pos, c_neu, c_neg],
marker_colors=["#22c55e", "#eab308", "#ef4444"],
hole=0.58,
textinfo="percent",
        hovertemplate="%{label}<br>%{value} messages (%{percent})",
))
fig_pie.update_layout(
**{**plotly_layout(260),
"showlegend": True,
"legend": dict(orientation="h", y=-0.08, font=dict(size=11, color="#f1f5f9"))}
)
st.plotly_chart(fig_pie, width='stretch', config={"displayModeBar": False})
pie_hdr, pie_dl = st.columns([1, 1])
with pie_hdr:
show_pie_data = st.checkbox("View data", key="show_pie")
with pie_dl:
pie_df = pd.DataFrame({
"Sentiment": ["Positive", "Neutral", "Negative"],
"Count": [c_pos, c_neu, c_neg],
"Percentage": [f"{c_pos/c_total*100:.1f}%", f"{c_neu/c_total*100:.1f}%", f"{c_neg/c_total*100:.1f}%"]
})
csv_download(pie_df, "Download CSV", "sentiment_breakdown.csv")
if show_pie_data:
st.dataframe(pie_df, width='stretch', hide_index=True)
with col_t1:
    st.markdown(
        'Topic Distribution – Message count by topic class',
        unsafe_allow_html=True
    )
_tc_vals = [int((all_df["topic"] == l).sum()) if "topic" in all_df.columns else 0 for l in TOPIC_LABELS]
    _tc_colors = [TOPIC_COLOR[l] for l in TOPIC_LABELS]
fig_tbar = go.Figure(go.Bar(
x=TOPIC_LABELS,
y=_tc_vals,
marker_color=_tc_colors,
marker_line_width=0,
text=_tc_vals,
textposition="outside",
textfont=dict(size=11),
        hovertemplate="%{x}<br>Count: %{y}",
))
_tbar_layout = plotly_layout(260)
_tbar_layout["xaxis"]["tickfont"] = dict(size=9)
fig_tbar.update_layout(**_tbar_layout)
st.plotly_chart(fig_tbar, width='stretch', config={"displayModeBar": False})
with col_t2:
    st.markdown(
        'Topic Donut – Proportional share per topic',
        unsafe_allow_html=True
    )
fig_tpie = go.Figure(go.Pie(
labels=TOPIC_LABELS,
values=_tc_vals,
marker_colors=_tc_colors,
hole=0.58,
textinfo="percent",
        hovertemplate="%{label}<br>%{value} messages (%{percent})",
))
fig_tpie.update_layout(
**{**plotly_layout(260),
"showlegend": True,
"legend": dict(orientation="h", y=-0.08, font=dict(size=10, color="#f1f5f9"))}
)
st.plotly_chart(fig_tpie, width='stretch', config={"displayModeBar": False})
# ── SENTIMENT HEATMAP OVER TIME ───────────────────────────
st.divider()
st.markdown(
    'Sentiment Heatmap · Over Time',
    unsafe_allow_html=True
)
heatmap_data = build_heatmap_data(json.dumps([{"time": m.get("time",""), "sentiment": m.get("sentiment","Neutral")} for m in all_data]), bucket_minutes=1)
if not heatmap_data.empty:
    st.markdown(
        'Sentiment Over Time – Message volume per sentiment per minute bucket',
        unsafe_allow_html=True
    )
fig_heat = go.Figure()
for sent, color in [("Positive", "#22c55e"), ("Neutral", "#eab308"), ("Negative", "#ef4444")]:
fig_heat.add_trace(go.Scatter(
x=heatmap_data["bucket"],
y=heatmap_data[sent],
name=sent,
mode="lines+markers",
line=dict(color=color, width=2),
marker=dict(size=4),
hovertemplate=f"
{sent}%{{x}}
Count: %{{y}}
",
))
layout = plotly_layout(220)
layout["showlegend"] = True
layout["legend"] = dict(orientation="h", y=1.08, font=dict(size=11))
layout["xaxis"]["tickformat"] = "%H:%M"
fig_heat.update_layout(**layout)
st.plotly_chart(fig_heat, width='stretch', config={"displayModeBar": False})
heat_hdr, heat_dl = st.columns([1, 1])
with heat_hdr:
show_heat_data = st.checkbox("View data", key="show_heat")
with heat_dl:
csv_download(heatmap_data.rename(columns={"bucket": "time_bucket"}), "Download CSV", "sentiment_heatmap.csv")
if show_heat_data:
st.dataframe(heatmap_data.rename(columns={"bucket": "time_bucket"}), width='stretch', hide_index=True)
else:
st.info("Not enough timestamped data for heatmap yet.")
# ── TOPIC DISTRIBUTION ────────────────────────────────────
st.divider()
st.markdown(
    'Topic Distribution · All Time',
    unsafe_allow_html=True
)
topic_counts = {
label: int((all_df["topic"] == label).sum())
for label in TOPIC_LABELS
}
pills = ''
for label in TOPIC_LABELS:
    color = TOPIC_COLOR[label]
    count = topic_counts[label]
    pills += (
        f'<span style="color:{color}">●</span> {count} {label}&nbsp;&nbsp; '
    )
st.markdown(pills, unsafe_allow_html=True)
st.markdown(
    'Topic Breakdown – All-time message count per topic category',
    unsafe_allow_html=True
)
fig_topic = go.Figure(go.Bar(
x=TOPIC_LABELS,
y=[topic_counts[l] for l in TOPIC_LABELS],
marker_color=[TOPIC_COLOR[l] for l in TOPIC_LABELS],
marker_line_width=0,
text=[topic_counts[l] for l in TOPIC_LABELS],
textposition="outside",
textfont=dict(size=11),
    hovertemplate="%{x}<br>Count: %{y}",
))
fig_topic.update_layout(**plotly_layout(250))
st.plotly_chart(fig_topic, width='stretch', config={"displayModeBar": False})
topic_hdr, topic_dl = st.columns([1, 1])
with topic_hdr:
show_topic_data = st.checkbox("View data", key="show_topic")
with topic_dl:
topic_df = pd.DataFrame({"Topic": TOPIC_LABELS, "Count": [topic_counts[l] for l in TOPIC_LABELS]})
csv_download(topic_df, "Download CSV", "topic_distribution.csv")
if show_topic_data:
st.dataframe(topic_df, width='stretch', hide_index=True)
# ── Topic Sentiment breakdown ─────────────────────────────
st.markdown(
    'Sentiment by Topic – % positive / neutral / negative within each topic category',
    unsafe_allow_html=True
)
_topic_sent_data = []
for _lbl in TOPIC_LABELS:
_mask = all_df["topic"] == _lbl
_total = int(_mask.sum())
if _total == 0:
_topic_sent_data.append({"topic": _lbl, "pos": 0, "neu": 0, "neg": 0})
continue
_sub = all_df[_mask]
_topic_sent_data.append({
"topic": _lbl,
"pos": round((_sub["sentiment"] == "Positive").sum() / _total * 100),
"neu": round((_sub["sentiment"] == "Neutral").sum() / _total * 100),
"neg": round((_sub["sentiment"] == "Negative").sum() / _total * 100),
})
fig_ts = go.Figure()
for _sk, _sl, _sc in [("neg", "Neg", "#ef4444"), ("neu", "Neu", "#eab308"), ("pos", "Pos", "#22c55e")]:
fig_ts.add_trace(go.Bar(
y=[d["topic"] for d in _topic_sent_data],
x=[d[_sk] for d in _topic_sent_data],
name=_sl,
orientation="h",
marker_color=_sc,
        hovertemplate="%{y}<br>" + _sl + ": %{x}%",
))
_layout_ts = plotly_layout(260)
_layout_ts["barmode"] = "stack"
_layout_ts["showlegend"] = True
_layout_ts["legend"] = dict(orientation="h", y=1.08, x=0.35, font=dict(size=11))
_layout_ts["xaxis"]["range"] = [0, 100]
_layout_ts["xaxis"]["ticksuffix"] = "%"
_layout_ts["yaxis"]["autorange"] = "reversed"
fig_ts.update_layout(**_layout_ts)
st.plotly_chart(fig_ts, width='stretch', config={"displayModeBar": False})
# ── ACTION TYPE CHARTS ────────────────────────────────────
st.divider()
st.markdown(
    'Action Type Analysis · Last 100 msgs',
    unsafe_allow_html=True
)
# Category groupings
_QUESTION_ACTIONS = [
"Access & Support",
"Academic / Lecture / Concept Doubts",
"Study Materials, Deliverables & Learning Resources",
"Batch details / structure / offerings (incl faculty)",
"Schedule & logistics (Batch)",
"Guidance- What should I take/do?",
"Suitability & Sufficiency (Is this enough?)",
"Eligibility & audience fit - Can I take this?",
"Information- Exam",
"Information- Post Exam",
]
_REQUEST_ACTIONS = [
"Content requests",
"Content Feedback",
"Faculty Request",
"Faculty Feedback",
"Academic requests",
"Language Request",
"Language medium",
"Product/feature requests (non-content)",
"Offline expansion & event-city requests",
"General Feedback",
"Others",
]
_SHORT_ACTION = {
"Access & Support": "Access & Support",
"Academic / Lecture / Concept Doubts": "Academic Doubts",
"Study Materials, Deliverables & Learning Resources": "Study Materials & Learning Resources",
"Batch details / structure / offerings (incl faculty)": "Batch Details & Offerings",
"Schedule & logistics (Batch)": "Batch Schedule & Logistics",
"Guidance- What should I take/do?": "Guidance (What Should I Take/Do?)",
"Suitability & Sufficiency (Is this enough?)": "Suitability & Sufficiency (Is This Enough?)",
"Eligibility & audience fit - Can I take this?": "Eligibility (Can I Take This?)",
"Information- Exam": "Exam Information",
"Information- Post Exam": "Post Exam Information",
"Content requests": "Content requests",
"Content Feedback": "Content Feedback",
"Faculty Request": "Faculty Request",
"Faculty Feedback": "Faculty Feedback",
"Academic requests": "Academic requests",
"Language Request": "Language Request",
"Language medium": "Language Medium",
"Product/feature requests (non-content)": "Non Content Product Requests",
"Offline expansion & event-city requests": "Offline Expansion & Event Requests",
"General Feedback": "General Feedback",
"Others": "Others",
}
# Compute counts from last 100 messages
_at_counts: dict[str, int] = {}
if "action_type" in all_df.columns:
for _at in _QUESTION_ACTIONS + _REQUEST_ACTIONS:
_at_counts[_at] = int((all_df.tail(100)["action_type"] == _at).sum())
else:
_at_counts = {_at: 0 for _at in _QUESTION_ACTIONS + _REQUEST_ACTIONS}
_q_data = {k: _at_counts.get(k, 0) for k in _QUESTION_ACTIONS if _at_counts.get(k, 0) > 0}
_rf_data = {k: _at_counts.get(k, 0) for k in _REQUEST_ACTIONS if _at_counts.get(k, 0) > 0}
_q_total = sum(_q_data.values())
_rf_total = sum(_rf_data.values())
_at_col1, _at_col2 = st.columns(2)
with _at_col1:
    st.markdown(
        f'Type of Questions ({_q_total} comments)',
        unsafe_allow_html=True
    )
if _q_data:
_q_sorted = sorted(_q_data.items(), key=lambda x: x[1], reverse=True)
_q_labels = [_SHORT_ACTION.get(k, k) for k, _ in _q_sorted]
_q_vals = [v for _, v in _q_sorted]
fig_q = go.Figure(go.Bar(
x=_q_labels, y=_q_vals,
marker_color="#4a90d9",
marker_line_width=0,
text=_q_vals, textposition="outside",
textfont=dict(size=11, color="#fff"),
            hovertemplate="%{x}<br>Comments: %{y}",
))
fig_q.update_layout(**plotly_layout(280))
st.plotly_chart(fig_q, width='stretch', config={"displayModeBar": False})
else:
        st.markdown('No data yet', unsafe_allow_html=True)
with _at_col2:
    st.markdown(
        f'Type of Requests & Feedback ({_rf_total} comments)',
        unsafe_allow_html=True
    )
if _rf_data:
_rf_sorted = sorted(_rf_data.items(), key=lambda x: x[1], reverse=True)
_rf_labels = [_SHORT_ACTION.get(k, k) for k, _ in _rf_sorted]
_rf_vals = [v for _, v in _rf_sorted]
fig_rf = go.Figure(go.Bar(
x=_rf_labels, y=_rf_vals,
marker_color="#f5a623",
marker_line_width=0,
text=_rf_vals, textposition="outside",
textfont=dict(size=11, color="#fff"),
            hovertemplate="%{x}<br>Comments: %{y}",
))
fig_rf.update_layout(**plotly_layout(280))
st.plotly_chart(fig_rf, width='stretch', config={"displayModeBar": False})
else:
        st.markdown('No data yet', unsafe_allow_html=True)
# Top-5 horizontal bar panels
_top5_col1, _top5_col2 = st.columns(2)
def _hbar_rows_html(data: dict, color: str, max_val: int) -> str:
html = ""
for cat, count in sorted(data.items(), key=lambda x: x[1], reverse=True)[:5]:
pct = round(count / max(max_val, 1) * 100)
label = _SHORT_ACTION.get(cat, cat)
        # inline-style horizontal bar row: colored bar scaled to pct, then label and count
        html += (
            f'<div style="margin:2px 0">'
            f'<span style="display:inline-block;width:{pct}%;background:{color};'
            f'height:10px;border-radius:4px"></span> {label}: {count}</div>'
        )
return html
with _top5_col1:
    st.markdown(
        'Top 5 Questions Students Ask  \n'
        'Type of action count for Questions across tagged videos.',
        unsafe_allow_html=True
    )
if _q_data:
st.markdown(_hbar_rows_html(_q_data, "#f87171", max(_q_data.values(), default=1)), unsafe_allow_html=True)
else:
        st.markdown('No data yet', unsafe_allow_html=True)
with _top5_col2:
    st.markdown(
        'Top 5 Types of Requests & Feedback Students Give  \n'
        'Type of action count for Request/Feedback across tagged videos.',
        unsafe_allow_html=True
    )
if _rf_data:
st.markdown(_hbar_rows_html(_rf_data, "#f87171", max(_rf_data.values(), default=1)), unsafe_allow_html=True)
else:
        st.markdown('No data yet', unsafe_allow_html=True)
# ── TOP CONTRIBUTORS ──────────────────────────────────────
st.divider()
st.markdown(
    'Top Contributors · All Time',
    unsafe_allow_html=True
)
_contrib_json = json.dumps([{"author": m.get("author",""), "sentiment": m.get("sentiment","Neutral"), "topic": m.get("topic","General")} for m in all_data])
contributors = compute_top_contributors(_contrib_json)
if contributors:
max_count = contributors[0]["count"]
    rank_icons = {1: "🥇", 2: "🥈", 3: "🥉"}
rank_classes = {1: "gold", 2: "silver", 3: "bronze"}
for rank, c in enumerate(contributors, 1):
bar_pct = int(c["count"] / max(max_count, 1) * 100)
rank_cls = rank_classes.get(rank, "")
rank_icon = rank_icons.get(rank, f"#{rank}")
author = c["author"]
count = c["count"]
pos_pct = c["pos_pct"]
neu_pct = c["neu_pct"]
neg_pct = c["neg_pct"]
        html = (
            f'{rank_icon} {author} – {count} msgs  \n'
            f'Positive {pos_pct}% · Neutral {neu_pct}% · Negative {neg_pct}%'
        )
st.markdown(html, unsafe_allow_html=True)
    # ── Combined Sentiment + Topic dual-bar chart ───────────
    st.markdown(
        'Sentiment & Topic Breakdown – Top Contributors  \n'
        'Top bar = sentiment (Neg/Neu/Pos) · Bottom bar = topic mix · right = message count',
        unsafe_allow_html=True
    )
# Each user occupies 2 numeric slots: sentiment at i*2+0.3, topic at i*2-0.3
# Tick label sits at i*2 (midpoint) showing the name once
n = len(contributors)
y_sent_num = [i * 2 + 0.3 for i in range(n)]
y_topic_num = [i * 2 - 0.3 for i in range(n)]
tick_vals = [i * 2 for i in range(n)]
tick_text = [c["author"][:22] for c in contributors]
fig_combo = go.Figure()
    # ── Sentiment traces ──
for key, label, color in [
("neg_pct", "Neg", "#ef4444"),
("neu_pct", "Neu", "#eab308"),
("pos_pct", "Pos", "#22c55e"),
]:
fig_combo.add_trace(go.Bar(
name=label,
y=y_sent_num,
x=[c[key] for c in contributors],
orientation="h",
marker_color=color,
legendgroup="sent",
legendgrouptitle_text="Sentiment" if key == "neg_pct" else None,
width=0.5,
            hovertemplate=label + ": %{x}%",
))
    # ── Topic traces ──
for key, label, color in [
("t_appr", "Appreciation", "#f59e0b"),
("t_ques", "Question", "#3b82f6"),
("t_rf", "Request/Feedback","#8b5cf6"),
("t_promo", "Promo", "#ec4899"),
("t_spam", "Spam", "#ef4444"),
("t_gen", "General", "#6b7280"),
("t_mcq", "MCQ Answer", "#10b981"),
]:
fig_combo.add_trace(go.Bar(
name=label,
y=y_topic_num,
x=[c[key] for c in contributors],
orientation="h",
marker_color=color,
legendgroup="topic",
legendgrouptitle_text="Topic" if key == "t_appr" else None,
width=0.5,
            hovertemplate=label + ": %{x}%",
))
    # ── Message count annotations (right of sentiment bar) ──
annotations = []
for i, c in enumerate(contributors):
annotations.append(dict(
x=102, y=y_sent_num[i],
text=f"
{c['count']} msgs",
showarrow=False,
xanchor="left",
font=dict(size=10, color="#94a3b8"),
xref="x", yref="y",
))
chart_h = max(400, n * 56)
layout_combo = plotly_layout(chart_h)
layout_combo["barmode"] = "stack"
layout_combo["bargap"] = 0.1
layout_combo["showlegend"] = True
layout_combo["legend"] = dict(
orientation="h", y=1.0, x=0,
font=dict(size=12, color="#f1f5f9"),
title_font=dict(size=12, color="#a78bfa"),
groupclick="toggleitem",
yanchor="bottom",
xanchor="left",
bgcolor="rgba(0,0,0,0)",
)
layout_combo["margin"] = dict(l=10, r=80, t=80, b=10)
layout_combo["xaxis"]["range"] = [0, 115]
layout_combo["xaxis"]["ticksuffix"] = "%"
layout_combo["yaxis"] = dict(
tickvals=tick_vals,
ticktext=tick_text,
tickfont=dict(size=10),
autorange="reversed",
showgrid=False,
zeroline=False,
showline=False,
)
layout_combo["annotations"] = annotations
fig_combo.update_layout(**layout_combo)
st.plotly_chart(fig_combo, width='stretch', config={"displayModeBar": False})
contrib_df = pd.DataFrame(contributors)
csv_download(contrib_df, "Download CSV", "top_contributors.csv")
else:
st.info("Not enough data yet.")
# ── REPEAT SPAMMERS ───────────────────────────────────────
st.divider()
st.markdown(
    'Repeat Spammers · All Time',
    unsafe_allow_html=True
)
rs_col1, rs_col2 = st.columns([1, 1])
with rs_col1:
rs_window = st.slider("Time window (sec)", 5, 60, 15, key="rs_window")
with rs_col2:
rs_min = st.slider("Min repeats to flag", 2, 10, 2, key="rs_min")
_rs_json = json.dumps([{
"author": m.get("author",""), "text": m.get("text",""),
"topic": m.get("topic","General"), "sentiment": m.get("sentiment","Neutral"),
"time": m.get("time","")
} for m in all_data])
repeat_spammers = detect_repeat_spammers(_rs_json, window_sec=rs_window, min_repeats=rs_min)
if repeat_spammers:
    st.markdown(
        f'Found {len(repeat_spammers)} users repeating the same message '
        f'≥{rs_min}× within {rs_window}s',
        unsafe_allow_html=True
    )
for rs in repeat_spammers:
_t_color = TOPIC_COLOR.get(rs["topic"], "#6b7280")
_s_color = SENT_COLORS.get(rs["sentiment"], "#6b7280")
_burst = rs["max_burst"]
_total = rs["count"]
_severity = "#ef4444" if _burst >= 5 else "#eab308" if _burst >= 3 else "#f59e0b"
        st.markdown(
            f'⚠️ {rs["author"]} – 🔁 {_burst}× in {rs_window}s · {_total} total  \n'
            f'"{rs["text"]}"  \n'
            f'{rs["sentiment"]} · {rs["topic"]} · First: {rs["first_seen"]} · Last: {rs["last_seen"]}',
            unsafe_allow_html=True
        )
rs_df = pd.DataFrame(repeat_spammers)
csv_download(rs_df, "Download CSV", "repeat_spammers.csv")
else:
    st.markdown(
        'No repeat spammers detected in current window.',
        unsafe_allow_html=True
    )
# ── MULTI-STREAM COMPARISON ───────────────────────────────
active_streams = [s for s in st.session_state.streams if r.llen(s["redis_key"]) > 0]
if len(active_streams) > 1:
st.divider()
n_streams = len(active_streams)
    st.markdown(
        f'Multi-Stream Comparison · {n_streams} streams',
        unsafe_allow_html=True
    )
    # ── Load all stream data ONCE (fix double-load) ──────────
_stream_cache: dict[str, dict] = {}
for _s in active_streams:
_rkey = _s["redis_key"]
_raw = load_stream_data(_rkey)
if not _raw:
continue
_sdf = pd.DataFrame(_raw)
_sdf["sentiment"] = _sdf["sentiment"].apply(clean_sentiment)
_sdf["topic"] = _sdf["topic"].apply(clean_topic) if "topic" in _sdf.columns else "General"
_sc = _sdf["sentiment"].value_counts().to_dict()
_p = _sc.get("Positive", 0)
_n = _sc.get("Neutral", 0)
_g = _sc.get("Negative", 0)
_t = max(_p + _n + _g, 1)
_tc = {lbl: int((_sdf["topic"] == lbl).sum()) for lbl in TOPIC_LABELS}
_top_topic = max(_tc, key=_tc.get)
_eng_json = json.dumps([
{"sentiment": m.get("sentiment","Neutral"),
"topic": m.get("topic","General"),
"time": m.get("time","")} for m in _raw
])
_eng = compute_engagement(_eng_json)
_title = _s.get("video_title") or _s.get("video_id") or _rkey
_stream_cache[_rkey] = {
"df": _sdf, "raw": _raw,
"p": _p, "n": _n, "g": _g, "t": _t,
"tc": _tc, "top_topic": _top_topic,
"eng": _eng, "title": _title,
"sidx": st.session_state.streams.index(_s),
}
    # ── Head-to-head comparison table ─────────────────────────
    st.markdown(
        'Head-to-Head Summary – All active streams at a glance',
        unsafe_allow_html=True
    )
_hth_rows = []
for _s in active_streams:
_rkey = _s["redis_key"]
if _rkey not in _stream_cache:
continue
_c = _stream_cache[_rkey]
_sidx = _c["sidx"]
_hth_rows.append({
"Stream": f"Stream {STREAM_NAMES[_sidx]}",
"Title": _c["title"][:30],
"Messages": _c["t"],
"Positive %": f"{_c['p']/_c['t']*100:.1f}%",
"Neutral %": f"{_c['n']/_c['t']*100:.1f}%",
"Negative %": f"{_c['g']/_c['t']*100:.1f}%",
"Top Topic": _c["top_topic"],
"Engagement": f"{_c['eng']['score']}/100 {_c['eng']['grade']}",
})
if _hth_rows:
st.dataframe(pd.DataFrame(_hth_rows), hide_index=True, use_container_width=True)
    # ── Per-stream sentiment + topic + engagement cards ───────
chunk_size = 2
_cached_keys = [_s["redis_key"] for _s in active_streams if _s["redis_key"] in _stream_cache]
for row_start in range(0, len(_cached_keys), chunk_size):
row_keys = _cached_keys[row_start:row_start + chunk_size]
cols = st.columns(len(row_keys))
for col, _rkey in zip(cols, row_keys):
_c = _stream_cache[_rkey]
_sidx = _c["sidx"]
color = STREAM_COLORS[_sidx]
slabel = STREAM_NAMES[_sidx]
_p, _n, _g, _t = _c["p"], _c["n"], _c["g"], _c["t"]
_eng = _c["eng"]
_tc = _c["tc"]
with col:
                st.markdown(
                    f'Stream {slabel} · {_c["title"][:25]}',
                    unsafe_allow_html=True
                )
_ec = "#22c55e" if _eng["score"] >= 70 else "#eab308" if _eng["score"] >= 40 else "#ef4444"
                st.markdown(
                    f'{_eng["score"]} Engagement · {_p/_t*100:.0f}% Positive',
                    unsafe_allow_html=True
                )
                st.markdown('Sentiment', unsafe_allow_html=True)
fig_s = go.Figure(go.Bar(
x=["Pos", "Neu", "Neg"],
y=[_p, _n, _g],
marker_color=["#22c55e", "#eab308", "#ef4444"],
marker_line_width=0,
text=[_p, _n, _g],
textposition="outside",
                    hovertemplate="%{x}: %{y}",
))
fig_s.update_layout(**plotly_layout(180))
st.plotly_chart(fig_s, width='stretch', config={"displayModeBar": False})
                st.markdown('Topic Breakdown', unsafe_allow_html=True)
                _tc_colors = [TOPIC_COLOR[l] for l in TOPIC_LABELS]
fig_t = go.Figure(go.Bar(
x=TOPIC_LABELS,
y=[_tc[l] for l in TOPIC_LABELS],
marker_color=_tc_colors,
marker_line_width=0,
text=[_tc[l] for l in TOPIC_LABELS],
textposition="outside",
                    hovertemplate="%{x}: %{y}",
))
_tl = plotly_layout(180)
_tl["xaxis"]["tickfont"] = dict(size=8)
fig_t.update_layout(**_tl)
st.plotly_chart(fig_t, width='stretch', config={"displayModeBar": False})
    # ── Overlay: positive ratio over time (all streams) ───────
    st.markdown(
        'Positive Ratio Over Time – Rolling positive % per stream (synced refresh)',
        unsafe_allow_html=True
    )
fig_overlay = go.Figure()
for _rkey, _c in _stream_cache.items():
_sidx = _c["sidx"]
color = STREAM_COLORS[_sidx]
slabel = STREAM_NAMES[_sidx]
_sdf = _c["df"].copy()
_sdf["is_pos"] = (_sdf["sentiment"] == "Positive").astype(int)
_sdf["rolling"] = _sdf["is_pos"].rolling(10, min_periods=1).mean() * 100
fig_overlay.add_trace(go.Scatter(
x=list(range(len(_sdf))),
y=_sdf["rolling"],
mode="lines",
name=f"Stream {slabel} ยท {_c['title'][:20]}",
line=dict(color=color, width=2),
            hovertemplate=f"Stream {slabel} msg %{{x}}: %{{y:.1f}}%",
))
layout_ov = plotly_layout(220)
layout_ov["showlegend"] = True
layout_ov["legend"] = dict(orientation="h", y=1.08, font=dict(size=11, color="#f1f5f9"))
layout_ov["yaxis"]["range"] = [0, 100]
fig_overlay.update_layout(**layout_ov)
st.plotly_chart(fig_overlay, width='stretch', config={"displayModeBar": False})
elif len(st.session_state.streams) > 1:
st.divider()
    st.info("Add video IDs to your extra stream slots and click ▶ Start to enable multi-stream comparison.")
# ── AUTO REFRESH ──────────────────────────────────────────
if auto_refresh:
time.sleep(refresh_rate)
st.rerun()