| |
| |
|
|
| import pandas as pd |
| import streamlit as st |
| import numpy as np |
|
|
| try: |
| import altair as alt |
| except Exception: |
| alt = None |
|
|
|
|
| def render_stats_tab(df_all_messages: pd.DataFrame, ss): |
| st.subheader("Usage & Conversation Stats") |
|
|
| df_all = (df_all_messages.copy() if df_all_messages is not None else pd.DataFrame()) |
| if df_all.empty: |
| st.info("No messages available for stats. Import from Cloud Pull or CSV first.") |
| return |
|
|
| |
| ts_utc = pd.to_datetime(df_all["ts"], errors="coerce", utc=True) |
| ts_jst = ts_utc.dt.tz_convert("Asia/Tokyo") |
| df_all["ts_jst"] = ts_jst |
| df_all["day"] = ts_jst.dt.strftime("%Y-%m-%d") |
| df_all["hour"] = ts_jst.dt.hour |
| df_all["dow"] = ts_jst.dt.dayofweek |
| df_all["dow_name"] = df_all["dow"].map({0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}) |
|
|
| |
| idx_map = ss.get("user_index", {}) if ss is not None else {} |
|
|
| def _label(u: str) -> str: |
| rec = (idx_map.get(u, {}) or {}) |
| nickname = str(rec.get("nickname", "")).strip() |
| display = str(rec.get("display_name", "")).strip() |
| base = nickname or display or u |
| suffix = u[-6:] if isinstance(u, str) and len(u) >= 6 else u |
| return f"{base} ({suffix})" |
|
|
| df_all["sender"] = df_all["user_id"].astype(str).map(_label) |
|
|
| |
| st.markdown("**Time Range & Metric**") |
| colr1, colr2, colr3 = st.columns([1.2, 1, 1.2]) |
| with colr1: |
| range_choice = st.selectbox("Range", ["Past day", "Past week", "Past month", "Past year", "All"], index=1) |
| with colr2: |
| metric_type = st.radio("Metric", ["Message time", "First-seen (follow) time"], index=0) |
| with colr3: |
| gran_override = st.selectbox( |
| "Granularity", |
| ["Auto", "Hourly", "Daily", "Weekly"], |
| index=0, |
| help="Auto picks Hourly for ≤2 days, else Daily.", |
| ) |
|
|
| now_jst = pd.Timestamp.now(tz="Asia/Tokyo") |
| if range_choice == "Past day": |
| start_jst = now_jst - pd.Timedelta(days=1) |
| elif range_choice == "Past week": |
| start_jst = now_jst - pd.Timedelta(weeks=1) |
| elif range_choice == "Past month": |
| start_jst = now_jst - pd.Timedelta(days=30) |
| elif range_choice == "Past year": |
| start_jst = now_jst - pd.Timedelta(days=365) |
| else: |
| start_jst = df_all["ts_jst"].min() or (now_jst - pd.Timedelta(days=365)) |
| end_jst = now_jst |
|
|
| dff = df_all[(df_all["ts_jst"] >= start_jst) & (df_all["ts_jst"] <= end_jst)].copy() |
| if dff.empty: |
| st.info("No messages in the selected window.") |
| return |
|
|
| st.markdown("### Overview") |
|
|
| |
| if gran_override == "Hourly": |
| freq = "H" |
| elif gran_override == "Daily": |
| freq = "D" |
| elif gran_override == "Weekly": |
| freq = "W" |
| else: |
| freq = "H" if (end_jst - start_jst) <= pd.Timedelta(days=2) else "D" |
|
|
| |
| if metric_type == "Message time": |
| series = dff.set_index("ts_jst").resample(freq).size() |
| title_main = "Messages over time" |
| else: |
| first_seen = df_all.groupby("user_id")["ts_jst"].min().dropna() |
| fs_win = first_seen[(first_seen >= start_jst) & (first_seen <= end_jst)] |
| series = fs_win.to_frame("ts_jst").set_index("ts_jst").resample(freq).size() |
| title_main = "New users over time (first seen)" |
|
|
| series_df = series.rename_axis("time").reset_index(name="count") |
| if not series_df.empty: |
| if alt: |
| st.altair_chart( |
| alt.Chart(series_df).mark_line(point=True).encode( |
| x=alt.X("time:T", title="Time (JST)"), |
| y=alt.Y("count:Q", title="Count"), |
| ).properties(height=240, title=title_main), |
| use_container_width=True, |
| ) |
| else: |
| st.line_chart(series_df.set_index("time")["count"], height=240) |
|
|
| |
| by_hour = dff.groupby("hour").size().reset_index(name="count") |
| if alt: |
| st.altair_chart( |
| alt.Chart(by_hour).mark_bar().encode( |
| x=alt.X("hour:O", title="Hour (JST)"), |
| y=alt.Y("count:Q", title="Messages"), |
| ).properties(height=180, title="Messages by hour"), |
| use_container_width=True, |
| ) |
| else: |
| st.bar_chart(by_hour.set_index("hour")["count"], height=180) |
|
|
| |
| order_dow = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] |
| by_dow = dff.groupby("dow_name").size().reindex(order_dow).fillna(0).reset_index() |
| by_dow.columns = ["weekday", "count"] |
| if alt: |
| st.altair_chart( |
| alt.Chart(by_dow).mark_bar().encode( |
| x=alt.X("weekday:N", sort=order_dow, title="Weekday"), |
| y=alt.Y("count:Q", title="Messages"), |
| ).properties(height=180, title="Messages by weekday"), |
| use_container_width=True, |
| ) |
| else: |
| st.bar_chart(by_dow.set_index("weekday")["count"], height=180) |
|
|
| |
| with st.expander("Role breakdown"): |
| role_counts = dff.groupby("role").size().reset_index(name="count").sort_values("count", ascending=False) |
| if alt: |
| st.altair_chart( |
| alt.Chart(role_counts).mark_bar().encode( |
| x=alt.X("role:N", title="Role"), |
| y=alt.Y("count:Q", title="Messages"), |
| ).properties(height=160, title="Messages by role"), |
| use_container_width=True, |
| ) |
| else: |
| st.bar_chart(role_counts.set_index("role")["count"], height=160) |
|
|
| st.markdown("---") |
|
|
| |
| st.markdown("### Top 10 Senders (with per-day counts)") |
| pivot = ( |
| dff.assign(day=dff["ts_jst"].dt.strftime("%Y-%m-%d")) |
| .pivot_table(index="sender", columns="day", values="text", aggfunc="count", fill_value=0) |
| ) |
|
|
| top10 = pd.DataFrame() |
| if pivot.empty: |
| st.info("No senders in this window.") |
| else: |
| pivot["__Total"] = pivot.sum(axis=1) |
| top10 = pivot.sort_values("__Total", ascending=False).head(10) |
| cols = ["__Total"] + [c for c in top10.columns if c != "__Total"] |
| st.dataframe(top10[cols], use_container_width=True, height=260) |
|
|
| st.markdown("---") |
|
|
| |
| st.markdown("### Per-user Breakdown") |
| users_list = sorted(dff["sender"].unique()) |
| if not users_list: |
| st.info("No users to analyze in this window.") |
| return |
|
|
| pick_sender = st.selectbox("Select a sender", options=users_list, index=0, key="stats_pick_sender") |
| uid_sel = dff.loc[dff["sender"] == pick_sender, "user_id"].iloc[0] |
| dfu = dff[dff["user_id"] == uid_sel].copy() |
|
|
| total_msgs = dfu.shape[0] |
| active_days = dfu["day"].nunique() |
| lengths = dfu["text"].astype(str).map(len) |
| words = dfu["text"].astype(str).map(lambda s: len(s.split())) |
| median_gap = 0.0 |
| if total_msgs > 1: |
| gaps = dfu.sort_values("ts_jst")["ts_jst"].diff().dropna().dt.total_seconds() / 60.0 |
| if not gaps.empty: |
| median_gap = float(gaps.median()) |
|
|
| c1, c2, c3, c4, c5 = st.columns(5) |
| c1.metric("Messages", f"{total_msgs}") |
| c2.metric("Active days", f"{active_days}") |
| c3.metric("Avg length (chars)", f"{float(lengths.mean()):.1f}" if total_msgs else "0.0") |
| c4.metric("Avg words", f"{float(words.mean()):.1f}" if total_msgs else "0.0") |
| c5.metric("Median gap (min)", f"{median_gap:.1f}") |
|
|
| |
| freq_u = "H" if (end_jst - start_jst) <= pd.Timedelta(days=2) else "D" |
| ser_u = dfu.set_index("ts_jst").resample(freq_u).size() |
| ser_u_df = ser_u.rename_axis("ts_jst").reset_index(name="count") |
| if not ser_u_df.empty: |
| if alt: |
| st.altair_chart( |
| alt.Chart(ser_u_df).mark_line(point=True).encode( |
| x=alt.X("ts_jst:T", title="Time (JST)"), |
| y=alt.Y("count:Q", title="Messages"), |
| ).properties(height=220, title=f"Messages over time — {pick_sender}"), |
| use_container_width=True, |
| ) |
| else: |
| st.line_chart(ser_u_df.set_index("ts_jst")["count"], height=220) |
|
|
| |
| if not dfu.empty: |
| if alt: |
| hist = pd.DataFrame({"length": lengths}) |
| st.altair_chart( |
| alt.Chart(hist).mark_bar().encode( |
| x=alt.X("length:Q", bin=alt.Bin(maxbins=30), title="Message length (chars)"), |
| y=alt.Y("count():Q", title="Messages"), |
| ).properties(height=180, title="Message length distribution"), |
| use_container_width=True, |
| ) |
| else: |
| st.bar_chart(lengths.value_counts().sort_index(), height=180) |
|
|
| |
| if alt and not dfu.empty: |
| dfu_heat = dfu.groupby(["dow_name", "hour"]).size().reset_index(name="count") |
| st.altair_chart( |
| alt.Chart(dfu_heat).mark_rect().encode( |
| x=alt.X("hour:O", title="Hour (JST)"), |
| y=alt.Y("dow_name:O", sort=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"], title="Weekday"), |
| color=alt.Color("count:Q", title="Msgs", scale=alt.Scale(scheme="bluegreen")), |
| ).properties(height=180, title="Activity heatmap"), |
| use_container_width=True, |
| ) |
|
|
| st.markdown("---") |
|
|
| |
| st.markdown("### Extra Insights") |
| peak_hour = int(dff["hour"].mode().iloc[0]) if not dff["hour"].isna().all() else 0 |
| peak_dow = dff["dow_name"].mode().iloc[0] if not dff["dow_name"].isna().all() else "N/A" |
| peak_hour_u = int(dfu["hour"].mode().iloc[0]) if not dfu["hour"].isna().all() else 0 |
| peak_dow_u = dfu["dow_name"].mode().iloc[0] if not dfu["dow_name"].isna().all() else "N/A" |
|
|
| e1, e2, e3, e4 = st.columns(4) |
| e1.metric("Global peak hour", f"{peak_hour}:00") |
| e2.metric("Global peak weekday", peak_dow) |
| e3.metric("User peak hour", f"{peak_hour_u}:00") |
| e4.metric("User peak weekday", peak_dow_u) |
|
|
| |
| ser_daily = dff.set_index("ts_jst").resample("D").size() |
| ser_daily_df = ser_daily.rename_axis("ts_jst").reset_index(name="count") |
| if not ser_daily_df.empty: |
| ser_daily_df["rolling_7d"] = ser_daily_df["count"].rolling(7, min_periods=1).sum() |
| if alt: |
| bars = alt.Chart(ser_daily_df).mark_bar().encode( |
| x=alt.X("ts_jst:T", title="Date (JST)"), |
| y=alt.Y("count:Q", title="Daily messages"), |
| tooltip=["ts_jst:T", "count:Q", "rolling_7d:Q"], |
| ).properties(height=200, title="Daily messages & rolling 7-day sum") |
| line = alt.Chart(ser_daily_df).mark_line(strokeDash=[4, 2]).encode( |
| x="ts_jst:T", |
| y=alt.Y("rolling_7d:Q", title="Rolling 7-day sum"), |
| ) |
| st.altair_chart(bars + line, use_container_width=True) |
| else: |
| st.line_chart(ser_daily_df.set_index("ts_jst")[["count", "rolling_7d"]], height=200) |
|
|
| |
| st.markdown("#### Export") |
| if isinstance(top10, pd.DataFrame) and not top10.empty: |
| csv_sum = top10.reset_index().rename(columns={"sender": "User"}) |
| st.download_button( |
| "⬇️ Download Top10 table (CSV)", |
| data=csv_sum.to_csv(index=False), |
| file_name="top10_senders.csv", |
| mime="text/csv", |
| ) |
| st.download_button( |
| "⬇️ Download filtered messages (CSV)", |
| data=dff.to_csv(index=False), |
| file_name="messages_filtered.csv", |
| mime="text/csv", |
| ) |
|
|