# stats_tab.py
# -*- coding: utf-8 -*-

import pandas as pd
import streamlit as st
import numpy as np

try:
    import altair as alt
except Exception:
    alt = None


# Weekday labels indexed by pandas' dayofweek (0=Mon..6=Sun); also the chart sort order.
_DOW_NAMES = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

# Columns read from the incoming message frame by the stats tab.
_REQUIRED_COLUMNS = ("ts", "user_id", "role", "text")

# Look-back span for each fixed "Range" choice ("All" is handled separately).
_RANGE_SPANS = {
    "Past day": pd.Timedelta(days=1),
    "Past week": pd.Timedelta(weeks=1),
    "Past month": pd.Timedelta(days=30),
    "Past year": pd.Timedelta(days=365),
}

# Explicit "Granularity" choices mapped to pandas resample aliases.
# Lowercase "h" is used because the uppercase "H" alias is deprecated in recent pandas.
_GRANULARITY_FREQ = {"Hourly": "h", "Daily": "D", "Weekly": "W"}


def _sender_label(user_id: str, idx_map: dict) -> str:
    """Build a display label ``"<nickname|display_name|id> (<last 6 of id>)"``."""
    rec = idx_map.get(user_id) or {}
    # ``or ""`` keeps a stored None from being rendered as the literal "None".
    nickname = str(rec.get("nickname") or "").strip()
    display = str(rec.get("display_name") or "").strip()
    base = nickname or display or user_id
    suffix = user_id[-6:] if isinstance(user_id, str) and len(user_id) >= 6 else user_id
    return f"{base} ({suffix})"


def _window_start(range_choice: str, now_jst: pd.Timestamp, ts_jst: pd.Series) -> pd.Timestamp:
    """Return the start of the selected time window."""
    span = _RANGE_SPANS.get(range_choice)
    if span is not None:
        return now_jst - span
    # "All": start at the earliest parsable timestamp. NaT is truthy, so an
    # ``x or default`` fallback would never trigger; test for it explicitly.
    earliest = ts_jst.min()
    if pd.isna(earliest):
        return now_jst - _RANGE_SPANS["Past year"]
    return earliest


def _resample_freq(granularity: str, span: pd.Timedelta) -> str:
    """Return the resample alias for a granularity choice ("Auto": hourly for <= 2 days)."""
    explicit = _GRANULARITY_FREQ.get(granularity)
    if explicit is not None:
        return explicit
    return "h" if span <= pd.Timedelta(days=2) else "D"


def _mode_or(series: pd.Series, default):
    """Return the most frequent value of *series*, or *default* if it has no non-null values."""
    if series.isna().all():
        return default
    return series.mode().iloc[0]


def _render_counts(data, x_field, x_type, x_title, y_title, title, height, *, mark="bar", sort=None):
    """Draw a ``count`` chart (bar or line) with Altair, or Streamlit's built-in chart without it."""
    if alt is not None:
        x_kwargs = {"title": x_title}
        if sort is not None:
            x_kwargs["sort"] = sort
        base = alt.Chart(data)
        marked = base.mark_line(point=True) if mark == "line" else base.mark_bar()
        chart = marked.encode(
            x=alt.X(f"{x_field}:{x_type}", **x_kwargs),
            y=alt.Y("count:Q", title=y_title),
        ).properties(height=height, title=title)
        st.altair_chart(chart, use_container_width=True)
        return

    indexed = data.set_index(x_field)["count"]
    if mark == "line":
        st.line_chart(indexed, height=height)
    else:
        st.bar_chart(indexed, height=height)


def _render_overview(df_all, dff, metric_type, freq, start_jst, end_jst):
    """Render the main time series plus the hour, weekday and role breakdowns."""
    st.markdown("### Overview")

    if metric_type == "Message time":
        series = dff.set_index("ts_jst").resample(freq).size()
        title_main = "Messages over time"
    else:
        # First-seen is computed over ALL messages, then restricted to the window,
        # so a user first seen before the window is not counted as new.
        first_seen = df_all.groupby("user_id")["ts_jst"].min().dropna()
        fs_win = first_seen[(first_seen >= start_jst) & (first_seen <= end_jst)]
        series = fs_win.to_frame("ts_jst").set_index("ts_jst").resample(freq).size()
        title_main = "New users over time (first seen)"

    series_df = series.rename_axis("time").reset_index(name="count")
    if not series_df.empty:
        _render_counts(series_df, "time", "T", "Time (JST)", "Count", title_main, 240, mark="line")

    by_hour = dff.groupby("hour").size().reset_index(name="count")
    _render_counts(by_hour, "hour", "O", "Hour (JST)", "Messages", "Messages by hour", 180)

    # fill_value=0 keeps weekdays without messages visible and the counts integral.
    by_dow = dff.groupby("dow_name").size().reindex(_DOW_NAMES, fill_value=0).reset_index()
    by_dow.columns = ["weekday", "count"]
    _render_counts(
        by_dow, "weekday", "N", "Weekday", "Messages", "Messages by weekday", 180, sort=_DOW_NAMES
    )

    with st.expander("Role breakdown"):
        role_counts = (
            dff.groupby("role").size().reset_index(name="count").sort_values("count", ascending=False)
        )
        _render_counts(role_counts, "role", "N", "Role", "Messages", "Messages by role", 160)


def _render_top_senders(dff) -> pd.DataFrame:
    """Render the top-10 senders table and return it (empty frame if there are no senders)."""
    st.markdown("### Top 10 Senders (with per-day counts)")
    # Count rows rather than non-null ``text`` so totals agree with the other
    # message counts in this tab even when some messages carry no text.
    per_day = dff.groupby(["sender", "day"]).size().unstack(fill_value=0)
    if per_day.empty:
        st.info("No senders in this window.")
        return pd.DataFrame()

    per_day["__Total"] = per_day.sum(axis=1)
    top10 = per_day.sort_values("__Total", ascending=False).head(10)
    cols = ["__Total"] + [c for c in top10.columns if c != "__Total"]
    st.dataframe(top10[cols], use_container_width=True, height=260)
    return top10


def _render_user_breakdown(dff, span):
    """Render metrics and charts for one selected sender; return that sender's rows or None."""
    st.markdown("### Per-user Breakdown")
    users_list = sorted(dff["sender"].unique())
    if not users_list:
        st.info("No users to analyze in this window.")
        return None

    pick_sender = st.selectbox("Select a sender", options=users_list, index=0, key="stats_pick_sender")
    uid_sel = dff.loc[dff["sender"] == pick_sender, "user_id"].iloc[0]
    dfu = dff[dff["user_id"] == uid_sel].copy()

    total_msgs = dfu.shape[0]
    active_days = dfu["day"].nunique()
    # Missing text becomes "" so it is not measured as the 3-character string "nan".
    texts = dfu["text"].fillna("").astype(str)
    lengths = texts.map(len)
    words = texts.map(lambda s: len(s.split()))
    median_gap = 0.0
    if total_msgs > 1:
        # Minutes between consecutive messages, in chronological order.
        gaps = dfu.sort_values("ts_jst")["ts_jst"].diff().dropna().dt.total_seconds() / 60.0
        if not gaps.empty:
            median_gap = float(gaps.median())

    c1, c2, c3, c4, c5 = st.columns(5)
    c1.metric("Messages", f"{total_msgs}")
    c2.metric("Active days", f"{active_days}")
    c3.metric("Avg length (chars)", f"{float(lengths.mean()):.1f}" if total_msgs else "0.0")
    c4.metric("Avg words", f"{float(words.mean()):.1f}" if total_msgs else "0.0")
    c5.metric("Median gap (min)", f"{median_gap:.1f}")

    # The per-user timeline always uses the automatic granularity.
    freq_u = _resample_freq("Auto", span)
    ser_u_df = dfu.set_index("ts_jst").resample(freq_u).size().rename_axis("ts_jst").reset_index(name="count")
    if not ser_u_df.empty:
        _render_counts(
            ser_u_df,
            "ts_jst",
            "T",
            "Time (JST)",
            "Messages",
            f"Messages over time — {pick_sender}",
            220,
            mark="line",
        )

    if dfu.empty:
        return dfu

    # Message length histogram.
    if alt is not None:
        hist = pd.DataFrame({"length": lengths})
        st.altair_chart(
            alt.Chart(hist).mark_bar().encode(
                x=alt.X("length:Q", bin=alt.Bin(maxbins=30), title="Message length (chars)"),
                y=alt.Y("count():Q", title="Messages"),
            ).properties(height=180, title="Message length distribution"),
            use_container_width=True,
        )
    else:
        st.bar_chart(lengths.value_counts().sort_index(), height=180)

    # Weekday x hour heatmap; there is no built-in fallback, so it needs Altair.
    if alt is not None:
        dfu_heat = dfu.groupby(["dow_name", "hour"]).size().reset_index(name="count")
        st.altair_chart(
            alt.Chart(dfu_heat).mark_rect().encode(
                x=alt.X("hour:O", title="Hour (JST)"),
                y=alt.Y("dow_name:O", sort=_DOW_NAMES, title="Weekday"),
                color=alt.Color("count:Q", title="Msgs", scale=alt.Scale(scheme="bluegreen")),
            ).properties(height=180, title="Activity heatmap"),
            use_container_width=True,
        )

    return dfu


def _render_extra_insights(dff, dfu):
    """Render peak hour/weekday metrics and the daily chart with a rolling 7-day sum."""
    st.markdown("### Extra Insights")
    peak_hour = int(_mode_or(dff["hour"], 0))
    peak_dow = _mode_or(dff["dow_name"], "N/A")
    peak_hour_u = int(_mode_or(dfu["hour"], 0))
    peak_dow_u = _mode_or(dfu["dow_name"], "N/A")

    e1, e2, e3, e4 = st.columns(4)
    e1.metric("Global peak hour", f"{peak_hour}:00")
    e2.metric("Global peak weekday", peak_dow)
    e3.metric("User peak hour", f"{peak_hour_u}:00")
    e4.metric("User peak weekday", peak_dow_u)

    ser_daily_df = dff.set_index("ts_jst").resample("D").size().rename_axis("ts_jst").reset_index(name="count")
    if ser_daily_df.empty:
        return

    ser_daily_df["rolling_7d"] = ser_daily_df["count"].rolling(7, min_periods=1).sum()
    if alt is not None:
        bars = alt.Chart(ser_daily_df).mark_bar().encode(
            x=alt.X("ts_jst:T", title="Date (JST)"),
            y=alt.Y("count:Q", title="Daily messages"),
            tooltip=["ts_jst:T", "count:Q", "rolling_7d:Q"],
        ).properties(height=200, title="Daily messages & rolling 7-day sum")
        line = alt.Chart(ser_daily_df).mark_line(strokeDash=[4, 2]).encode(
            x="ts_jst:T",
            y=alt.Y("rolling_7d:Q", title="Rolling 7-day sum"),
        )
        st.altair_chart(bars + line, use_container_width=True)
    else:
        st.line_chart(ser_daily_df.set_index("ts_jst")[["count", "rolling_7d"]], height=200)


def _render_exports(top10, dff):
    """Render CSV download buttons for the top-10 table and the filtered messages."""
    st.markdown("#### Export")
    if isinstance(top10, pd.DataFrame) and not top10.empty:
        csv_sum = top10.reset_index().rename(columns={"sender": "User"})
        st.download_button(
            "⬇️ Download Top10 table (CSV)",
            data=csv_sum.to_csv(index=False),
            file_name="top10_senders.csv",
            mime="text/csv",
        )
    st.download_button(
        "⬇️ Download filtered messages (CSV)",
        data=dff.to_csv(index=False),
        file_name="messages_filtered.csv",
        mime="text/csv",
    )


def render_stats_tab(df_all_messages: pd.DataFrame, ss):
    """Render the "Usage & Conversation Stats" tab.

    Args:
        df_all_messages: Message frame, or None. The columns ``ts``,
            ``user_id``, ``role`` and ``text`` are read. ``ts`` is parsed with
            ``utc=True`` and converted to Asia/Tokyo; unparsable values become
            NaT and therefore fall outside every time window.
        ss: Mapping-like state object (or None) whose ``"user_index"`` entry
            maps a user id to a record with optional ``nickname`` and
            ``display_name`` fields, used to label senders.

    Returns:
        None. All output is written to the Streamlit page. Rendering stops
        early with an info/error message when there is no data, a required
        column is missing, or the selected window contains no messages.
    """
    st.subheader("Usage & Conversation Stats")

    df_all = df_all_messages.copy() if df_all_messages is not None else pd.DataFrame()
    if df_all.empty:
        st.info("No messages available for stats. Import from Cloud Pull or CSV first.")
        return

    # Report missing inputs up front instead of failing with a KeyError mid-render.
    missing = [col for col in _REQUIRED_COLUMNS if col not in df_all.columns]
    if missing:
        st.error(f"Cannot compute stats: missing column(s) {', '.join(missing)}.")
        return

    # Robust UTC -> JST handling
    ts_jst = pd.to_datetime(df_all["ts"], errors="coerce", utc=True).dt.tz_convert("Asia/Tokyo")
    df_all["ts_jst"] = ts_jst
    df_all["day"] = ts_jst.dt.strftime("%Y-%m-%d")
    df_all["hour"] = ts_jst.dt.hour
    df_all["dow"] = ts_jst.dt.dayofweek  # 0=Mon..6=Sun
    df_all["dow_name"] = df_all["dow"].map(dict(enumerate(_DOW_NAMES)))

    # Sender label (nickname > display_name > id); tolerate a None user_index.
    idx_map = (ss.get("user_index") or {}) if ss is not None else {}
    df_all["sender"] = df_all["user_id"].astype(str).map(lambda uid: _sender_label(uid, idx_map))

    # Controls
    st.markdown("**Time Range & Metric**")
    colr1, colr2, colr3 = st.columns([1.2, 1, 1.2])
    with colr1:
        range_choice = st.selectbox("Range", ["Past day", "Past week", "Past month", "Past year", "All"], index=1)
    with colr2:
        metric_type = st.radio("Metric", ["Message time", "First-seen (follow) time"], index=0)
    with colr3:
        gran_override = st.selectbox(
            "Granularity",
            ["Auto", "Hourly", "Daily", "Weekly"],
            index=0,
            help="Auto picks Hourly for ≤2 days, else Daily.",
        )

    end_jst = pd.Timestamp.now(tz="Asia/Tokyo")
    start_jst = _window_start(range_choice, end_jst, df_all["ts_jst"])

    dff = df_all[(df_all["ts_jst"] >= start_jst) & (df_all["ts_jst"] <= end_jst)].copy()
    if dff.empty:
        st.info("No messages in the selected window.")
        return

    span = end_jst - start_jst
    freq = _resample_freq(gran_override, span)
    _render_overview(df_all, dff, metric_type, freq, start_jst, end_jst)

    st.markdown("---")
    top10 = _render_top_senders(dff)

    st.markdown("---")
    dfu = _render_user_breakdown(dff, span)
    if dfu is None:
        return

    st.markdown("---")
    _render_extra_insights(dff, dfu)
    _render_exports(top10, dff)