File size: 3,411 Bytes
08c9602
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import time
import pandas as pd
import plotly.express as px
import streamlit as st

# Maps a normalized section key (lowercase, space-separated) to the
# DataFrame column to group by and the drop-detection threshold.
# All sections currently share the same 10% drop threshold.
SECTION_CONFIG = {
    section: {"group_col": column, "drop_percent": 0.10}
    for section, column in [
        ("flex bucket", "BUCKET"),
        ("bidder", "HB_BIDDER"),
        ("device", "DEVICE"),
        ("ad unit", "AD_UNIT_GROUP"),
        ("refresh", "REFRESH"),
    ]
}

def update_section_generic_drop(key, df, start_times, container, drop_time):
    """
    Render a generic 5-minute breakdown for one dashboard section and flag
    groups whose count at ``drop_time`` falls at or below the section
    average reduced by the configured drop percentage.

    Parameters
    ----------
    key : str
        Section identifier; underscores are normalized to spaces so both
        'flex_bucket' and 'flex bucket' resolve to the same SECTION_CONFIG
        entry. Must also be a key of ``start_times``.
    df : pandas.DataFrame
        Query result with (case-insensitive) columns EST_DATE, EST_HOUR,
        EST_MINUTE, TIMEFRAME, CNT, and the section's group column.
    start_times : dict
        Maps ``key`` to the epoch timestamp when the section's query
        started; used only to report elapsed query time.
    container : streamlit container
        The Streamlit container to render all output into.
    drop_time : pandas.Timestamp or None
        The 5-minute interval to inspect for drops. When None, no interval
        matches and the "no significant drop" message is shown with "N/A".

    Returns
    -------
    None. Renders charts/messages into ``container`` as a side effect.
    """
    elapsed = time.time() - start_times[key]
    mins, secs = divmod(elapsed, 60)

    # Work on a copy: the original code assigned df.columns in place,
    # which mutated the caller's DataFrame.
    df = df.copy()

    # Standardize column names & build a per-row timestamp.
    df.columns = [c.upper() for c in df.columns]
    df = df.sort_values(["EST_HOUR", "EST_MINUTE"])
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str) + " " +
        df["EST_HOUR"].astype(str).str.zfill(2) + ":" +
        df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    # "5min" replaces the "5T" alias, which is deprecated in pandas >= 2.2.
    df["5MIN"] = df["timestamp"].dt.floor("5min")

    # Normalize the lookup key to match SECTION_CONFIG ('flex_bucket' ->
    # 'flex bucket').
    lookup = key.replace("_", " ").lower()
    config = SECTION_CONFIG.get(lookup)
    if not config:
        st.error(f"No configuration for section '{key}'.")
        return

    group_col = config["group_col"]
    drop_pct = config["drop_percent"]

    with container:
        st.subheader(f"{lookup.title()} Data")
        st.info(f"Query completed in {int(mins)}m {secs:.2f}s")

        # Filter to TODAY (uppercase)
        today_data = df[df["TIMEFRAME"].str.upper() == "TODAY"]
        if today_data.empty:
            st.info("No TODAY data for this section.")
            return

        # Aggregate over 5-min intervals & plot
        agg_today = (
            today_data
            .groupby(["5MIN", group_col], as_index=False)["CNT"]
            .sum()
        )
        title = f"{lookup.title()} Impressions by Time of Day (5‑min)"
        fig = px.line(
            agg_today,
            x="5MIN",
            y="CNT",
            color=group_col,
            title=title,
            labels={"5MIN": "Time", "CNT": "Impressions", group_col: lookup.title()}
        )
        fig.update_xaxes(tickformat="%I:%M %p")
        st.plotly_chart(fig, use_container_width=True)

        # Drop detection at the flagged interval. agg_today already has one
        # row per (5MIN, group), so the subset at drop_time is one count per
        # group — the original groupby().sum() re-aggregation was a no-op
        # and has been replaced with a direct row scan.
        drop_subset = agg_today[agg_today["5MIN"] == drop_time]
        flagged = []
        if not drop_subset.empty:
            avg_cnt = drop_subset["CNT"].mean()
            threshold = (1 - drop_pct) * avg_cnt
            for _, row in drop_subset.iterrows():
                if row["CNT"] <= threshold:
                    flagged.append(row[group_col])

        drop_str = drop_time.strftime("%I:%M %p") if drop_time else "N/A"
        if flagged:
            st.warning(f"{lookup.title()}: At {drop_str}, these groups dropped: {', '.join(flagged)}.")
        else:
            st.info(f"{lookup.title()}: No significant drop at {drop_str}.")

        # Optional raw TODAY data
        with st.expander(f"Show Raw TODAY {lookup.title()} Data"):
            st.dataframe(today_data)