Spaces:
Sleeping
Sleeping
github-actions[bot] commited on
Commit ·
08c9602
1
Parent(s): 0fc4a33
sync: automatic content update from github
Browse files- README.md +7 -4
- app.py +347 -0
- changelog.md +5 -0
- delivery_instructions.py +35 -0
- delivery_main.py +420 -0
- delivery_queries.py +466 -0
- delivery_section_utils.py +103 -0
- .gitattributes → gitattributes +0 -0
- house_ad_instructions.py +60 -0
- house_ad_main.py +356 -0
- house_ad_queries.py +220 -0
- house_ad_section_utils.py +373 -0
- index.html +0 -19
- requirements.txt +8 -0
- style.css +0 -28
README.md
CHANGED
|
@@ -1,10 +1,13 @@
|
|
| 1 |
---
|
| 2 |
title: Red Alert Investigations
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
|
|
|
|
|
|
| 7 |
pinned: false
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
title: Red Alert Investigations
|
| 3 |
+
emoji: 📈
|
| 4 |
+
colorFrom: gray
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.43.1
|
| 8 |
+
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
short_description: Automate Red Alert Investigations
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import pytz
|
| 5 |
+
import base64
|
| 6 |
+
import altair as alt
|
| 7 |
+
from datetime import datetime, date, time, timedelta
|
| 8 |
+
from zoneinfo import ZoneInfo
|
| 9 |
+
import snowflake.connector
|
| 10 |
+
from cryptography.hazmat.primitives import serialization
|
| 11 |
+
from cryptography.hazmat.backends import default_backend
|
| 12 |
+
|
| 13 |
+
# --- Secrets and Key Handling ---
|
| 14 |
+
private_key_pem = os.getenv("SNOWFLAKE_PRIVATE_KEY").replace('\\n', "\n").encode()
|
| 15 |
+
private_key_obj = serialization.load_pem_private_key(
|
| 16 |
+
private_key_pem,
|
| 17 |
+
password=None,
|
| 18 |
+
backend=default_backend()
|
| 19 |
+
)
|
| 20 |
+
private_key_der = private_key_obj.private_bytes(
|
| 21 |
+
encoding=serialization.Encoding.DER,
|
| 22 |
+
format=serialization.PrivateFormat.PKCS8,
|
| 23 |
+
encryption_algorithm=serialization.NoEncryption()
|
| 24 |
+
)
|
| 25 |
+
private_key_b64 = base64.b64encode(private_key_der).decode('utf-8')
|
| 26 |
+
|
| 27 |
+
# Connection params
|
| 28 |
+
account_identifier = os.getenv("SNOWFLAKE_ACCOUNT_IDENTIFIER")
|
| 29 |
+
user = os.getenv("SNOWFLAKE_USER")
|
| 30 |
+
warehouse = os.getenv("SNOWFLAKE_WAREHOUSE")
|
| 31 |
+
database = os.getenv("SNOWFLAKE_DATABASE")
|
| 32 |
+
schema = os.getenv("SNOWFLAKE_SCHEMA")
|
| 33 |
+
role = os.getenv("SNOWFLAKE_ROLE")
|
| 34 |
+
table = os.getenv("SNOWFLAKE_TABLE")
|
| 35 |
+
message_filter = os.getenv("SNOWFLAKE_MESSAGE_FILTER")
|
| 36 |
+
campaign_id = os.getenv("SNOWFLAKE_CAMPAIGN_ID")
|
| 37 |
+
|
| 38 |
+
# Import query builders
|
| 39 |
+
from house_ad_main import run_house_ad_spike_query
|
| 40 |
+
from delivery_main import run_drop_query
|
| 41 |
+
from delivery_queries import (
|
| 42 |
+
get_main_query as get_main_delivery_query,
|
| 43 |
+
get_main_int_sov_query,
|
| 44 |
+
get_bidder_query as get_bidder_delivery_query,
|
| 45 |
+
get_flex_bucket_query,
|
| 46 |
+
get_device_query as get_device_delivery_query,
|
| 47 |
+
get_ad_unit_query as get_ad_unit_delivery_query,
|
| 48 |
+
get_refresh_query
|
| 49 |
+
)
|
| 50 |
+
from house_ad_queries import (
|
| 51 |
+
get_main_query as get_main_house_query,
|
| 52 |
+
get_flex_query as get_flex_house_query,
|
| 53 |
+
get_bidder_query as get_bidder_house_query,
|
| 54 |
+
get_deal_query,
|
| 55 |
+
get_ad_unit_query as get_ad_unit_house_query,
|
| 56 |
+
get_browser_query,
|
| 57 |
+
get_device_query as get_device_house_query,
|
| 58 |
+
get_random_integer_query,
|
| 59 |
+
get_hb_pb_query,
|
| 60 |
+
get_hb_size_query
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
# OpenAI (if required)
|
| 64 |
+
from openai import OpenAI
|
| 65 |
+
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 66 |
+
|
| 67 |
+
# Session defaults
|
| 68 |
+
if "deep_values" not in st.session_state:
|
| 69 |
+
st.session_state["deep_values"] = {}
|
| 70 |
+
|
| 71 |
+
# Sidebar filters
|
| 72 |
+
st.sidebar.title("Red Alert Investigations Filters")
|
| 73 |
+
analysis_type = st.sidebar.radio(
|
| 74 |
+
"Analysis Type",
|
| 75 |
+
["House_Ads","Display_Prebid","Display_OB","Display_AdX","Display_HBT_OB","Display_TAM",
|
| 76 |
+
"Video_Prebid","Video_OB","Video_AdX","Video_TAM"]
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
if analysis_type == "House_Ads":
|
| 80 |
+
ad_format_filter = integration_filter = None
|
| 81 |
+
else:
|
| 82 |
+
ad_format_filter, integration_filter = analysis_type.split("_",1)
|
| 83 |
+
|
| 84 |
+
# Time defaults
|
| 85 |
+
now_edt = datetime.now(ZoneInfo("America/New_York"))
|
| 86 |
+
default_start = now_edt - timedelta(hours=3)
|
| 87 |
+
default_end = now_edt
|
| 88 |
+
start_date = st.sidebar.date_input("Start Date", default_start.date())
|
| 89 |
+
start_hour = st.sidebar.selectbox("Start Hour", list(range(24)), index=default_start.hour)
|
| 90 |
+
end_date = st.sidebar.date_input("End Date", default_end.date())
|
| 91 |
+
end_hour = st.sidebar.selectbox("End Hour", list(range(24)), index=default_end.hour)
|
| 92 |
+
start_dt = datetime.combine(start_date, time(start_hour))
|
| 93 |
+
end_dt = datetime.combine(end_date, time(end_hour,59,59))
|
| 94 |
+
start_str = start_dt.strftime('%Y-%m-%d %H:%M:%S')
|
| 95 |
+
end_str = end_dt.strftime('%Y-%m-%d %H:%M:%S')
|
| 96 |
+
|
| 97 |
+
st.session_state["start_date"] = start_date
|
| 98 |
+
st.session_state["end_date"] = end_date
|
| 99 |
+
st.session_state["eastern"] = pytz.timezone("America/New_York")
|
| 100 |
+
|
| 101 |
+
# Data fetch helper
|
| 102 |
+
def fetch_df(sql: str) -> pd.DataFrame:
|
| 103 |
+
conn = snowflake.connector.connect(
|
| 104 |
+
account=account_identifier,
|
| 105 |
+
user=user,
|
| 106 |
+
private_key=private_key_b64,
|
| 107 |
+
warehouse=warehouse,
|
| 108 |
+
database=database,
|
| 109 |
+
schema=schema,
|
| 110 |
+
role=role,
|
| 111 |
+
)
|
| 112 |
+
return pd.read_sql(sql, conn)
|
| 113 |
+
|
| 114 |
+
# Tabs layout
|
| 115 |
+
tab_auto, tab_deep = st.tabs(["Auto-Analysis","Deep Dive"])
|
| 116 |
+
|
| 117 |
+
# Auto-Analysis Tab
|
| 118 |
+
with tab_auto:
|
| 119 |
+
st.title("Red Alert Investigations")
|
| 120 |
+
if analysis_type == "House_Ads":
|
| 121 |
+
st.header("House Ad Analysis")
|
| 122 |
+
if st.button("Run Analysis"):
|
| 123 |
+
st.session_state["query_run"] = False
|
| 124 |
+
run_house_ad_spike_query(
|
| 125 |
+
table, start_str, end_str,
|
| 126 |
+
message_filter, campaign_id,
|
| 127 |
+
private_key_b64, user,
|
| 128 |
+
account_identifier, warehouse,
|
| 129 |
+
database, schema, role,
|
| 130 |
+
client
|
| 131 |
+
)
|
| 132 |
+
else:
|
| 133 |
+
st.header(f"{ad_format_filter} {integration_filter} Analysis")
|
| 134 |
+
if st.button("Run Analysis"):
|
| 135 |
+
st.session_state["query_run"] = False
|
| 136 |
+
run_drop_query(
|
| 137 |
+
table, start_str, end_str,
|
| 138 |
+
message_filter, campaign_id,
|
| 139 |
+
private_key_b64, user,
|
| 140 |
+
account_identifier, warehouse,
|
| 141 |
+
database, schema, role,
|
| 142 |
+
client,
|
| 143 |
+
integration_filter, ad_format_filter
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
with tab_deep:
|
| 147 |
+
st.header("Deep Dive")
|
| 148 |
+
|
| 149 |
+
# 1) Select dimensions
|
| 150 |
+
if analysis_type == "House_Ads":
|
| 151 |
+
all_dims = [
|
| 152 |
+
"Flex Bucket","Bidder","Deal","Ad Unit","Browser",
|
| 153 |
+
"Device","Random Integer","HB Price Buckets","HB Size"
|
| 154 |
+
]
|
| 155 |
+
else:
|
| 156 |
+
all_dims = [
|
| 157 |
+
"Integration SOV","Bidder","Flex Bucket",
|
| 158 |
+
"Device","Ad Unit Group","Refresh"
|
| 159 |
+
]
|
| 160 |
+
to_plot = st.multiselect("1. Select dimensions", all_dims, key="dims")
|
| 161 |
+
|
| 162 |
+
# 2) Fetch unique values per dimension
|
| 163 |
+
if st.button("2. Fetch Values") and to_plot:
|
| 164 |
+
vals = {}
|
| 165 |
+
for dim in to_plot:
|
| 166 |
+
if dim == "Integration SOV" and analysis_type != "House_Ads":
|
| 167 |
+
dfv = fetch_df(get_main_int_sov_query(
|
| 168 |
+
table, start_str, end_str, message_filter,
|
| 169 |
+
campaign_id, ad_format_filter
|
| 170 |
+
))
|
| 171 |
+
col = "Integration"
|
| 172 |
+
elif analysis_type == "House_Ads":
|
| 173 |
+
fn_map = {
|
| 174 |
+
"Flex Bucket": get_flex_house_query,
|
| 175 |
+
"Bidder": get_bidder_house_query,
|
| 176 |
+
"Deal": get_deal_query,
|
| 177 |
+
"Ad Unit": get_ad_unit_house_query,
|
| 178 |
+
"Browser": get_browser_query,
|
| 179 |
+
"Device": get_device_house_query,
|
| 180 |
+
"Random Integer": get_random_integer_query,
|
| 181 |
+
"HB Price Buckets":get_hb_pb_query,
|
| 182 |
+
"HB Size": get_hb_size_query,
|
| 183 |
+
}
|
| 184 |
+
dfv = fetch_df(fn_map[dim](
|
| 185 |
+
table, start_str, end_str, message_filter, campaign_id
|
| 186 |
+
))
|
| 187 |
+
col = [c for c in dfv.columns
|
| 188 |
+
if c not in ("EST_DATE","EST_HOUR","EST_MINUTE","CNT")][0]
|
| 189 |
+
else:
|
| 190 |
+
fn_map = {
|
| 191 |
+
"Bidder": get_bidder_delivery_query,
|
| 192 |
+
"Flex Bucket": get_flex_bucket_query,
|
| 193 |
+
"Device": get_device_delivery_query,
|
| 194 |
+
"Ad Unit Group": get_ad_unit_delivery_query,
|
| 195 |
+
"Refresh": get_refresh_query,
|
| 196 |
+
}
|
| 197 |
+
dfv = fetch_df(fn_map[dim](
|
| 198 |
+
table, start_str, end_str, message_filter,
|
| 199 |
+
campaign_id, integration_filter, ad_format_filter
|
| 200 |
+
))
|
| 201 |
+
col = [c for c in dfv.columns
|
| 202 |
+
if c not in ("EST_DATE","EST_HOUR","EST_MINUTE","CNT")][0]
|
| 203 |
+
vals[dim] = sorted(dfv[col].dropna().unique())
|
| 204 |
+
st.session_state["deep_values"] = vals
|
| 205 |
+
|
| 206 |
+
# 3) Select filters & run the combined query
|
| 207 |
+
if st.session_state.get("deep_values"):
|
| 208 |
+
filters = {}
|
| 209 |
+
for dim, options in st.session_state["deep_values"].items():
|
| 210 |
+
filters[dim] = st.multiselect(
|
| 211 |
+
f"Filter {dim}", options, default=options,
|
| 212 |
+
key=f"fv_{dim}"
|
| 213 |
+
)
|
| 214 |
+
|
| 215 |
+
if st.button("3. Run Deep Dive"):
|
| 216 |
+
# 3a) Build the base CTE
|
| 217 |
+
if analysis_type == "House_Ads":
|
| 218 |
+
base = get_main_house_query(
|
| 219 |
+
table, start_str, end_str, message_filter, campaign_id
|
| 220 |
+
)
|
| 221 |
+
snippet_map = {
|
| 222 |
+
"Flex Bucket": "bucket",
|
| 223 |
+
"Bidder": "body[0]:slotTargeting:hb_bidder[0]::varchar AS BIDDER",
|
| 224 |
+
"Deal": "body[0]:slotTargeting:hb_deal[0]::varchar AS HB_DEAL",
|
| 225 |
+
"Ad Unit": "split(body[0]['adUnitPath'],'/')[2]::varchar AS AD_UNIT",
|
| 226 |
+
"Browser": "CASE WHEN lower(useragent) LIKE '%edg%' THEN 'Edge' WHEN lower(useragent) LIKE '%chrome%' THEN 'Chrome' WHEN lower(useragent) LIKE '%firefox%' THEN 'Firefox' WHEN lower(useragent) LIKE '%safari%' THEN 'Safari' ELSE 'Other' END AS BROWSER",
|
| 227 |
+
"Device": "CASE WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' WHEN useragent LIKE '%Android%' OR useragent LIKE '%Mobi%' THEN 'phone' WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' ELSE 'other' END AS DEVICE",
|
| 228 |
+
"Random Integer": "body[0]:siteTargeting:ri[0]::varchar AS RANDOM_INTEGER",
|
| 229 |
+
"HB Price Buckets": "body[0]:slotTargeting:hb_pb[0]::varchar AS HB_PB",
|
| 230 |
+
"HB Size": "body[0]:slotTargeting:hb_size[0]::varchar AS HB_SIZE",
|
| 231 |
+
}
|
| 232 |
+
else:
|
| 233 |
+
base = get_main_delivery_query(
|
| 234 |
+
table, start_str, end_str,
|
| 235 |
+
message_filter, campaign_id,
|
| 236 |
+
integration_filter, ad_format_filter
|
| 237 |
+
)
|
| 238 |
+
snippet_map = {
|
| 239 |
+
"Integration SOV":"INTEGRATION",
|
| 240 |
+
"Bidder": "body[0]:slotTargeting:hb_bidder[0]::varchar AS HB_BIDDER",
|
| 241 |
+
"Flex Bucket": "bucket",
|
| 242 |
+
"Device": "CASE WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' WHEN useragent LIKE '%Android%' OR useragent LIKE '%Mobi%' THEN 'phone' WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' ELSE 'other' END AS DEVICE",
|
| 243 |
+
"Ad Unit Group": "CASE WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Sticky_Outstream' WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video' ELSE 'Other' END AS AD_UNIT_GROUP",
|
| 244 |
+
"Refresh": "body[0]:slotTargeting:refresh[0]::varchar AS REFRESH",
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
# 3b) Inject all selected dimension snippets, matching both lowercase & uppercase
|
| 248 |
+
select_snippets = [snippet_map[dim] for dim in to_plot]
|
| 249 |
+
dynamic_cte = (
|
| 250 |
+
base
|
| 251 |
+
.replace(
|
| 252 |
+
"count(*) as CNT",
|
| 253 |
+
f"count(*) as CNT, {', '.join(select_snippets)}"
|
| 254 |
+
)
|
| 255 |
+
.replace(
|
| 256 |
+
"COUNT(*) AS CNT",
|
| 257 |
+
f"COUNT(*) AS CNT, {', '.join(select_snippets)}"
|
| 258 |
+
)
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
# 3c) Build WHERE clauses from the filters
|
| 262 |
+
where_clauses = []
|
| 263 |
+
for dim, vals in filters.items():
|
| 264 |
+
alias = snippet_map[dim].split(" AS ")[-1]
|
| 265 |
+
val_list = ", ".join(f"'{v}'" for v in vals)
|
| 266 |
+
where_clauses.append(f"{alias} IN ({val_list})")
|
| 267 |
+
|
| 268 |
+
final_sql = (
|
| 269 |
+
f"SELECT *\n"
|
| 270 |
+
f"FROM (\n{dynamic_cte}\n) sub\n"
|
| 271 |
+
f"WHERE {' AND '.join(where_clauses)}"
|
| 272 |
+
)
|
| 273 |
+
|
| 274 |
+
# 3d) Execute & display
|
| 275 |
+
df_final = fetch_df(final_sql)
|
| 276 |
+
for dim, snippet in snippet_map.items():
|
| 277 |
+
alias = snippet.split(" AS ")[-1] # e.g. "bucket", "BROWSER", etc.
|
| 278 |
+
# find the actual DataFrame column (which will be uppercase)
|
| 279 |
+
match = next((c for c in df_final.columns if c.upper() == alias.upper()), None)
|
| 280 |
+
if match:
|
| 281 |
+
df_final.rename(columns={match: dim}, inplace=True)
|
| 282 |
+
|
| 283 |
+
# Build the minute‐precision datetime index
|
| 284 |
+
df_final["EST_DATETIME"] = (
|
| 285 |
+
pd.to_datetime(df_final["EST_DATE"]) +
|
| 286 |
+
pd.to_timedelta(df_final["EST_HOUR"], unit="h") +
|
| 287 |
+
pd.to_timedelta(df_final["EST_MINUTE"], unit="m")
|
| 288 |
+
)
|
| 289 |
+
|
| 290 |
+
st.subheader("Deep Dive Results")
|
| 291 |
+
st.dataframe(df_final)
|
| 292 |
+
|
| 293 |
+
# Build the Series column off your filtered dims
|
| 294 |
+
df_final["Series"] = (
|
| 295 |
+
df_final[list(filters.keys())]
|
| 296 |
+
.astype(str)
|
| 297 |
+
.agg(":".join, axis=1)
|
| 298 |
+
)
|
| 299 |
+
|
| 300 |
+
# Pivot on EST_DATETIME instead of EST_DATE
|
| 301 |
+
pivot = (
|
| 302 |
+
df_final
|
| 303 |
+
.pivot_table(
|
| 304 |
+
index="EST_DATETIME", # ← minute‐level axis
|
| 305 |
+
columns="Series",
|
| 306 |
+
values="CNT",
|
| 307 |
+
aggfunc="sum"
|
| 308 |
+
)
|
| 309 |
+
.fillna(0)
|
| 310 |
+
.sort_index()
|
| 311 |
+
)
|
| 312 |
+
|
| 313 |
+
pivot.columns = [col.replace(":", "_") for col in pivot.columns]
|
| 314 |
+
|
| 315 |
+
pivot_df = (
|
| 316 |
+
pivot
|
| 317 |
+
.reset_index()
|
| 318 |
+
.melt(id_vars="EST_DATETIME", var_name="Series", value_name="CNT")
|
| 319 |
+
)
|
| 320 |
+
|
| 321 |
+
# Build an Altair line chart:
|
| 322 |
+
chart = (
|
| 323 |
+
alt.Chart(pivot_df)
|
| 324 |
+
.mark_line(point=True)
|
| 325 |
+
.encode(
|
| 326 |
+
x=alt.X(
|
| 327 |
+
"EST_DATETIME:T",
|
| 328 |
+
axis=alt.Axis(
|
| 329 |
+
title="Time (NY)",
|
| 330 |
+
format="%H:%M", # show hour:minute on the axis
|
| 331 |
+
tickCount="hour" # one tick per hour
|
| 332 |
+
)
|
| 333 |
+
),
|
| 334 |
+
y=alt.Y("CNT:Q", title="Count"),
|
| 335 |
+
color=alt.Color("Series:N", title="Dimension"),
|
| 336 |
+
tooltip=[
|
| 337 |
+
alt.Tooltip("EST_DATETIME:T", title="Timestamp", format="%Y-%m-%d %H:%M"),
|
| 338 |
+
alt.Tooltip("Series:N", title="Series"),
|
| 339 |
+
alt.Tooltip("CNT:Q", title="Count"),
|
| 340 |
+
]
|
| 341 |
+
)
|
| 342 |
+
.properties(width=700, height=400)
|
| 343 |
+
.interactive() # allow pan/zoom
|
| 344 |
+
)
|
| 345 |
+
|
| 346 |
+
st.subheader("Deep Dive Trend")
|
| 347 |
+
st.altair_chart(chart, use_container_width=True)
|
changelog.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Changelog
|
| 2 |
+
|
| 3 |
+
- 2025-08-07 19:58 UTC: Cast LineItem IDs to VARCHAR in delivery queries to avoid numeric conversion errors.
|
| 4 |
+
- 2025-08-07 17:28 UTC: Quote table identifiers in queries to support hyphenated table names.
|
| 5 |
+
- 2025-08-07 14:28 UTC: Initialized changelog to track project updates.
|
delivery_instructions.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# instructions.py
|
| 2 |
+
|
| 3 |
+
NEXT_STEPS_INSTRUCTIONS = """
|
| 4 |
+
Flex Bucket:
|
| 5 |
+
If a single flex bucket is flagged as having a delivery drop, that bucket is the most likely source of the issue.
|
| 6 |
+
Check whether there was a recent deployment impacting that bucket—refer to the deployment time and bucket name in the flex_section message.
|
| 7 |
+
Send the flagged flex bucket details, along with deployment context, to the Ad Code team for investigation.
|
| 8 |
+
Include a hyperlink to the related Jira ticket.
|
| 9 |
+
If multiple buckets are flagged, the issue may be shared among them. If most or all are impacted, flex buckets may not be the root cause.
|
| 10 |
+
|
| 11 |
+
Bidder:
|
| 12 |
+
If a single hb_bidder is flagged as having a delivery drop, it is likely the source of the issue.
|
| 13 |
+
Check for any recent changes in GAM related to this bidder—this includes targeting changes, blocking rules, or budget issues.
|
| 14 |
+
Send the flagged bidder information to the Rev Ops team for deeper investigation.
|
| 15 |
+
The Ad Ops and Ad Code teams should also verify if there were any recent changes in GAM setup or ad code logic affecting bidder behavior.
|
| 16 |
+
If most or all bidders are flagged, it’s likely the drop is not specific to a single bidder.
|
| 17 |
+
|
| 18 |
+
Device:
|
| 19 |
+
If a single device type is flagged (e.g., desktop, phone, tablet), the issue is likely related to that device category.
|
| 20 |
+
Investigate whether there were recent front-end or ad code changes that could be suppressing impressions on that device type.
|
| 21 |
+
The Ad Code team should verify targeting and rendering conditions. The Ad Ops team should check for any targeting changes in GAM.
|
| 22 |
+
If multiple or all device types are flagged, the issue may lie upstream, not within device-specific rendering or targeting logic.
|
| 23 |
+
|
| 24 |
+
Ad Unit:
|
| 25 |
+
If a single ad unit group (e.g., Sidebar, Content, Footer) is flagged, investigate whether recent changes affected the structure or availability of that unit.
|
| 26 |
+
Escalate the findings to the Ad Code team. The Ad Ops team should check for any targeting changes in GAM.
|
| 27 |
+
If most or all ad unit groups are flagged, the issue is less likely to be specific to a single ad unit and may be campaign- or integration-related.
|
| 28 |
+
|
| 29 |
+
Refresh:
|
| 30 |
+
If a single refresh value (e.g., 1, 2, 3...) is flagged, it may indicate a technical issue affecting impressions during specific refresh cycles.
|
| 31 |
+
Investigate whether recent ad code changes modified refresh logic or behavior.
|
| 32 |
+
Coordinate with the Ad Code team to confirm if affected refresh values correspond with known logic updates.
|
| 33 |
+
The Ad Ops team should check for any targeting changes in GAM.
|
| 34 |
+
If most or all refresh values are flagged, the issue likely lies outside of refresh logic, possibly within broader rendering or integration pipelines.
|
| 35 |
+
"""
|
delivery_main.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import time
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
import snowflake.connector
|
| 6 |
+
import base64
|
| 7 |
+
from datetime import timedelta, datetime
|
| 8 |
+
from cryptography.hazmat.primitives import serialization
|
| 9 |
+
from cryptography.hazmat.backends import default_backend
|
| 10 |
+
import concurrent.futures
|
| 11 |
+
|
| 12 |
+
# Import SQL query functions
|
| 13 |
+
from delivery_queries import (
|
| 14 |
+
get_main_query,
|
| 15 |
+
get_main_int_sov_query,
|
| 16 |
+
get_bidder_query,
|
| 17 |
+
get_flex_bucket_query,
|
| 18 |
+
get_device_query,
|
| 19 |
+
get_ad_unit_query,
|
| 20 |
+
get_refresh_query,
|
| 21 |
+
)
|
| 22 |
+
from delivery_section_utils import update_section_generic_drop
|
| 23 |
+
|
| 24 |
+
# Import the NEXT_STEPS_INSTRUCTIONS for delivery drops
|
| 25 |
+
from delivery_instructions import NEXT_STEPS_INSTRUCTIONS
|
| 26 |
+
|
| 27 |
+
# Initialize session state
|
| 28 |
+
st.session_state.setdefault("query_run", False)
|
| 29 |
+
st.session_state.setdefault("findings_messages", [])
|
| 30 |
+
st.session_state.setdefault("query_df", None)
|
| 31 |
+
st.session_state.setdefault("agg_df", None)
|
| 32 |
+
st.session_state.setdefault("top_level_drop_time", None)
|
| 33 |
+
st.session_state.setdefault("key_findings_output", None)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@st.cache_data(show_spinner=False)
|
| 37 |
+
def cached_run_query(
|
| 38 |
+
query,
|
| 39 |
+
private_key_b64: str,
|
| 40 |
+
user: str,
|
| 41 |
+
account_identifier: str,
|
| 42 |
+
warehouse: str,
|
| 43 |
+
database: str,
|
| 44 |
+
schema: str,
|
| 45 |
+
role: str,
|
| 46 |
+
):
|
| 47 |
+
"""Run a Snowflake query and return a DataFrame."""
|
| 48 |
+
der = base64.b64decode(private_key_b64)
|
| 49 |
+
conn = snowflake.connector.connect(
|
| 50 |
+
user=user,
|
| 51 |
+
account=account_identifier,
|
| 52 |
+
warehouse=warehouse,
|
| 53 |
+
database=database,
|
| 54 |
+
schema=schema,
|
| 55 |
+
role=role,
|
| 56 |
+
private_key=der,
|
| 57 |
+
)
|
| 58 |
+
cs = conn.cursor()
|
| 59 |
+
cs.execute("ALTER SESSION SET STATEMENT_TIMEOUT_IN_SECONDS = 1800")
|
| 60 |
+
cs.execute(query)
|
| 61 |
+
rows = cs.fetchall()
|
| 62 |
+
cols = [c[0] for c in cs.description]
|
| 63 |
+
df = pd.DataFrame(rows, columns=cols)
|
| 64 |
+
cs.close()
|
| 65 |
+
conn.close()
|
| 66 |
+
return df
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def run_drop_query(
|
| 70 |
+
table,
|
| 71 |
+
start_datetime,
|
| 72 |
+
end_datetime,
|
| 73 |
+
message_filter,
|
| 74 |
+
campaign_id,
|
| 75 |
+
private_key_str,
|
| 76 |
+
user,
|
| 77 |
+
account_identifier,
|
| 78 |
+
warehouse,
|
| 79 |
+
database,
|
| 80 |
+
schema,
|
| 81 |
+
role,
|
| 82 |
+
client,
|
| 83 |
+
integration_filter=None,
|
| 84 |
+
ad_format_filter=None,
|
| 85 |
+
):
|
| 86 |
+
"""
|
| 87 |
+
Universal drop analysis for any Integration + Ad_Format.
|
| 88 |
+
"""
|
| 89 |
+
# 1) Build SQL statements with filters
|
| 90 |
+
main_sql = get_main_query(
|
| 91 |
+
table,
|
| 92 |
+
start_datetime,
|
| 93 |
+
end_datetime,
|
| 94 |
+
message_filter,
|
| 95 |
+
campaign_id,
|
| 96 |
+
integration_filter,
|
| 97 |
+
ad_format_filter,
|
| 98 |
+
)
|
| 99 |
+
flex_sql = get_flex_bucket_query(
|
| 100 |
+
table,
|
| 101 |
+
start_datetime,
|
| 102 |
+
end_datetime,
|
| 103 |
+
message_filter,
|
| 104 |
+
campaign_id,
|
| 105 |
+
integration_filter,
|
| 106 |
+
ad_format_filter,
|
| 107 |
+
)
|
| 108 |
+
bidder_sql = get_bidder_query(
|
| 109 |
+
table,
|
| 110 |
+
start_datetime,
|
| 111 |
+
end_datetime,
|
| 112 |
+
message_filter,
|
| 113 |
+
campaign_id,
|
| 114 |
+
integration_filter,
|
| 115 |
+
ad_format_filter,
|
| 116 |
+
)
|
| 117 |
+
device_sql = get_device_query(
|
| 118 |
+
table,
|
| 119 |
+
start_datetime,
|
| 120 |
+
end_datetime,
|
| 121 |
+
message_filter,
|
| 122 |
+
campaign_id,
|
| 123 |
+
integration_filter,
|
| 124 |
+
ad_format_filter,
|
| 125 |
+
)
|
| 126 |
+
ad_unit_sql = get_ad_unit_query(
|
| 127 |
+
table,
|
| 128 |
+
start_datetime,
|
| 129 |
+
end_datetime,
|
| 130 |
+
message_filter,
|
| 131 |
+
campaign_id,
|
| 132 |
+
integration_filter,
|
| 133 |
+
ad_format_filter,
|
| 134 |
+
)
|
| 135 |
+
refresh_sql = get_refresh_query(
|
| 136 |
+
table,
|
| 137 |
+
start_datetime,
|
| 138 |
+
end_datetime,
|
| 139 |
+
message_filter,
|
| 140 |
+
campaign_id,
|
| 141 |
+
integration_filter,
|
| 142 |
+
ad_format_filter,
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
# 2) Run top-level query once
|
| 146 |
+
if not st.session_state["query_run"]:
|
| 147 |
+
try:
|
| 148 |
+
t0 = time.time()
|
| 149 |
+
with st.spinner("Running top-level impressions query..."):
|
| 150 |
+
df = cached_run_query(
|
| 151 |
+
main_sql,
|
| 152 |
+
private_key_str,
|
| 153 |
+
user,
|
| 154 |
+
account_identifier,
|
| 155 |
+
warehouse,
|
| 156 |
+
database,
|
| 157 |
+
schema,
|
| 158 |
+
role,
|
| 159 |
+
)
|
| 160 |
+
elapsed = time.time() - t0
|
| 161 |
+
mins, secs = divmod(elapsed, 60)
|
| 162 |
+
st.info(f"Query ran in {int(mins)}m {secs:.2f}s")
|
| 163 |
+
|
| 164 |
+
# Normalize timestamps
|
| 165 |
+
df.columns = [c.upper() for c in df.columns]
|
| 166 |
+
df = df.sort_values(["EST_HOUR", "EST_MINUTE"])
|
| 167 |
+
df["timestamp"] = pd.to_datetime(
|
| 168 |
+
df["EST_DATE"].astype(str)
|
| 169 |
+
+ " "
|
| 170 |
+
+ df["EST_HOUR"].astype(str).str.zfill(2)
|
| 171 |
+
+ ":"
|
| 172 |
+
+ df["EST_MINUTE"].astype(str).str.zfill(2)
|
| 173 |
+
)
|
| 174 |
+
df["5min"] = df["timestamp"].dt.floor("5T")
|
| 175 |
+
base_date = (
|
| 176 |
+
df[df["TIMEFRAME"] == "TODAY"]["5min"].iloc[0].normalize()
|
| 177 |
+
if not df[df["TIMEFRAME"] == "TODAY"].empty
|
| 178 |
+
else pd.Timestamp("today").normalize()
|
| 179 |
+
)
|
| 180 |
+
start_hour = int(st.session_state.get("start_hour", 23))
|
| 181 |
+
|
| 182 |
+
def norm(ts):
|
| 183 |
+
return ts + pd.Timedelta(hours=24) if ts.hour < start_hour else ts
|
| 184 |
+
|
| 185 |
+
df["normalized_time"] = (
|
| 186 |
+
base_date + (df["5min"] - df["5min"].dt.normalize())
|
| 187 |
+
).apply(norm)
|
| 188 |
+
|
| 189 |
+
# Aggregate
|
| 190 |
+
agg_df = df.groupby(["TIMEFRAME", "normalized_time"], as_index=False)[
|
| 191 |
+
"CNT"
|
| 192 |
+
].sum()
|
| 193 |
+
|
| 194 |
+
# Save to state
|
| 195 |
+
st.session_state.update(
|
| 196 |
+
query_df=df, agg_df=agg_df, query_run=True, top_level_drop_time=None
|
| 197 |
+
)
|
| 198 |
+
except Exception as e:
|
| 199 |
+
st.error(f"Main query error: {e}")
|
| 200 |
+
return
|
| 201 |
+
|
| 202 |
+
else:
|
| 203 |
+
df = st.session_state["query_df"]
|
| 204 |
+
agg_df = st.session_state["agg_df"]
|
| 205 |
+
|
| 206 |
+
# 3) Display top-level
|
| 207 |
+
st.header("Top-Level Impressions Data")
|
| 208 |
+
drop_time = None
|
| 209 |
+
for ts in sorted(agg_df["normalized_time"].unique()):
|
| 210 |
+
today_cnt = agg_df[
|
| 211 |
+
(agg_df["normalized_time"] == ts) & (agg_df["TIMEFRAME"] == "TODAY")
|
| 212 |
+
]["CNT"]
|
| 213 |
+
other_cnt = agg_df[
|
| 214 |
+
(agg_df["normalized_time"] == ts) & (agg_df["TIMEFRAME"] != "TODAY")
|
| 215 |
+
]["CNT"]
|
| 216 |
+
if (
|
| 217 |
+
not today_cnt.empty
|
| 218 |
+
and not other_cnt.empty
|
| 219 |
+
and today_cnt.values[0] <= 0.9 * other_cnt.mean()
|
| 220 |
+
):
|
| 221 |
+
drop_time = ts
|
| 222 |
+
break
|
| 223 |
+
|
| 224 |
+
if drop_time:
|
| 225 |
+
msg = f"Top-Level: Delivery drop detected at {drop_time.strftime('%I:%M %p')}."
|
| 226 |
+
st.warning(msg)
|
| 227 |
+
else:
|
| 228 |
+
msg = "Top-Level: No significant delivery drop detected."
|
| 229 |
+
st.info(msg)
|
| 230 |
+
|
| 231 |
+
# Append message once
|
| 232 |
+
findings_messages = st.session_state.setdefault("findings_messages", [])
|
| 233 |
+
if msg not in findings_messages:
|
| 234 |
+
findings_messages.append(msg)
|
| 235 |
+
st.session_state["top_level_drop_time"] = drop_time
|
| 236 |
+
|
| 237 |
+
with st.expander("Raw Data"):
|
| 238 |
+
st.dataframe(df)
|
| 239 |
+
with st.expander("Aggregated Data"):
|
| 240 |
+
st.dataframe(agg_df)
|
| 241 |
+
|
| 242 |
+
fig = px.line(
|
| 243 |
+
agg_df,
|
| 244 |
+
x="normalized_time",
|
| 245 |
+
y="CNT",
|
| 246 |
+
color="TIMEFRAME",
|
| 247 |
+
labels={"normalized_time": "Time of Day", "CNT": "Impressions"},
|
| 248 |
+
)
|
| 249 |
+
fig.update_xaxes(tickformat="%I:%M %p")
|
| 250 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 251 |
+
|
| 252 |
+
# 4) Share-of-Voice
|
| 253 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 254 |
+
st.header("Share of Voice Analysis")
|
| 255 |
+
sov_sql = get_main_int_sov_query(
|
| 256 |
+
table,
|
| 257 |
+
start_datetime,
|
| 258 |
+
end_datetime,
|
| 259 |
+
message_filter,
|
| 260 |
+
campaign_id,
|
| 261 |
+
ad_format_filter=ad_format_filter,
|
| 262 |
+
)
|
| 263 |
+
try:
|
| 264 |
+
with st.spinner("Running SOV query..."):
|
| 265 |
+
sov_df = cached_run_query(
|
| 266 |
+
sov_sql,
|
| 267 |
+
private_key_str,
|
| 268 |
+
user,
|
| 269 |
+
account_identifier,
|
| 270 |
+
warehouse,
|
| 271 |
+
database,
|
| 272 |
+
schema,
|
| 273 |
+
role,
|
| 274 |
+
)
|
| 275 |
+
# Normalize same as above
|
| 276 |
+
sov_df["timestamp"] = pd.to_datetime(
|
| 277 |
+
sov_df["EST_DATE"].astype(str)
|
| 278 |
+
+ " "
|
| 279 |
+
+ sov_df["EST_HOUR"].astype(str).str.zfill(2)
|
| 280 |
+
+ ":"
|
| 281 |
+
+ sov_df["EST_MINUTE"].astype(str).str.zfill(2)
|
| 282 |
+
)
|
| 283 |
+
sov_df["5min"] = sov_df["timestamp"].dt.floor("5T")
|
| 284 |
+
base = pd.Timestamp("today").normalize()
|
| 285 |
+
sov_df["normalized_time"] = (
|
| 286 |
+
base + (sov_df["5min"] - sov_df["5min"].dt.normalize())
|
| 287 |
+
).apply(lambda ts: ts + pd.Timedelta(hours=24) if ts.hour < start_hour else ts)
|
| 288 |
+
|
| 289 |
+
# Group, exclude, percent, order
|
| 290 |
+
sov_grp = sov_df.groupby(["normalized_time", "INTEGRATION"], as_index=False)[
|
| 291 |
+
"CNT"
|
| 292 |
+
].sum()
|
| 293 |
+
sov_grp = sov_grp[~sov_grp["INTEGRATION"].str.contains("Ignore|Affiliate|PG")]
|
| 294 |
+
sov_grp["share"] = sov_grp["CNT"] / sov_grp.groupby("normalized_time")[
|
| 295 |
+
"CNT"
|
| 296 |
+
].transform("sum")
|
| 297 |
+
order = (
|
| 298 |
+
sov_grp.groupby("INTEGRATION")["share"]
|
| 299 |
+
.sum()
|
| 300 |
+
.sort_values(ascending=False)
|
| 301 |
+
.index.tolist()
|
| 302 |
+
)
|
| 303 |
+
fig2 = px.line(
|
| 304 |
+
sov_grp,
|
| 305 |
+
x="normalized_time",
|
| 306 |
+
y="share",
|
| 307 |
+
color="INTEGRATION",
|
| 308 |
+
category_orders={"INTEGRATION": order},
|
| 309 |
+
labels={"share": "Share of Total Impressions"},
|
| 310 |
+
)
|
| 311 |
+
fig2.update_xaxes(tickformat="%I:%M %p")
|
| 312 |
+
fig2.update_yaxes(tickformat=".2%")
|
| 313 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 314 |
+
except Exception as e:
|
| 315 |
+
st.error(f"SOV error: {e}")
|
| 316 |
+
|
| 317 |
+
# 5) Key Findings via OpenAI <-- CUT starts here
|
| 318 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 319 |
+
st.header("Key Findings and Next Steps")
|
| 320 |
+
key_findings_container = st.container()
|
| 321 |
+
with key_findings_container:
|
| 322 |
+
if st.session_state.get("key_findings_output"):
|
| 323 |
+
st.markdown(
|
| 324 |
+
st.session_state.get("key_findings_output"),
|
| 325 |
+
unsafe_allow_html=True,
|
| 326 |
+
)
|
| 327 |
+
else:
|
| 328 |
+
st.info(
|
| 329 |
+
"Key findings will appear here once additional queries have finished."
|
| 330 |
+
)
|
| 331 |
+
|
| 332 |
+
def generate_key_findings_callback():
|
| 333 |
+
findings = "\n".join(st.session_state.get("findings_messages", []))
|
| 334 |
+
flex_jira_info = st.session_state.get("flex_jira_info", "")
|
| 335 |
+
jira_section = (
|
| 336 |
+
f"\nJira Ticket Information from Flex Bucket section:\n{flex_jira_info}\n"
|
| 337 |
+
if flex_jira_info
|
| 338 |
+
else ""
|
| 339 |
+
)
|
| 340 |
+
prompt = (
|
| 341 |
+
"You are a helpful analyst investigating a drop in ad delivery. "
|
| 342 |
+
"A delivery drop detection dashboard has compiled a list of findings "
|
| 343 |
+
"showing potential drops across different dimensions. Below are the detailed findings "
|
| 344 |
+
"from the dashboard, along with any flagged Jira ticket information. "
|
| 345 |
+
"The NEXT_STEPS_INSTRUCTIONS file contains recommended next steps for each section "
|
| 346 |
+
"depending on the drop(s) flagged in the dashboard:\n\n"
|
| 347 |
+
f"Findings:\n{findings}\n"
|
| 348 |
+
f"{jira_section}\n"
|
| 349 |
+
"Next Steps Instructions:\n"
|
| 350 |
+
f"{NEXT_STEPS_INSTRUCTIONS}\n\n"
|
| 351 |
+
"Using the Findings, Jira section information, and Next Steps Instructions as helpful context, "
|
| 352 |
+
"create a concise summary that identifies the likely cause/causes of any detected delivery drops "
|
| 353 |
+
"and recommends actionable next steps. The summary should be a few sentences long followed by bullet points "
|
| 354 |
+
"with the main findings and recommended next steps. Please output the summary in Markdown format with each bullet "
|
| 355 |
+
"point on a new line, and indent sub-bullets properly. Ensure that each bullet point is on its own line. "
|
| 356 |
+
"There is no need to explicitly mention the Instructions file in the summary; just use it to inform your analysis."
|
| 357 |
+
)
|
| 358 |
+
st.session_state["key_findings"] = prompt
|
| 359 |
+
try:
|
| 360 |
+
response = client.responses.create(
|
| 361 |
+
model="o3-mini",
|
| 362 |
+
instructions="You are a helpful analyst who provides insights and recommends next steps.",
|
| 363 |
+
input=prompt,
|
| 364 |
+
)
|
| 365 |
+
st.session_state["key_findings_output"] = response.output_text.strip()
|
| 366 |
+
except Exception as e:
|
| 367 |
+
st.session_state["key_findings_output"] = f"Error calling OpenAI API: {e}"
|
| 368 |
+
|
| 369 |
+
# Once additional queries complete (below), automatically generate key findings:
|
| 370 |
+
generate_key_findings_callback()
|
| 371 |
+
|
| 372 |
+
# 6) Breakdown dimensions
|
| 373 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 374 |
+
st.header("Specific Dimensions Data")
|
| 375 |
+
st.info("Running breakdown queries...")
|
| 376 |
+
queries = {
|
| 377 |
+
"flex_bucket": flex_sql,
|
| 378 |
+
"bidder": bidder_sql,
|
| 379 |
+
"device": device_sql,
|
| 380 |
+
"ad_unit": ad_unit_sql,
|
| 381 |
+
"refresh": refresh_sql,
|
| 382 |
+
}
|
| 383 |
+
with st.spinner("Running additional queries..."):
|
| 384 |
+
with concurrent.futures.ThreadPoolExecutor() as ex:
|
| 385 |
+
futures = {
|
| 386 |
+
k: ex.submit(
|
| 387 |
+
cached_run_query,
|
| 388 |
+
q,
|
| 389 |
+
private_key_str,
|
| 390 |
+
user,
|
| 391 |
+
account_identifier,
|
| 392 |
+
warehouse,
|
| 393 |
+
database,
|
| 394 |
+
schema,
|
| 395 |
+
role,
|
| 396 |
+
)
|
| 397 |
+
for k, q in queries.items()
|
| 398 |
+
}
|
| 399 |
+
start_ts = {k: time.time() for k in queries}
|
| 400 |
+
conts = {k: st.container() for k in queries}
|
| 401 |
+
while futures:
|
| 402 |
+
done, _ = concurrent.futures.wait(
|
| 403 |
+
futures.values(),
|
| 404 |
+
timeout=0.5,
|
| 405 |
+
return_when=concurrent.futures.FIRST_COMPLETED,
|
| 406 |
+
)
|
| 407 |
+
for fut in done:
|
| 408 |
+
key = next(k for k, v in futures.items() if v is fut)
|
| 409 |
+
df_res = fut.result()
|
| 410 |
+
update_section_generic_drop(
|
| 411 |
+
key, df_res, start_ts, conts[key], drop_time
|
| 412 |
+
)
|
| 413 |
+
del futures[key]
|
| 414 |
+
|
| 415 |
+
# Update the key findings container with the new output.
|
| 416 |
+
with key_findings_container:
|
| 417 |
+
st.markdown(
|
| 418 |
+
st.session_state.get("key_findings_output", ""),
|
| 419 |
+
unsafe_allow_html=True,
|
| 420 |
+
)
|
delivery_queries.py
ADDED
|
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def _quote_identifier(identifier: str) -> str:
|
| 5 |
+
"""Quote SQL identifiers that contain special characters."""
|
| 6 |
+
|
| 7 |
+
def quote_part(part: str) -> str:
|
| 8 |
+
if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", part):
|
| 9 |
+
return part
|
| 10 |
+
return f'"{part}"'
|
| 11 |
+
|
| 12 |
+
return ".".join(quote_part(p) for p in identifier.split("."))
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_main_query(
|
| 16 |
+
table,
|
| 17 |
+
start_datetime,
|
| 18 |
+
end_datetime,
|
| 19 |
+
message_filter,
|
| 20 |
+
campaign_id,
|
| 21 |
+
integration_filter=None,
|
| 22 |
+
ad_format_filter=None,
|
| 23 |
+
):
|
| 24 |
+
"""Returns the main impression count query filtered by integration and ad format."""
|
| 25 |
+
|
| 26 |
+
table = _quote_identifier(table)
|
| 27 |
+
|
| 28 |
+
# Build optional filters to apply after the CTE union.
|
| 29 |
+
# Filtering on the calculated aliases (Integration/Ad_Format)
|
| 30 |
+
# in the CTE `WHERE` clause would force Snowflake to interpret
|
| 31 |
+
# those names as existing columns and attempt type coercion,
|
| 32 |
+
# which triggered errors like:
|
| 33 |
+
# Numeric value 'bciq1rts' is not recognized
|
| 34 |
+
# Instead we apply the filters on the outer SELECT where the
|
| 35 |
+
# aliases are available.
|
| 36 |
+
post_union_filter = ""
|
| 37 |
+
if integration_filter:
|
| 38 |
+
post_union_filter += f" AND Integration = '{integration_filter}'"
|
| 39 |
+
if ad_format_filter:
|
| 40 |
+
post_union_filter += f" AND Ad_Format = '{ad_format_filter}'"
|
| 41 |
+
|
| 42 |
+
return f"""
|
| 43 |
+
WITH today AS (
|
| 44 |
+
SELECT
|
| 45 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 46 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 47 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 48 |
+
CASE
|
| 49 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 50 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 51 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 52 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 53 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 54 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 55 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 56 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 57 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 58 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 59 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 60 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 61 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 62 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 63 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 64 |
+
WHEN LEFT(b.name,1) = '4'
|
| 65 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 66 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 67 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 68 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 69 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 70 |
+
WHEN (body[0]:campaignId IS NULL
|
| 71 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 72 |
+
THEN 'Prebid'
|
| 73 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 74 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 75 |
+
ELSE 'OB'
|
| 76 |
+
END AS Integration,
|
| 77 |
+
CASE
|
| 78 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 79 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 80 |
+
ELSE 'Display'
|
| 81 |
+
END AS Ad_Format,
|
| 82 |
+
COUNT(*) AS CNT,
|
| 83 |
+
'Today' AS timeframe
|
| 84 |
+
FROM {table} a
|
| 85 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 86 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 87 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 88 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 89 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 90 |
+
BETWEEN '{start_datetime}' AND '{end_datetime}'
|
| 91 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 92 |
+
GROUP BY ALL
|
| 93 |
+
),
|
| 94 |
+
prev1 AS (
|
| 95 |
+
-- 1 Week Ago
|
| 96 |
+
SELECT
|
| 97 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 98 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 99 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 100 |
+
CASE
|
| 101 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 102 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 103 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 104 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 105 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 106 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 107 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 108 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 109 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 110 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 111 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 112 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 113 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 114 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 115 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 116 |
+
WHEN LEFT(b.name,1) = '4'
|
| 117 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 118 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 119 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 120 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 121 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 122 |
+
WHEN (body[0]:campaignId IS NULL
|
| 123 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 124 |
+
THEN 'Prebid'
|
| 125 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 126 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 127 |
+
ELSE 'OB'
|
| 128 |
+
END AS Integration,
|
| 129 |
+
CASE
|
| 130 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 131 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 132 |
+
ELSE 'Display'
|
| 133 |
+
END AS Ad_Format,
|
| 134 |
+
COUNT(*) AS CNT,
|
| 135 |
+
'1 Week Ago' AS timeframe
|
| 136 |
+
FROM {table} a
|
| 137 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 138 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 139 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 140 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 141 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 142 |
+
BETWEEN dateadd(DAY,-7,'{start_datetime}') AND dateadd(DAY,-7,'{end_datetime}')
|
| 143 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 144 |
+
GROUP BY ALL
|
| 145 |
+
),
|
| 146 |
+
prev2 AS (
|
| 147 |
+
-- 2 Weeks Ago
|
| 148 |
+
SELECT
|
| 149 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 150 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 151 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 152 |
+
CASE
|
| 153 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 154 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 155 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 156 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 157 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 158 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 159 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 160 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 161 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 162 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 163 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 164 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 165 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 166 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 167 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 168 |
+
WHEN LEFT(b.name,1) = '4'
|
| 169 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 170 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 171 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 172 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 173 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 174 |
+
WHEN (body[0]:campaignId IS NULL
|
| 175 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 176 |
+
THEN 'Prebid'
|
| 177 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 178 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 179 |
+
ELSE 'OB'
|
| 180 |
+
END AS Integration,
|
| 181 |
+
CASE
|
| 182 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 183 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 184 |
+
ELSE 'Display'
|
| 185 |
+
END AS Ad_Format,
|
| 186 |
+
COUNT(*) AS CNT,
|
| 187 |
+
'2 Weeks Ago' AS timeframe
|
| 188 |
+
FROM {table} a
|
| 189 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 190 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 191 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 192 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 193 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 194 |
+
BETWEEN dateadd(DAY,-14,'{start_datetime}')
|
| 195 |
+
AND dateadd(DAY,-14,'{end_datetime}')
|
| 196 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 197 |
+
GROUP BY ALL
|
| 198 |
+
),
|
| 199 |
+
prev3 AS (
|
| 200 |
+
-- 3 Weeks Ago
|
| 201 |
+
SELECT
|
| 202 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 203 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 204 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 205 |
+
CASE
|
| 206 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 207 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 208 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 209 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 210 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 211 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 212 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 213 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 214 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 215 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 216 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 217 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 218 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 219 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 220 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 221 |
+
WHEN LEFT(b.name,1) = '4'
|
| 222 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 223 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 224 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 225 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 226 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 227 |
+
WHEN (body[0]:campaignId IS NULL
|
| 228 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 229 |
+
THEN 'Prebid'
|
| 230 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 231 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 232 |
+
ELSE 'OB'
|
| 233 |
+
END AS Integration,
|
| 234 |
+
CASE
|
| 235 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 236 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 237 |
+
ELSE 'Display'
|
| 238 |
+
END AS Ad_Format,
|
| 239 |
+
COUNT(*) AS CNT,
|
| 240 |
+
'3 Weeks Ago' AS timeframe
|
| 241 |
+
FROM {table} a
|
| 242 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 243 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 244 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 245 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 246 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 247 |
+
BETWEEN dateadd(DAY,-21,'{start_datetime}')
|
| 248 |
+
AND dateadd(DAY,-21,'{end_datetime}')
|
| 249 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 250 |
+
GROUP BY ALL
|
| 251 |
+
)
|
| 252 |
+
SELECT * FROM (
|
| 253 |
+
SELECT * FROM today
|
| 254 |
+
UNION ALL SELECT * FROM prev1
|
| 255 |
+
UNION ALL SELECT * FROM prev2
|
| 256 |
+
UNION ALL SELECT * FROM prev3
|
| 257 |
+
)
|
| 258 |
+
WHERE 1=1 {post_union_filter}
|
| 259 |
+
"""
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def get_bidder_query(
|
| 263 |
+
table,
|
| 264 |
+
start_datetime,
|
| 265 |
+
end_datetime,
|
| 266 |
+
message_filter,
|
| 267 |
+
campaign_id,
|
| 268 |
+
integration_filter=None,
|
| 269 |
+
ad_format_filter=None,
|
| 270 |
+
):
|
| 271 |
+
base = get_main_query(
|
| 272 |
+
table,
|
| 273 |
+
start_datetime,
|
| 274 |
+
end_datetime,
|
| 275 |
+
message_filter,
|
| 276 |
+
campaign_id,
|
| 277 |
+
integration_filter,
|
| 278 |
+
ad_format_filter,
|
| 279 |
+
)
|
| 280 |
+
# inject hb_bidder field
|
| 281 |
+
return base.replace(
|
| 282 |
+
"COUNT(*) AS CNT",
|
| 283 |
+
"COUNT(*) AS CNT, body[0]:slotTargeting:hb_bidder[0]::varchar AS hb_bidder",
|
| 284 |
+
)
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def get_flex_bucket_query(
|
| 288 |
+
table,
|
| 289 |
+
start_datetime,
|
| 290 |
+
end_datetime,
|
| 291 |
+
message_filter,
|
| 292 |
+
campaign_id,
|
| 293 |
+
integration_filter=None,
|
| 294 |
+
ad_format_filter=None,
|
| 295 |
+
):
|
| 296 |
+
base = get_main_query(
|
| 297 |
+
table,
|
| 298 |
+
start_datetime,
|
| 299 |
+
end_datetime,
|
| 300 |
+
message_filter,
|
| 301 |
+
campaign_id,
|
| 302 |
+
integration_filter,
|
| 303 |
+
ad_format_filter,
|
| 304 |
+
)
|
| 305 |
+
return base.replace("COUNT(*) AS CNT", "COUNT(*) AS CNT, bucket")
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def get_device_query(
|
| 309 |
+
table,
|
| 310 |
+
start_datetime,
|
| 311 |
+
end_datetime,
|
| 312 |
+
message_filter,
|
| 313 |
+
campaign_id,
|
| 314 |
+
integration_filter=None,
|
| 315 |
+
ad_format_filter=None,
|
| 316 |
+
):
|
| 317 |
+
base = get_main_query(
|
| 318 |
+
table,
|
| 319 |
+
start_datetime,
|
| 320 |
+
end_datetime,
|
| 321 |
+
message_filter,
|
| 322 |
+
campaign_id,
|
| 323 |
+
integration_filter,
|
| 324 |
+
ad_format_filter,
|
| 325 |
+
)
|
| 326 |
+
# inject device case
|
| 327 |
+
device_case = (
|
| 328 |
+
"CASE "
|
| 329 |
+
"WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' "
|
| 330 |
+
"WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' "
|
| 331 |
+
"WHEN useragent LIKE '%Android%' OR useragent LIKE '%iPhone%' OR useragent LIKE '%Mobi%' THEN 'phone' "
|
| 332 |
+
"ELSE 'other' END AS device"
|
| 333 |
+
)
|
| 334 |
+
return base.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {device_case}")
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def get_ad_unit_query(
|
| 338 |
+
table,
|
| 339 |
+
start_datetime,
|
| 340 |
+
end_datetime,
|
| 341 |
+
message_filter,
|
| 342 |
+
campaign_id,
|
| 343 |
+
integration_filter=None,
|
| 344 |
+
ad_format_filter=None,
|
| 345 |
+
):
|
| 346 |
+
base = get_main_query(
|
| 347 |
+
table,
|
| 348 |
+
start_datetime,
|
| 349 |
+
end_datetime,
|
| 350 |
+
message_filter,
|
| 351 |
+
campaign_id,
|
| 352 |
+
integration_filter,
|
| 353 |
+
ad_format_filter,
|
| 354 |
+
)
|
| 355 |
+
ad_unit_case = (
|
| 356 |
+
"CASE "
|
| 357 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Content%' THEN 'Content' "
|
| 358 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Footer%' THEN 'Footer' "
|
| 359 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Recipe%' THEN 'Recipe' "
|
| 360 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Sidebar%' THEN 'Sidebar' "
|
| 361 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Header%' THEN 'Header' "
|
| 362 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Below_Post%' THEN 'Below_Post' "
|
| 363 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Outstream%' THEN 'Sticky Outstream' "
|
| 364 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Video%' THEN 'Video' "
|
| 365 |
+
"ELSE 'Other' END AS ad_unit_group"
|
| 366 |
+
)
|
| 367 |
+
return base.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {ad_unit_case}")
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
def get_refresh_query(
|
| 371 |
+
table,
|
| 372 |
+
start_datetime,
|
| 373 |
+
end_datetime,
|
| 374 |
+
message_filter,
|
| 375 |
+
campaign_id,
|
| 376 |
+
integration_filter=None,
|
| 377 |
+
ad_format_filter=None,
|
| 378 |
+
):
|
| 379 |
+
base = get_main_query(
|
| 380 |
+
table,
|
| 381 |
+
start_datetime,
|
| 382 |
+
end_datetime,
|
| 383 |
+
message_filter,
|
| 384 |
+
campaign_id,
|
| 385 |
+
integration_filter,
|
| 386 |
+
ad_format_filter,
|
| 387 |
+
)
|
| 388 |
+
refresh_field = "body[0]:slotTargeting:refresh[0]::varchar AS Refresh"
|
| 389 |
+
return base.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {refresh_field}")
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
def get_main_int_sov_query(
|
| 393 |
+
table,
|
| 394 |
+
start_datetime,
|
| 395 |
+
end_datetime,
|
| 396 |
+
message_filter,
|
| 397 |
+
campaign_id,
|
| 398 |
+
# integration_filter no longer used for SOV
|
| 399 |
+
ad_format_filter=None,
|
| 400 |
+
):
|
| 401 |
+
"""
|
| 402 |
+
Returns the share-of-voice query filtered only by ad format.
|
| 403 |
+
"""
|
| 404 |
+
table = _quote_identifier(table)
|
| 405 |
+
|
| 406 |
+
# Only apply Ad_Format filtering after the CTE so that the alias
|
| 407 |
+
# can be referenced safely.
|
| 408 |
+
post_union_filter = ""
|
| 409 |
+
if ad_format_filter:
|
| 410 |
+
post_union_filter = f" AND Ad_Format = '{ad_format_filter}'"
|
| 411 |
+
|
| 412 |
+
return f"""
|
| 413 |
+
WITH today AS (
|
| 414 |
+
SELECT
|
| 415 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 416 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 417 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 418 |
+
CASE
|
| 419 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 420 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 421 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 422 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 423 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 424 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 425 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 426 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 427 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 428 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 429 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 430 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 431 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 432 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 433 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 434 |
+
WHEN LEFT(b.name,1) = '4'
|
| 435 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 436 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 437 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 438 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 439 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 440 |
+
WHEN (body[0]:campaignId IS NULL
|
| 441 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 442 |
+
THEN 'Prebid'
|
| 443 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 444 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 445 |
+
ELSE 'OB'
|
| 446 |
+
END AS Integration,
|
| 447 |
+
CASE
|
| 448 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 449 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 450 |
+
ELSE 'Display'
|
| 451 |
+
END AS Ad_Format,
|
| 452 |
+
COUNT(*) AS CNT,
|
| 453 |
+
'Today' AS timeframe
|
| 454 |
+
FROM {table} a
|
| 455 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 456 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 457 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 458 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 459 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 460 |
+
BETWEEN '{start_datetime}' AND '{end_datetime}'
|
| 461 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 462 |
+
GROUP BY ALL
|
| 463 |
+
)
|
| 464 |
+
SELECT * FROM today
|
| 465 |
+
WHERE 1=1 {post_union_filter}
|
| 466 |
+
"""
|
delivery_section_utils.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Map each section key (space-separated) to the DataFrame column to group by.
# All sections currently share the same 10% drop threshold, so the per-section
# dicts are derived from a single column mapping.
_SECTION_GROUP_COLUMNS = {
    "flex bucket": "BUCKET",
    "bidder": "HB_BIDDER",
    "device": "DEVICE",
    "ad unit": "AD_UNIT_GROUP",
    "refresh": "REFRESH",
}

SECTION_CONFIG = {
    section: {"group_col": column, "drop_percent": 0.10}
    for section, column in _SECTION_GROUP_COLUMNS.items()
}
|
| 29 |
+
|
| 30 |
+
def update_section_generic_drop(key, df, start_times, container, drop_time):
    """Render a 5-minute breakdown for one section, with drop detection.

    Args:
        key: Section name; 'flex_bucket' and 'flex bucket' are both accepted
            (underscores are normalized to spaces before the config lookup).
        df: Query result with EST_DATE / EST_HOUR / EST_MINUTE / CNT /
            TIMEFRAME columns (case-insensitive) plus the section's group
            column from SECTION_CONFIG.
        start_times: Mapping of section key -> query start time (epoch
            seconds), used to report the query duration.
        container: Streamlit container to render into.
        drop_time: The flagged 5-minute interval (pandas Timestamp), or None
            when no top-level drop was detected.
    """
    elapsed = time.time() - start_times[key]
    mins, secs = divmod(elapsed, 60)

    # Work on a copy: the original assigned df.columns in place, which
    # mutated the caller's DataFrame as a side effect.
    df = df.copy()

    # Standardize column names & build a per-row timestamp.
    df.columns = [c.upper() for c in df.columns]
    df = df.sort_values(["EST_HOUR", "EST_MINUTE"])
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str) + " " +
        df["EST_HOUR"].astype(str).str.zfill(2) + ":" +
        df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    # "5min", not the deprecated "T" alias, so this keeps working on pandas 2.2+.
    df["5MIN"] = df["timestamp"].dt.floor("5min")

    # Normalize the lookup key to match SECTION_CONFIG.
    lookup = key.replace("_", " ").lower()
    config = SECTION_CONFIG.get(lookup)
    if not config:
        st.error(f"No configuration for section '{key}'.")
        return

    group_col = config["group_col"]
    drop_pct = config["drop_percent"]

    with container:
        st.subheader(f"{lookup.title()} Data")
        st.info(f"Query completed in {int(mins)}m {secs:.2f}s")

        # Only the TODAY timeframe is charted here.
        today_data = df[df["TIMEFRAME"].str.upper() == "TODAY"]
        if today_data.empty:
            st.info("No TODAY data for this section.")
            return

        # Aggregate impressions per (5-minute interval, group) and plot.
        agg_today = (
            today_data
            .groupby(["5MIN", group_col], as_index=False)["CNT"]
            .sum()
        )
        title = f"{lookup.title()} Impressions by Time of Day (5‑min)"
        fig = px.line(
            agg_today,
            x="5MIN",
            y="CNT",
            color=group_col,
            title=title,
            labels={"5MIN": "Time", "CNT": "Impressions", group_col: lookup.title()}
        )
        fig.update_xaxes(tickformat="%I:%M %p")
        st.plotly_chart(fig, use_container_width=True)

        # Drop detection at the flagged interval: a group is flagged when its
        # count is at least drop_pct below the mean count across groups there.
        drop_subset = agg_today[agg_today["5MIN"] == drop_time]
        flagged = []
        if not drop_subset.empty:
            avg_cnt = drop_subset["CNT"].mean()
            for grp, cnt in drop_subset.groupby(group_col)["CNT"].sum().items():
                if cnt <= (1 - drop_pct) * avg_cnt:
                    flagged.append(grp)

        drop_str = drop_time.strftime("%I:%M %p") if drop_time else "N/A"
        if flagged:
            # str() each group label: some sections group on numeric columns,
            # and ", ".join() raises TypeError on non-string values.
            flagged_str = ", ".join(str(g) for g in flagged)
            st.warning(f"{lookup.title()}: At {drop_str}, these groups dropped: {flagged_str}.")
        else:
            st.info(f"{lookup.title()}: No significant drop at {drop_str}.")

        # Optional raw TODAY data for debugging.
        with st.expander(f"Show Raw TODAY {lookup.title()} Data"):
            st.dataframe(today_data)
|
.gitattributes → gitattributes
RENAMED
|
File without changes
|
house_ad_instructions.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# instructions.py
#
# Per-section guidance fed verbatim into the OpenAI prompt that summarizes
# house-ad spike findings (see generate_key_findings_callback in
# house_ad_main.py). Fix: corrected the typo "likley" -> "likely" in the
# Flex section, since this text is read by the model as-is.

NEXT_STEPS_INSTRUCTIONS = """
Flex:
If a single flex bucket is flagged as having a house ad spike, that flex bucket is the likely cause of the house ad spike.
Even if other sections are showing an increase in house ads, the single flex bucket is most likely the cause of the spike.
If other sections are showing increases in house ads, that information should be sent to the ad code team as additional reference for the flagged flex bucket.
The deployment information listed in the flex_section message, especially the deployment time and bucket, should be sent to the ad code team so that they can investigate the issue further.
Make sure to include a hyperlink to the Jira issue in your response.
If multiple flex buckets are flagged as having a house ad spike, the issue could be related to the flagged buckets.
If the majority of flex buckets are being flagged as having a house ad spike, the issue is likely not related to flex buckets.

hb_bidder:
If a single hb_bidder is flagged as having a house ad spike, the issue is likely caused by or related to that hb_bidder.
Even if other sections are showing an increase in house ads, the hb_bidder is most likely the cause of the spike.
The hb_bidder information should be sent to the Rev Ops team to investigate further.
The Ad Ops team should also investigate whether any GAM changes were recently made that could be impacting the hb_bidder.
The Ad Code team should also investigate if there were any recent ad code changes that could be impacting the hb_bidder.
If the majority of hb_bidder values are being flagged as having a house ad spike, the issue is likely not related to hb_bidder values.

hb_deal:
If a single hb_deal is flagged as having a house ad spike, the issue is likely caused by or related to that hb_deal.
Even if other sections are showing an increase in house ads, the hb_deal is most likely the cause of the spike.
The Ad Ops team should also investigate whether any GAM changes, especially changes to protections and/or UPRs, were recently made that could be impacting the hb_deal.
The hb_deal information should be sent to the Sales team to investigate further.
If the majority of hb_deal values are being flagged as having a house ad spike, the issue is likely not related to hb_deal values.

Ad Unit:
If a single ad unit is flagged as having a house ad spike, the issue is likely related to that ad unit.
The ad code team should also investigate if there were any recent ad code changes that could be impacting the ad unit.
If the majority of ad unit values are being flagged as having a house ad spike, the issue is likely not related to ad unit values.

Browser:
If a single browser is flagged as having a house ad spike, the issue is likely related to that browser.
The ad code team should investigate if there were any recent ad code changes that could be impacting the browser.
If the majority of browser values are being flagged as having a house ad spike, the issue is likely not related to browser values.

Device:
If a single device is flagged as having a house ad spike, the issue is likely related to that device.
The ad code team should investigate if there were any recent ad code changes that could be impacting the device.
If the majority of device values are being flagged as having a house ad spike, the issue is likely not related to device values.

Random Integer:
If a single random integer is flagged as having a house ad spike, the issue is likely caused by or related to that random integer.
If multiple random integer values are being flagged as having a house ad spike, the issue could be related to those random integer values.
The Ad Ops team should investigate whether any GAM changes were recently made that could be impacting the random integer value(s).
The ad code team should investigate if there were any recent ad code changes that could be impacting the random integer value(s).
If the majority of random integer values are being flagged as having a house ad spike, the issue is likely not related to random integer values.

hb_pb:
If a single hb_pb value is flagged as having a house ad spike, the issue is likely caused by or related to that hb_pb.
The Ad Ops team should also investigate whether any GAM changes were recently made that could be impacting the hb_pb.
The ad code team should investigate if there were any recent ad code changes that could be impacting the hb_pb.
If the majority of hb_pb values are being flagged as having a house ad spike, the issue is likely not related to hb_pb values.

hb_size:
If a single hb_size value is flagged as having a house ad spike, the issue is likely related to that hb_size.
The ad code team should investigate if there were any recent ad code changes that could be impacting the hb_size.
If the majority of hb_size values are being flagged as having a house ad spike, the issue is likely not related to hb_size values.
"""
|
house_ad_main.py
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import time
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
import snowflake.connector
|
| 6 |
+
import base64
|
| 7 |
+
from datetime import timedelta, datetime
|
| 8 |
+
from cryptography.hazmat.primitives import serialization
|
| 9 |
+
from cryptography.hazmat.backends import default_backend
|
| 10 |
+
import concurrent.futures
|
| 11 |
+
|
| 12 |
+
# Import SQL query functions.
|
| 13 |
+
from house_ad_queries import (
|
| 14 |
+
get_main_query,
|
| 15 |
+
get_flex_query,
|
| 16 |
+
get_bidder_query,
|
| 17 |
+
get_deal_query,
|
| 18 |
+
get_ad_unit_query,
|
| 19 |
+
get_browser_query,
|
| 20 |
+
get_device_query,
|
| 21 |
+
get_random_integer_query,
|
| 22 |
+
get_hb_pb_query,
|
| 23 |
+
get_hb_size_query,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
# Import the house ad section config.
|
| 27 |
+
from house_ad_section_utils import update_section_generic
|
| 28 |
+
|
| 29 |
+
# Import the NEXT_STEPS_INSTRUCTIONS at the top.
|
| 30 |
+
from house_ad_instructions import NEXT_STEPS_INSTRUCTIONS
|
| 31 |
+
|
| 32 |
+
# Initialize session state keys at the top so they only get set once.
# query_run: whether the top-level query has already executed this session.
st.session_state.setdefault("query_run", False)
# findings_messages: accumulated findings text fed into the OpenAI summary.
st.session_state.setdefault("findings_messages", [])
# key_findings_output: cached OpenAI summary (Markdown), once generated.
st.session_state.setdefault("key_findings_output", None)
# query_df: raw top-level query results (DataFrame).
st.session_state.setdefault("query_df", None)
# agg_df: top-level results aggregated into 5-minute buckets.
st.session_state.setdefault("agg_df", None)
# top_level_spike_time: start of the detected top-level spike, or None.
st.session_state.setdefault("top_level_spike_time", None)
|
| 39 |
+
|
| 40 |
+
# --- Helper Functions ---
|
| 41 |
+
|
| 42 |
+
# def load_private_key(key_str):
|
| 43 |
+
# """Load a PEM-formatted private key."""
|
| 44 |
+
# return serialization.load_pem_private_key(
|
| 45 |
+
# key_str.encode("utf-8"),
|
| 46 |
+
# password=None,
|
| 47 |
+
# backend=default_backend()
|
| 48 |
+
# )
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# Use caching to avoid re-running the same query on every interaction.
|
| 52 |
+
@st.cache_data(show_spinner=False)
def cached_run_query(
    query,
    private_key_b64: str,
    user: str,
    account_identifier: str,
    warehouse: str,
    database: str,
    schema: str,
    role: str,
):
    """Connect to Snowflake, execute *query*, and return a DataFrame.

    Cached by Streamlit so identical calls within a session do not re-run.

    Args:
        query: SQL text to execute.
        private_key_b64: Base64-encoded DER private key for key-pair auth.
        user, account_identifier, warehouse, database, schema, role:
            Snowflake connection parameters.

    Returns:
        pandas.DataFrame built from the cursor results, with column names
        taken from the cursor description.
    """
    # Decode the base64-encoded DER key for the connector.
    der = base64.b64decode(private_key_b64)
    conn = snowflake.connector.connect(
        user=user,
        account=account_identifier,
        warehouse=warehouse,
        database=database,
        schema=schema,
        role=role,
        private_key=der,
    )
    # try/finally: the original leaked the cursor and connection whenever
    # the query raised (e.g. SQL error or statement timeout).
    try:
        cs = conn.cursor()
        try:
            # Cap runaway queries at 30 minutes.
            cs.execute("ALTER SESSION SET STATEMENT_TIMEOUT_IN_SECONDS = 1800")
            cs.execute(query)
            results = cs.fetchall()
            columns = [col[0] for col in cs.description]
            return pd.DataFrame(results, columns=columns)
        finally:
            cs.close()
    finally:
        conn.close()
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# --- Main Function for House Ad Spike Analysis ---
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def run_house_ad_spike_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    private_key_str,
    user,
    account_identifier,
    warehouse,
    database,
    schema,
    role,
    client,
):
    """
    Run the house ad spike query along with additional dimensions,
    generate key findings via OpenAI, and display the results.

    Args:
        table: Fully-qualified log table passed to the query builders.
        start_datetime / end_datetime: EST window (inclusive) for all queries.
        message_filter: Log message value the queries filter on.
        campaign_id: House-ad campaign id the queries filter on.
        private_key_str: Key material forwarded to cached_run_query
            (which expects a base64-encoded DER key).
        user / account_identifier / warehouse / database / schema / role:
            Snowflake connection parameters, forwarded to cached_run_query.
        client: OpenAI client used by the key-findings callback.

    Side effects:
        Renders Streamlit output and reads/writes st.session_state keys:
        query_run, query_df, agg_df, findings_messages, key_findings,
        key_findings_output, top_level_spike_time (and reads flex_jira_info).
    """
    # --- Generate SQL Queries ---
    main_sql = get_main_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    flex_sql = get_flex_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    bidder_sql = get_bidder_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    deal_sql = get_deal_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    ad_unit_sql = get_ad_unit_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    browser_sql = get_browser_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    device_sql = get_device_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    random_integer_sql = get_random_integer_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    hb_pb_sql = get_hb_pb_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    hb_size_sql = get_hb_size_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )

    # --- Main Query Execution ---
    # Run query only if it hasn't been run already.
    if not st.session_state["query_run"]:
        try:
            start_main = time.time()
            with st.spinner("Connecting to Snowflake and running top-level query..."):
                df = cached_run_query(
                    main_sql,
                    private_key_str,
                    user,
                    account_identifier,
                    warehouse,
                    database,
                    schema,
                    role,
                )
            elapsed_main = time.time() - start_main
            elapsed_minutes = int(elapsed_main // 60)
            elapsed_seconds = elapsed_main % 60

            st.info(
                f"Top-level SQL query executed in {elapsed_minutes} minute(s) and {elapsed_seconds:.2f} seconds."
            )

            # Process the results: normalize column names, rebuild an EST
            # timestamp from the date/hour/minute columns, then bucket to 5 min.
            df.columns = [col.upper() for col in df.columns]
            df.sort_values(by=["EST_HOUR", "EST_MINUTE"], inplace=True)
            df["timestamp"] = pd.to_datetime(
                df["EST_DATE"].astype(str)
                + " "
                + df["EST_HOUR"].astype(str).str.zfill(2)
                + ":"
                + df["EST_MINUTE"].astype(str).str.zfill(2)
            )
            # NOTE(review): "5T" is a deprecated pandas offset alias ("5min"
            # on pandas 2.2+) — confirm the pinned pandas version.
            df["5min"] = df["timestamp"].dt.floor("5T")
            agg_df = df.groupby("5min", as_index=False)["CNT"].sum()

            st.session_state["query_df"] = df
            st.session_state["agg_df"] = agg_df
            st.session_state["query_run"] = True
        except Exception as e:
            st.error(f"Error during main query execution: {e}")
            return
    else:
        # Use stored data.
        df = st.session_state.get("query_df")
        agg_df = st.session_state.get("agg_df")

    # --- Display Main Query Results ---
    st.header("Top-Level Data")
    # NOTE(review): magic baseline — a 5-minute bucket counts as a spike when
    # its CNT exceeds 30; confirm this still matches normal traffic levels.
    top_level_baseline = 30
    agg_df["is_spike"] = agg_df.apply(
        lambda row: row["CNT"] > top_level_baseline, axis=1
    )
    # A spike "starts" when two consecutive 5-minute buckets exceed the
    # baseline; the reported start is the bucket before the second hit.
    spike_start = None
    consecutive = 0
    for idx, row in agg_df.sort_values("5min").iterrows():
        if row["is_spike"]:
            consecutive += 1
            if consecutive == 2:
                spike_start = row["5min"] - timedelta(minutes=5)
                break
        else:
            consecutive = 0

    if spike_start:
        msg = f"Top-Level: House ad increase detected starting around {spike_start.strftime('%I:%M %p')}."
        st.success(msg)
    else:
        msg = "Top-Level: No large, consistent spike detected in the current data."
        st.info(msg)
    # Append the message only once.
    findings_messages = st.session_state.setdefault("findings_messages", [])
    if msg not in findings_messages:
        findings_messages.append(msg)
    st.session_state["top_level_spike_time"] = spike_start

    with st.expander("Show Raw Data"):
        st.dataframe(df)
    with st.expander("Show Raw 5-Minute Aggregated Data with Spike Alert"):
        st.dataframe(agg_df)

    title_text = "House Ads Count by 5-Minute Interval"
    fig = px.line(
        agg_df,
        x="5min",
        y="CNT",
        title=title_text,
        labels={"5min": "Time", "CNT": "House Ads Count"},
    )
    fig.update_xaxes(tickformat="%I:%M %p")
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("<hr style='border: 3px solid gray;'>", unsafe_allow_html=True)

    # --- Key Findings via OpenAI ---
    st.header("Key Findings and Next Steps")
    # Create a container to hold the key findings output.
    key_findings_container = st.container()

    # Initially display what’s in session_state (if anything) or a placeholder.
    with key_findings_container:
        if st.session_state.get("key_findings_output"):
            st.markdown(
                st.session_state.get("key_findings_output"),
                unsafe_allow_html=True,
            )
        else:
            st.info(
                "Key findings will appear here once additional queries have finished."
            )

    def generate_key_findings_callback():
        # Build the OpenAI prompt from accumulated findings plus any Jira
        # info the flex-bucket section stashed in session state.
        findings = "\n".join(st.session_state.get("findings_messages", []))
        flex_jira_info = st.session_state.get("flex_jira_info", "")
        jira_section = (
            f"\nJira Ticket Information from Flex Bucket section:\n{flex_jira_info}\n"
            if flex_jira_info
            else ""
        )
        prompt = (
            "You are a helpful analyst investigating a spike in house ads. A house ad spike detection dashboard has compiled a list of findings "
            "showing potential spikes across different dimensions. Below are the detailed findings from the dashboard, along with any flagged Jira ticket "
            "information. The NEXT_STEPS_INSTRUCTIONS file contains recommended next steps for each section depending on the spike(s) flagged in the dashboard:\n\n"
            f"Findings:\n{findings}\n"
            f"{jira_section}\n"
            "Next Steps Instructions:\n"
            f"{NEXT_STEPS_INSTRUCTIONS}\n\n"
            "Using the Findings, jira section information, and Next Steps Instructions as helpful context, create a concise summary "
            "that identifies the likely cause/causes of any detected house ad spikes and recommends actionable next steps. The summary "
            "should be a few sentences long followed by bullet points with the main findings and recommended next steps. Please output "
            "the summary in Markdown format with each bullet point on a new line, and indent sub-bullets properly. Ensure that each bullet "
            "point is on its own line. There is no need to explicitly mention the Instructions file in the summary, just use it to "
            "inform your analysis. "
        )
        # The raw prompt is kept in session state for debugging/inspection.
        st.session_state["key_findings"] = prompt
        try:
            response = client.responses.create(
                model="o3-mini",
                instructions="You are a helpful analyst who provides insights and recommends next steps.",
                input=prompt,
            )
            st.session_state["key_findings_output"] = response.output_text.strip()
        except Exception as e:
            st.session_state["key_findings_output"] = f"Error calling OpenAI API: {e}"

    # --- Additional Queries for Specific Dimensions ---
    st.header("Specific Dimensions Data")
    st.info("Checking specific dimensions for house ad spikes...")

    with st.spinner("Running additional queries..."):
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = {}
            start_times = {}
            # NOTE(review): these keys are passed straight to
            # update_section_generic — confirm they match the section names
            # it expects (house_ad_section_utils).
            query_dict = {
                "flex bucket": flex_sql,
                "bidder": bidder_sql,
                "deal": deal_sql,
                "ad_unit": ad_unit_sql,
                "browser": browser_sql,
                "device": device_sql,
                "random_integer": random_integer_sql,
                "hb_pb": hb_pb_sql,
                "hb_size": hb_size_sql,
            }
            for key, query in query_dict.items():
                start_times[key] = time.time()
                futures[key] = executor.submit(
                    cached_run_query,
                    query,
                    private_key_str,
                    user,
                    account_identifier,
                    warehouse,
                    database,
                    schema,
                    role,
                )

            containers = {
                "flex bucket": st.container(),
                "bidder": st.container(),
                "deal": st.container(),
                "ad_unit": st.container(),
                "browser": st.container(),
                "device": st.container(),
                "random_integer": st.container(),
                "hb_pb": st.container(),
                "hb_size": st.container(),
            }

            spike_time = st.session_state.get("top_level_spike_time")

            # Poll with a short timeout so each section renders as soon as
            # its query completes, instead of waiting for all of them.
            while futures:
                done, _ = concurrent.futures.wait(
                    list(futures.values()),
                    timeout=0.5,
                    return_when=concurrent.futures.FIRST_COMPLETED,
                )
                for future in done:
                    # Reverse-lookup the section key for this completed future.
                    key = [k for k, f in futures.items() if f == future][0]
                    df_result = future.result()
                    update_section_generic(
                        key, df_result, start_times, containers[key], spike_time
                    )
                    del futures[key]

    # Once all additional queries have completed, automatically generate key findings.
    generate_key_findings_callback()

    # Update the key findings container with the new output.
    with key_findings_container:
        st.markdown(
            st.session_state.get("key_findings_output", ""),
            unsafe_allow_html=True,
        )
|
house_ad_queries.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from functools import wraps
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def _quote_identifier(identifier: str) -> str:
|
| 6 |
+
"""Quote SQL identifiers that contain special characters."""
|
| 7 |
+
|
| 8 |
+
def quote_part(part: str) -> str:
|
| 9 |
+
if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", part):
|
| 10 |
+
return part
|
| 11 |
+
return f'"{part}"'
|
| 12 |
+
|
| 13 |
+
return ".".join(quote_part(p) for p in identifier.split("."))
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _sanitize_table(func):
    """Decorator: quote the table-name argument before the query is built."""

    @wraps(func)
    def wrapper(table, *args, **kwargs):
        # Sanitize only the first positional argument (the table name);
        # everything else is forwarded untouched.
        safe_table = _quote_identifier(table)
        return func(safe_table, *args, **kwargs)

    return wrapper
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@_sanitize_table
def get_main_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the top-level house-ad count query.

    Counts matching impressions per EST date/hour/minute for the given
    campaign within the datetime window. The table name is quoted by
    @_sanitize_table; the other parameters are embedded as SQL string
    literals, so their single quotes are escaped below to prevent them
    from terminating the literal (SQL injection hardening — values
    without quotes produce exactly the same query as before).
    """

    def _esc(value):
        # Standard SQL escaping: double any single quote inside a literal.
        return str(value).replace("'", "''")

    start_datetime = _esc(start_datetime)
    end_datetime = _esc(end_datetime)
    message_filter = _esc(message_filter)
    campaign_id = _esc(campaign_id)
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
@_sanitize_table
def get_flex_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the per-flex-bucket house-ad count query.

    Same filters as get_main_query, additionally grouped by `bucket`.
    NOTE(review): the datetime window, message_filter and campaign_id are
    interpolated into the SQL without escaping — callers must pass trusted
    values (the table name itself is quoted by @_sanitize_table).
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        bucket,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@_sanitize_table
def get_bidder_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the per-header-bidder house-ad count query.

    Same filters as get_main_query, additionally grouped by the first
    hb_bidder slot-targeting value (exposed as HB_BIDDER).
    NOTE(review): parameters are interpolated into the SQL without
    escaping — callers must pass trusted values.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_bidder[0]::varchar as HB_BIDDER,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
@_sanitize_table
|
| 79 |
+
def get_deal_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per Prebid deal.

    Counts qualifying rows grouped by EST date/hour/minute and the
    ``hb_deal`` slot-targeting value. Same filters as the other section
    queries: EST window, message, campaign id, hb_pb >= 0.15. The table
    name is validated upstream by the ``_sanitize_table`` decorator.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_deal[0]::varchar as HB_DEAL,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# New function for Ad Unit Data
|
| 97 |
+
@_sanitize_table
|
| 98 |
+
def get_ad_unit_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per ad unit.

    Counts qualifying rows grouped by EST date/hour/minute and the third
    '/'-separated segment of ``adUnitPath``. Same filters as the other
    section queries: EST window, message, campaign id, hb_pb >= 0.15.
    The table name is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        split(body[0]['adUnitPath'],'/')[2]::varchar as Ad_Unit,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# New function for Browser Data
|
| 116 |
+
@_sanitize_table
|
| 117 |
+
def get_browser_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per browser family.

    Classifies ``useragent`` into Edge / Chrome / Firefox / Safari / other
    (Edge is checked first because its UA string also contains "chrome";
    "applewebkit" is the Safari fallback) and counts rows per EST
    date/hour/minute and browser. Same filters as the other section
    queries. Table name is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        case
            when lower(useragent) like '%edg%' then 'Edge'
            when (lower(useragent) like '%cros%' or lower(useragent) like '%chrome%' or lower(useragent) like '%crios%') then 'Chrome'
            when lower(useragent) like '%firefox%' then 'Firefox'
            when lower(useragent) like '%applewebkit%' then 'Safari'
            else 'other'
        end as browser,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# New function for Device Data
|
| 141 |
+
@_sanitize_table
|
| 142 |
+
def get_device_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per device class.

    Classifies ``useragent`` into desktop / phone / tablet / other by
    simple substring matching (desktop is checked first, so Windows/Mac
    UAs win over any later match) and counts rows per EST date/hour/minute
    and device. Same filters as the other section queries. Table name is
    validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        case
            when (useragent like '%Windows%' or useragent like '%Macintosh%') THEN 'desktop'
            when (useragent like '%Android%' or useragent like '%iPhone%' or useragent like '%Mobi%') THEN 'phone'
            when (useragent like '%iPad%' or useragent like '%Tablet%') THEN 'tablet'
            else 'other'
        end as device,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
# New function for Random Integer Data
|
| 165 |
+
@_sanitize_table
|
| 166 |
+
def get_random_integer_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per random integer.

    Counts qualifying rows grouped by EST date/hour/minute and the
    ``siteTargeting:ri`` (random integer) value. Same filters as the other
    section queries: EST window, message, campaign id, hb_pb >= 0.15.
    The table name is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:siteTargeting:ri[0]::varchar as Random_Integer,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
# New function for hb_pb Data
|
| 186 |
+
@_sanitize_table
|
| 187 |
+
def get_hb_pb_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per hb_pb bid value.

    Counts qualifying rows grouped by EST date/hour/minute and the
    ``hb_pb`` (price-bucket) slot-targeting value; note the WHERE clause
    still floors hb_pb at 0.15 like every other section query. Table name
    is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_pb[0]::varchar as hb_pb,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# New function for hb_size Data
|
| 205 |
+
@_sanitize_table
|
| 206 |
+
def get_hb_size_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per creative size.

    Counts qualifying rows grouped by EST date/hour/minute and the
    ``hb_size`` slot-targeting value. Same filters as the other section
    queries: EST window, message, campaign id, hb_pb >= 0.15. Table name
    is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_size[0]::varchar as hb_size,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
house_ad_section_utils.py
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
import streamlit as st
|
| 5 |
+
import os
|
| 6 |
+
import pytz
|
| 7 |
+
import re
|
| 8 |
+
from datetime import timedelta, date, datetime
|
| 9 |
+
from atlassian import Jira
|
| 10 |
+
|
| 11 |
+
# --- Jira API Configuration for Deployments ---
# Credentials come from environment variables so no secrets live in the
# repository (on a hosted Space these are supplied as secrets).
JIRA_URL = os.getenv("JIRA_URL")
JIRA_USERNAME = os.getenv("JIRA_USERNAME")
JIRA_API_TOKEN = os.getenv("JIRA_API_TOKEN")

# Initialize your Jira
# Module-level shared client used by update_section_generic's deployment
# lookup. NOTE(review): if the env vars are unset this still constructs a
# client with url=None and only fails on first use -- confirm that is the
# intended failure mode.
jira_client = Jira(url=JIRA_URL, username=JIRA_USERNAME, password=JIRA_API_TOKEN)
|
| 18 |
+
|
| 19 |
+
# Configuration dictionary for sections.
# Each entry keys a dashboard section by name and provides:
#   group_col       -- upper-cased DataFrame column that update_section_generic
#                      groups counts by
#   chart_title     -- title passed to the Plotly line chart
#   baseline        -- per-5-minute count above which a group is flagged
#   spike_threshold -- number of flagged groups above which the alert is
#                      reported as "multiple groups" instead of naming them
SECTION_CONFIG = {
    "flex bucket": {
        "group_col": "BUCKET",
        "chart_title": "Flex Bucket House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "bidder": {
        "group_col": "HB_BIDDER",
        "chart_title": "hb_bidder House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "deal": {
        "group_col": "HB_DEAL",
        "chart_title": "hb_deal House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "ad_unit": {
        "group_col": "AD_UNIT",
        "chart_title": "Ad Unit House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "browser": {
        "group_col": "BROWSER",
        "chart_title": "Browser House Ads Count by 5-Minute Interval",
        "baseline": 40,
        # Lower threshold: browser/device have few groups, so even one
        # flagged group is reported as "multiple" sooner.
        "spike_threshold": 1,
    },
    "device": {
        "group_col": "DEVICE",
        "chart_title": "Device House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 1,
    },
    "random_integer": {
        "group_col": "RANDOM_INTEGER",
        "chart_title": "Random Integer House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "hb_pb": {
        "group_col": "HB_PB",
        "chart_title": "hb_pb House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "hb_size": {
        "group_col": "HB_SIZE",
        "chart_title": "hb_size House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
}
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def parse_deployment_info(comment_text):
    """Extract deployment fields from a structured Jira comment.

    The expected comment layout is one "<Label>: <value>" pair per line:

        Deployed At: <timestamp>
        Bucket: <bucket>
        Traffic: <traffic>
        Branch: <branch>

    Returns:
        tuple[str, str, str, str]: (deployed_at, bucket, traffic, branch)
        when all four labels are present; four empty strings otherwise.
    """
    # Table of labels, in match-priority order, mapped to their parsed values.
    values = {"Deployed At": "", "Bucket": "", "Traffic": "", "Branch": ""}
    seen = set()

    for raw_line in comment_text.splitlines():
        for label in values:
            marker = label + ":"
            if marker in raw_line:
                # Take everything after the first occurrence of the marker.
                values[label] = raw_line.split(marker)[1].strip()
                seen.add(label)
                break  # first matching label wins for this line

    # All-or-nothing contract: partial matches yield empty fields.
    if seen != set(values):
        return "", "", "", ""
    return (
        values["Deployed At"],
        values["Bucket"],
        values["Traffic"],
        values["Branch"],
    )
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def update_section_generic(key, df, start_times, container, spike_time):
    """Render one dashboard section (chart + spike findings) into *container*.

    Looks up SECTION_CONFIG[key] for the grouping column, baseline, and
    spike threshold; buckets *df* into 5-minute intervals; flags groups
    whose count at *spike_time* exceeds the baseline; and, for the
    "flex bucket" section only, cross-references flagged buckets against
    deployment comments fetched from Jira.

    NOTE(review): the original docstring mentioned pre-/post-window
    share-of-voice comparisons, but this implementation only inspects the
    single *spike_time* bucket — confirm which is intended.

    Args:
        key: Section name; must be a key of SECTION_CONFIG.
        df: Query result with EST_DATE/EST_HOUR/EST_MINUTE/CNT plus the
            section's grouping column. Mutated in place (sorted, columns
            upper-cased, "timestamp"/"5min" columns added).
        start_times: Mapping of section key -> time.time() captured when the
            section's query started; used only to report elapsed time.
        container: Streamlit container to render into.
        spike_time: 5-minute-floored timestamp anchoring the spike check.
    """
    # Compute elapsed time for the query.
    elapsed_section = time.time() - start_times[key]
    minutes_container = int(elapsed_section // 60)
    seconds_container = elapsed_section % 60

    # Standardize column names and create a unified timestamp.
    df.columns = [col.upper() for col in df.columns]
    df.sort_values(by=["EST_HOUR", "EST_MINUTE"], inplace=True)
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str)
        + " "
        + df["EST_HOUR"].astype(str).str.zfill(2)
        + ":"
        + df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    # Floor each row to its 5-minute bucket for aggregation.
    df["5min"] = df["timestamp"].dt.floor("5T")

    # Retrieve configuration for the current section.
    config = SECTION_CONFIG.get(key, {})
    baseline = config.get("baseline", 30)
    group_col = config.get("group_col")
    spike_threshold = config.get("spike_threshold", 3)

    with container:
        st.subheader(f"{key.capitalize()} Data")
        st.info(
            f"{key.capitalize()} query completed in {minutes_container} minute(s) and {seconds_container:.2f} seconds."
        )

        # Group the data by 5-minute intervals and the configured grouping column.
        agg_df = df.groupby(["5min", group_col], as_index=False)["CNT"].sum()

        # Get the data corresponding to the spike time.
        spike_row = agg_df[agg_df["5min"] == spike_time]

        # Flag groups where the count exceeds the baseline.
        flagged_groups = []
        for grp in spike_row[group_col].unique():
            group_count = spike_row[spike_row[group_col] == grp]["CNT"].sum()
            if group_count > baseline:
                flagged_groups.append(grp)

        # Create the chart once.
        fig = px.line(
            agg_df,
            x="5min",
            y="CNT",
            color=group_col,
            title=config.get(
                "chart_title",
                f"{key.capitalize()} House Ads Count by 5-Minute Interval",
            ),
            labels={"5min": "Time", "CNT": "House Ads Count", group_col: key},
        )
        fig.update_xaxes(tickformat="%I:%M %p")

        if flagged_groups:
            if len(flagged_groups) > spike_threshold:
                # Too many groups spiked to name them individually.
                msg = f"{key.capitalize()}: House ad increase detected for multiple {key} groups starting around {spike_time.strftime('%I:%M %p')}."
                st.warning(msg)
                with st.expander(f"Show Raw {key.capitalize()} Data"):
                    st.dataframe(df)
                with st.expander("Show Chart"):
                    st.plotly_chart(fig, use_container_width=True)
            else:
                # Few enough flagged groups to list explicitly.
                msg = f"{key.capitalize()}: House ad increase detected for {', '.join(flagged_groups)} starting around {spike_time.strftime('%I:%M %p')}."
                st.success(msg)
                with st.expander(f"Show Raw {key.capitalize()} Data"):
                    st.dataframe(df)
                st.plotly_chart(fig, use_container_width=True)
            # Record the finding for the cross-section summary.
            # NOTE(review): nesting reconstructed from a whitespace-stripped
            # source -- confirm whether this should also run on the warning
            # ("multiple groups") path or only the success path.
            st.session_state.setdefault("findings_messages", []).append(msg)
        else:
            msg = f"{key.capitalize()}: No significant {key} spikes detected."
            st.info(msg)
            st.session_state.setdefault("findings_messages", []).append(msg)
            with st.expander(f"Show Raw {key.capitalize()} Data"):
                st.dataframe(df)
            with st.expander("Show Chart"):
                st.plotly_chart(fig, use_container_width=True)

        # Only the flex-bucket section enriches findings with Jira deployments.
        if key == "flex bucket":
            st.write("### Deployment Information")
            flex_jira_info = ""  # Initialize an empty variable.
            try:
                # Use the selected dashboard date to define the full day range.
                start_date = st.session_state.get("start_date")
                end_date = st.session_state.get("end_date")
                eastern = st.session_state.get("eastern")
                start_datetime = datetime.combine(start_date, datetime.min.time())
                end_datetime = datetime.combine(end_date, datetime.max.time())
                # NOTE(review): start_str/end_str are computed but never used
                # below -- candidates for removal.
                start_str = start_datetime.astimezone(pytz.utc).strftime(
                    "%Y-%m-%d %H:%M"
                )
                end_str = end_datetime.astimezone(pytz.utc).strftime("%Y-%m-%d %H:%M")
                st.info("Fetching deployment information from Jira...")

                # Build a JQL query for the selected date range.
                # Dates are rendered M/D/YY to match the comment format.
                dashboard_start_str = (
                    f"{start_date.month}/{start_date.day}/{start_date.strftime('%y')}"
                )
                dashboard_end_str = (
                    f"{end_date.month}/{end_date.day}/{end_date.strftime('%y')}"
                )
                jql = (
                    f'comment ~ "Deployed At: {dashboard_start_str}" '
                    f'OR comment ~ "Deployed At: {dashboard_end_str}" '
                    f'AND comment ~ "Bucket:" '
                    f'AND comment ~ "Traffic:" '
                    f'AND comment ~ "Branch:"'
                )

                # --- Pagination: Retrieve all matching issues ---
                startAt = 0
                limit = 50
                deployments_list = []

                while True:
                    response_page = jira_client.jql(
                        jql,
                        fields="key,summary,updated,comment",
                        start=startAt,
                        limit=limit,
                    )
                    issues = response_page.get("issues", [])
                    deployments_list.extend(issues)
                    # A short page means we've reached the last page.
                    if len(issues) < limit:
                        break
                    startAt += len(issues)

                deployments = []
                for issue in deployments_list:
                    key_val = issue["key"]
                    summary = issue["fields"]["summary"]
                    updated = issue["fields"]["updated"]
                    # Render the issue key as a clickable link for st.markdown.
                    key_link = f'<a href="{JIRA_URL}/browse/{key_val}" target="_blank">{key_val}</a>'

                    try:
                        updated_dt = pd.to_datetime(updated, utc=True).astimezone(
                            eastern
                        )
                    except Exception:
                        updated_dt = None

                    comment_field = issue["fields"].get("comment", {})
                    comments = comment_field.get("comments", [])

                    deployment_found = False
                    deployment_comment = ""
                    if comments:
                        for comment in comments:
                            try:
                                comment_dt = pd.to_datetime(
                                    comment["created"], utc=True
                                ).astimezone(eastern)
                            except Exception:
                                continue
                            # Check if the comment was created on the selected date.
                            if start_date <= comment_dt.date() <= end_date:
                                body = comment["body"].strip()
                                # Only comments starting with "deployed"
                                # count as deployment records.
                                if body.lower().startswith("deployed"):
                                    deployment_found = True
                                    deployment_comment = body
                                    break
                    if deployment_found:
                        dep_at, bucket, traffic, branch = parse_deployment_info(
                            deployment_comment
                        )
                        # Fallback for the legacy free-form style
                        # "Deployed to prod at <timestamp>." comments.
                        if not dep_at and deployment_comment.lower().startswith(
                            "deployed to prod"
                        ):
                            timestamp_text = re.sub(
                                r"(?i)^deployed\s+to\s+prod\s*(at\s*)?",
                                "",
                                deployment_comment,
                            ).strip()
                            if "." in timestamp_text:
                                timestamp_text = timestamp_text.split(".")[0].strip()
                            dep_at = timestamp_text
                            bucket, traffic, branch = "", "", ""
                        if dep_at:
                            try:
                                deployed_dt = pd.to_datetime(
                                    dep_at, format="%m/%d/%y, %I:%M %p", errors="coerce"
                                )
                            except Exception:
                                deployed_dt = None
                            if deployed_dt is not None and deployed_dt is not pd.NaT:
                                # Parsed naive -> localize to Eastern.
                                deployed_dt = eastern.localize(
                                    deployed_dt.replace(tzinfo=None)
                                )
                                deployments.append(
                                    {
                                        "Deployed Date": deployed_dt.strftime(
                                            "%m/%d/%y"
                                        ),
                                        "Deployed Time": deployed_dt.strftime(
                                            "%I:%M %p"
                                        ),
                                        "Key": key_link,
                                        "Summary": summary,
                                        # Legacy comments carry no bucket; treat
                                        # them as production deploys.
                                        "Bucket": bucket if bucket else "production",
                                    }
                                )

                if deployments:
                    df_deployments = pd.DataFrame(deployments).reset_index(drop=True)
                    # Temporary sort key; dropped after ordering newest-first.
                    df_deployments["Deployed_dt"] = pd.to_datetime(
                        df_deployments["Deployed Date"]
                        + " "
                        + df_deployments["Deployed Time"],
                        format="%m/%d/%y %I:%M %p",
                        errors="coerce",
                    )
                    df_deployments.sort_values(
                        "Deployed_dt", ascending=False, inplace=True
                    )
                    df_deployments.drop("Deployed_dt", axis=1, inplace=True)

                    # Filter the DataFrame to only show flagged deployments.
                    df_flagged = df_deployments[
                        df_deployments["Bucket"].isin(flagged_groups)
                    ]

                    if not df_flagged.empty:
                        # Build a string containing info for all flagged Jira tickets.
                        tickets_info_list = []
                        for _, row in df_flagged.iterrows():
                            tickets_info_list.append(
                                f"Jira Ticket: {row['Key']} - {row['Summary']}"
                            )
                        flex_jira_info = "\n".join(tickets_info_list)

                        # Reorder columns for display.
                        cols = [
                            "Deployed Date",
                            "Deployed Time",
                            "Key",
                            "Summary",
                            "Bucket",
                        ]
                        df_flagged = df_flagged[cols]
                        # escape=False keeps the <a> links clickable.
                        styled_df = df_flagged.style.hide(axis="index")
                        st.markdown(
                            styled_df.to_html(escape=False), unsafe_allow_html=True
                        )
                    else:
                        st.info(
                            "No flagged deployments found for the selected criteria."
                        )
                else:
                    st.info("No deployments found for the selected criteria.")
            except Exception as e:
                # Jira enrichment is best-effort; surface the error without
                # breaking the rest of the dashboard.
                st.error(f"Error fetching deployments: {e}")

            # Save the Jira info (if any) to session state.
            st.session_state["flex_jira_info"] = flex_jira_info
        # Visual divider after every section.
        st.markdown("<hr style='border: 3px solid gray;'>", unsafe_allow_html=True)
|
index.html
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
<!doctype html>
|
| 2 |
-
<html>
|
| 3 |
-
<head>
|
| 4 |
-
<meta charset="utf-8" />
|
| 5 |
-
<meta name="viewport" content="width=device-width" />
|
| 6 |
-
<title>My static Space</title>
|
| 7 |
-
<link rel="stylesheet" href="style.css" />
|
| 8 |
-
</head>
|
| 9 |
-
<body>
|
| 10 |
-
<div class="card">
|
| 11 |
-
<h1>Welcome to your static Space!</h1>
|
| 12 |
-
<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
|
| 13 |
-
<p>
|
| 14 |
-
Also don't forget to check the
|
| 15 |
-
<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
|
| 16 |
-
</p>
|
| 17 |
-
</div>
|
| 18 |
-
</body>
|
| 19 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
snowflake-connector-python
|
| 3 |
+
cryptography
|
| 4 |
+
pandas
|
| 5 |
+
plotly
|
| 6 |
+
atlassian-python-api
|
| 7 |
+
pytz
|
| 8 |
+
openai
|
style.css
DELETED
|
@@ -1,28 +0,0 @@
|
|
| 1 |
-
body {
|
| 2 |
-
padding: 2rem;
|
| 3 |
-
font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
|
| 4 |
-
}
|
| 5 |
-
|
| 6 |
-
h1 {
|
| 7 |
-
font-size: 16px;
|
| 8 |
-
margin-top: 0;
|
| 9 |
-
}
|
| 10 |
-
|
| 11 |
-
p {
|
| 12 |
-
color: rgb(107, 114, 128);
|
| 13 |
-
font-size: 15px;
|
| 14 |
-
margin-bottom: 10px;
|
| 15 |
-
margin-top: 5px;
|
| 16 |
-
}
|
| 17 |
-
|
| 18 |
-
.card {
|
| 19 |
-
max-width: 620px;
|
| 20 |
-
margin: 0 auto;
|
| 21 |
-
padding: 16px;
|
| 22 |
-
border: 1px solid lightgray;
|
| 23 |
-
border-radius: 16px;
|
| 24 |
-
}
|
| 25 |
-
|
| 26 |
-
.card p:last-child {
|
| 27 |
-
margin-bottom: 0;
|
| 28 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|