github-actions[bot] committed on
Commit
08c9602
·
1 Parent(s): 0fc4a33

sync: automatic content update from github

Browse files
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
  title: Red Alert Investigations
3
- emoji: 💻
4
- colorFrom: purple
5
- colorTo: purple
6
- sdk: static
 
 
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Red Alert Investigations
3
+ emoji: 📈
4
+ colorFrom: gray
5
+ colorTo: blue
6
+ sdk: streamlit
7
+ sdk_version: 1.43.1
8
+ app_file: app.py
9
  pinned: false
10
+ short_description: Automate Red Alert Investigations
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import pandas as pd
4
+ import pytz
5
+ import base64
6
+ import altair as alt
7
+ from datetime import datetime, date, time, timedelta
8
+ from zoneinfo import ZoneInfo
9
+ import snowflake.connector
10
+ from cryptography.hazmat.primitives import serialization
11
+ from cryptography.hazmat.backends import default_backend
12
+
13
# --- Secrets and Key Handling ---
# The PEM key arrives via an env var with literal "\n" sequences (as stored in
# the Space secrets); restore real newlines before parsing. Fail fast with a
# clear message if the secret is absent instead of an opaque AttributeError.
_pem_raw = os.getenv("SNOWFLAKE_PRIVATE_KEY")
if not _pem_raw:
    raise RuntimeError("SNOWFLAKE_PRIVATE_KEY environment variable is not set")
private_key_pem = _pem_raw.replace('\\n', "\n").encode()
private_key_obj = serialization.load_pem_private_key(
    private_key_pem,
    password=None,
    backend=default_backend(),
)
# Convert to unencrypted PKCS#8 DER, then base64-encode so the key can be
# passed around as a hashable string (e.g. into cached query helpers).
private_key_der = private_key_obj.private_bytes(
    encoding=serialization.Encoding.DER,
    format=serialization.PrivateFormat.PKCS8,
    encryption_algorithm=serialization.NoEncryption(),
)
private_key_b64 = base64.b64encode(private_key_der).decode('utf-8')
26
+
27
+ # Connection params
28
+ account_identifier = os.getenv("SNOWFLAKE_ACCOUNT_IDENTIFIER")
29
+ user = os.getenv("SNOWFLAKE_USER")
30
+ warehouse = os.getenv("SNOWFLAKE_WAREHOUSE")
31
+ database = os.getenv("SNOWFLAKE_DATABASE")
32
+ schema = os.getenv("SNOWFLAKE_SCHEMA")
33
+ role = os.getenv("SNOWFLAKE_ROLE")
34
+ table = os.getenv("SNOWFLAKE_TABLE")
35
+ message_filter = os.getenv("SNOWFLAKE_MESSAGE_FILTER")
36
+ campaign_id = os.getenv("SNOWFLAKE_CAMPAIGN_ID")
37
+
38
+ # Import query builders
39
+ from house_ad_main import run_house_ad_spike_query
40
+ from delivery_main import run_drop_query
41
+ from delivery_queries import (
42
+ get_main_query as get_main_delivery_query,
43
+ get_main_int_sov_query,
44
+ get_bidder_query as get_bidder_delivery_query,
45
+ get_flex_bucket_query,
46
+ get_device_query as get_device_delivery_query,
47
+ get_ad_unit_query as get_ad_unit_delivery_query,
48
+ get_refresh_query
49
+ )
50
+ from house_ad_queries import (
51
+ get_main_query as get_main_house_query,
52
+ get_flex_query as get_flex_house_query,
53
+ get_bidder_query as get_bidder_house_query,
54
+ get_deal_query,
55
+ get_ad_unit_query as get_ad_unit_house_query,
56
+ get_browser_query,
57
+ get_device_query as get_device_house_query,
58
+ get_random_integer_query,
59
+ get_hb_pb_query,
60
+ get_hb_size_query
61
+ )
62
+
63
+ # OpenAI (if required)
64
+ from openai import OpenAI
65
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
66
+
67
# Session defaults
if "deep_values" not in st.session_state:
    st.session_state["deep_values"] = {}

# Sidebar filters
st.sidebar.title("Red Alert Investigations Filters")
analysis_type = st.sidebar.radio(
    "Analysis Type",
    ["House_Ads", "Display_Prebid", "Display_OB", "Display_AdX", "Display_HBT_OB", "Display_TAM",
     "Video_Prebid", "Video_OB", "Video_AdX", "Video_TAM"]
)

# Everything except House_Ads encodes "<AdFormat>_<Integration>" in its name.
if analysis_type == "House_Ads":
    ad_format_filter = integration_filter = None
else:
    ad_format_filter, integration_filter = analysis_type.split("_", 1)

# Time defaults: last 3 hours in US/Eastern.
now_edt = datetime.now(ZoneInfo("America/New_York"))
default_start = now_edt - timedelta(hours=3)
default_end = now_edt
start_date = st.sidebar.date_input("Start Date", default_start.date())
start_hour = st.sidebar.selectbox("Start Hour", list(range(24)), index=default_start.hour)
end_date = st.sidebar.date_input("End Date", default_end.date())
end_hour = st.sidebar.selectbox("End Hour", list(range(24)), index=default_end.hour)
start_dt = datetime.combine(start_date, time(start_hour))
end_dt = datetime.combine(end_date, time(end_hour, 59, 59))
start_str = start_dt.strftime('%Y-%m-%d %H:%M:%S')
end_str = end_dt.strftime('%Y-%m-%d %H:%M:%S')

st.session_state["start_date"] = start_date
st.session_state["end_date"] = end_date
# delivery_main.run_drop_query reads st.session_state.get("start_hour", 23)
# when normalizing overnight timestamps; previously this was never stored,
# so it always fell back to the default of 23. Store the user's selection.
st.session_state["start_hour"] = start_hour
st.session_state["eastern"] = pytz.timezone("America/New_York")
100
+
101
# Data fetch helper
def fetch_df(sql: str) -> pd.DataFrame:
    """Run *sql* against Snowflake and return the result as a DataFrame.

    Opens a short-lived connection per call and always closes it, even when
    the query raises.
    """
    # The connector expects the raw DER key bytes, not the base64 text;
    # decode here the same way delivery_main.cached_run_query does.
    conn = snowflake.connector.connect(
        account=account_identifier,
        user=user,
        private_key=base64.b64decode(private_key_b64),
        warehouse=warehouse,
        database=database,
        schema=schema,
        role=role,
    )
    try:
        return pd.read_sql(sql, conn)
    finally:
        # Avoid leaking one connection per Streamlit rerun/button click.
        conn.close()
113
+
114
+ # Tabs layout
115
+ tab_auto, tab_deep = st.tabs(["Auto-Analysis","Deep Dive"])
116
+
117
+ # Auto-Analysis Tab
118
+ with tab_auto:
119
+ st.title("Red Alert Investigations")
120
+ if analysis_type == "House_Ads":
121
+ st.header("House Ad Analysis")
122
+ if st.button("Run Analysis"):
123
+ st.session_state["query_run"] = False
124
+ run_house_ad_spike_query(
125
+ table, start_str, end_str,
126
+ message_filter, campaign_id,
127
+ private_key_b64, user,
128
+ account_identifier, warehouse,
129
+ database, schema, role,
130
+ client
131
+ )
132
+ else:
133
+ st.header(f"{ad_format_filter} {integration_filter} Analysis")
134
+ if st.button("Run Analysis"):
135
+ st.session_state["query_run"] = False
136
+ run_drop_query(
137
+ table, start_str, end_str,
138
+ message_filter, campaign_id,
139
+ private_key_b64, user,
140
+ account_identifier, warehouse,
141
+ database, schema, role,
142
+ client,
143
+ integration_filter, ad_format_filter
144
+ )
145
+
146
+ with tab_deep:
147
+ st.header("Deep Dive")
148
+
149
+ # 1) Select dimensions
150
+ if analysis_type == "House_Ads":
151
+ all_dims = [
152
+ "Flex Bucket","Bidder","Deal","Ad Unit","Browser",
153
+ "Device","Random Integer","HB Price Buckets","HB Size"
154
+ ]
155
+ else:
156
+ all_dims = [
157
+ "Integration SOV","Bidder","Flex Bucket",
158
+ "Device","Ad Unit Group","Refresh"
159
+ ]
160
+ to_plot = st.multiselect("1. Select dimensions", all_dims, key="dims")
161
+
162
+ # 2) Fetch unique values per dimension
163
+ if st.button("2. Fetch Values") and to_plot:
164
+ vals = {}
165
+ for dim in to_plot:
166
+ if dim == "Integration SOV" and analysis_type != "House_Ads":
167
+ dfv = fetch_df(get_main_int_sov_query(
168
+ table, start_str, end_str, message_filter,
169
+ campaign_id, ad_format_filter
170
+ ))
171
+ col = "Integration"
172
+ elif analysis_type == "House_Ads":
173
+ fn_map = {
174
+ "Flex Bucket": get_flex_house_query,
175
+ "Bidder": get_bidder_house_query,
176
+ "Deal": get_deal_query,
177
+ "Ad Unit": get_ad_unit_house_query,
178
+ "Browser": get_browser_query,
179
+ "Device": get_device_house_query,
180
+ "Random Integer": get_random_integer_query,
181
+ "HB Price Buckets":get_hb_pb_query,
182
+ "HB Size": get_hb_size_query,
183
+ }
184
+ dfv = fetch_df(fn_map[dim](
185
+ table, start_str, end_str, message_filter, campaign_id
186
+ ))
187
+ col = [c for c in dfv.columns
188
+ if c not in ("EST_DATE","EST_HOUR","EST_MINUTE","CNT")][0]
189
+ else:
190
+ fn_map = {
191
+ "Bidder": get_bidder_delivery_query,
192
+ "Flex Bucket": get_flex_bucket_query,
193
+ "Device": get_device_delivery_query,
194
+ "Ad Unit Group": get_ad_unit_delivery_query,
195
+ "Refresh": get_refresh_query,
196
+ }
197
+ dfv = fetch_df(fn_map[dim](
198
+ table, start_str, end_str, message_filter,
199
+ campaign_id, integration_filter, ad_format_filter
200
+ ))
201
+ col = [c for c in dfv.columns
202
+ if c not in ("EST_DATE","EST_HOUR","EST_MINUTE","CNT")][0]
203
+ vals[dim] = sorted(dfv[col].dropna().unique())
204
+ st.session_state["deep_values"] = vals
205
+
206
+ # 3) Select filters & run the combined query
207
+ if st.session_state.get("deep_values"):
208
+ filters = {}
209
+ for dim, options in st.session_state["deep_values"].items():
210
+ filters[dim] = st.multiselect(
211
+ f"Filter {dim}", options, default=options,
212
+ key=f"fv_{dim}"
213
+ )
214
+
215
+ if st.button("3. Run Deep Dive"):
216
+ # 3a) Build the base CTE
217
+ if analysis_type == "House_Ads":
218
+ base = get_main_house_query(
219
+ table, start_str, end_str, message_filter, campaign_id
220
+ )
221
+ snippet_map = {
222
+ "Flex Bucket": "bucket",
223
+ "Bidder": "body[0]:slotTargeting:hb_bidder[0]::varchar AS BIDDER",
224
+ "Deal": "body[0]:slotTargeting:hb_deal[0]::varchar AS HB_DEAL",
225
+ "Ad Unit": "split(body[0]['adUnitPath'],'/')[2]::varchar AS AD_UNIT",
226
+ "Browser": "CASE WHEN lower(useragent) LIKE '%edg%' THEN 'Edge' WHEN lower(useragent) LIKE '%chrome%' THEN 'Chrome' WHEN lower(useragent) LIKE '%firefox%' THEN 'Firefox' WHEN lower(useragent) LIKE '%safari%' THEN 'Safari' ELSE 'Other' END AS BROWSER",
227
+ "Device": "CASE WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' WHEN useragent LIKE '%Android%' OR useragent LIKE '%Mobi%' THEN 'phone' WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' ELSE 'other' END AS DEVICE",
228
+ "Random Integer": "body[0]:siteTargeting:ri[0]::varchar AS RANDOM_INTEGER",
229
+ "HB Price Buckets": "body[0]:slotTargeting:hb_pb[0]::varchar AS HB_PB",
230
+ "HB Size": "body[0]:slotTargeting:hb_size[0]::varchar AS HB_SIZE",
231
+ }
232
+ else:
233
+ base = get_main_delivery_query(
234
+ table, start_str, end_str,
235
+ message_filter, campaign_id,
236
+ integration_filter, ad_format_filter
237
+ )
238
+ snippet_map = {
239
+ "Integration SOV":"INTEGRATION",
240
+ "Bidder": "body[0]:slotTargeting:hb_bidder[0]::varchar AS HB_BIDDER",
241
+ "Flex Bucket": "bucket",
242
+ "Device": "CASE WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' WHEN useragent LIKE '%Android%' OR useragent LIKE '%Mobi%' THEN 'phone' WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' ELSE 'other' END AS DEVICE",
243
+ "Ad Unit Group": "CASE WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Sticky_Outstream' WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video' ELSE 'Other' END AS AD_UNIT_GROUP",
244
+ "Refresh": "body[0]:slotTargeting:refresh[0]::varchar AS REFRESH",
245
+ }
246
+
247
+ # 3b) Inject all selected dimension snippets, matching both lowercase & uppercase
248
+ select_snippets = [snippet_map[dim] for dim in to_plot]
249
+ dynamic_cte = (
250
+ base
251
+ .replace(
252
+ "count(*) as CNT",
253
+ f"count(*) as CNT, {', '.join(select_snippets)}"
254
+ )
255
+ .replace(
256
+ "COUNT(*) AS CNT",
257
+ f"COUNT(*) AS CNT, {', '.join(select_snippets)}"
258
+ )
259
+ )
260
+
261
+ # 3c) Build WHERE clauses from the filters
262
+ where_clauses = []
263
+ for dim, vals in filters.items():
264
+ alias = snippet_map[dim].split(" AS ")[-1]
265
+ val_list = ", ".join(f"'{v}'" for v in vals)
266
+ where_clauses.append(f"{alias} IN ({val_list})")
267
+
268
+ final_sql = (
269
+ f"SELECT *\n"
270
+ f"FROM (\n{dynamic_cte}\n) sub\n"
271
+ f"WHERE {' AND '.join(where_clauses)}"
272
+ )
273
+
274
+ # 3d) Execute & display
275
+ df_final = fetch_df(final_sql)
276
+ for dim, snippet in snippet_map.items():
277
+ alias = snippet.split(" AS ")[-1] # e.g. "bucket", "BROWSER", etc.
278
+ # find the actual DataFrame column (which will be uppercase)
279
+ match = next((c for c in df_final.columns if c.upper() == alias.upper()), None)
280
+ if match:
281
+ df_final.rename(columns={match: dim}, inplace=True)
282
+
283
+ # Build the minute‐precision datetime index
284
+ df_final["EST_DATETIME"] = (
285
+ pd.to_datetime(df_final["EST_DATE"]) +
286
+ pd.to_timedelta(df_final["EST_HOUR"], unit="h") +
287
+ pd.to_timedelta(df_final["EST_MINUTE"], unit="m")
288
+ )
289
+
290
+ st.subheader("Deep Dive Results")
291
+ st.dataframe(df_final)
292
+
293
+ # Build the Series column off your filtered dims
294
+ df_final["Series"] = (
295
+ df_final[list(filters.keys())]
296
+ .astype(str)
297
+ .agg(":".join, axis=1)
298
+ )
299
+
300
+ # Pivot on EST_DATETIME instead of EST_DATE
301
+ pivot = (
302
+ df_final
303
+ .pivot_table(
304
+ index="EST_DATETIME", # ← minute‐level axis
305
+ columns="Series",
306
+ values="CNT",
307
+ aggfunc="sum"
308
+ )
309
+ .fillna(0)
310
+ .sort_index()
311
+ )
312
+
313
+ pivot.columns = [col.replace(":", "_") for col in pivot.columns]
314
+
315
+ pivot_df = (
316
+ pivot
317
+ .reset_index()
318
+ .melt(id_vars="EST_DATETIME", var_name="Series", value_name="CNT")
319
+ )
320
+
321
+ # Build an Altair line chart:
322
+ chart = (
323
+ alt.Chart(pivot_df)
324
+ .mark_line(point=True)
325
+ .encode(
326
+ x=alt.X(
327
+ "EST_DATETIME:T",
328
+ axis=alt.Axis(
329
+ title="Time (NY)",
330
+ format="%H:%M", # show hour:minute on the axis
331
+ tickCount="hour" # one tick per hour
332
+ )
333
+ ),
334
+ y=alt.Y("CNT:Q", title="Count"),
335
+ color=alt.Color("Series:N", title="Dimension"),
336
+ tooltip=[
337
+ alt.Tooltip("EST_DATETIME:T", title="Timestamp", format="%Y-%m-%d %H:%M"),
338
+ alt.Tooltip("Series:N", title="Series"),
339
+ alt.Tooltip("CNT:Q", title="Count"),
340
+ ]
341
+ )
342
+ .properties(width=700, height=400)
343
+ .interactive() # allow pan/zoom
344
+ )
345
+
346
+ st.subheader("Deep Dive Trend")
347
+ st.altair_chart(chart, use_container_width=True)
changelog.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Changelog
2
+
3
+ - 2025-08-07 19:58 UTC: Cast LineItem IDs to VARCHAR in delivery queries to avoid numeric conversion errors.
4
+ - 2025-08-07 17:28 UTC: Quote table identifiers in queries to support hyphenated table names.
5
+ - 2025-08-07 14:28 UTC: Initialized changelog to track project updates.
delivery_instructions.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # instructions.py
2
+
3
+ NEXT_STEPS_INSTRUCTIONS = """
4
+ Flex Bucket:
5
+ If a single flex bucket is flagged as having a delivery drop, that bucket is the most likely source of the issue.
6
+ Check whether there was a recent deployment impacting that bucket—refer to the deployment time and bucket name in the flex_section message.
7
+ Send the flagged flex bucket details, along with deployment context, to the Ad Code team for investigation.
8
+ Include a hyperlink to the related Jira ticket.
9
+ If multiple buckets are flagged, the issue may be shared among them. If most or all are impacted, flex buckets may not be the root cause.
10
+
11
+ Bidder:
12
+ If a single hb_bidder is flagged as having a delivery drop, it is likely the source of the issue.
13
+ Check for any recent changes in GAM related to this bidder—this includes targeting changes, blocking rules, or budget issues.
14
+ Send the flagged bidder information to the Rev Ops team for deeper investigation.
15
+ The Ad Ops and Ad Code teams should also verify if there were any recent changes in GAM setup or ad code logic affecting bidder behavior.
16
+ If most or all bidders are flagged, it’s likely the drop is not specific to a single bidder.
17
+
18
+ Device:
19
+ If a single device type is flagged (e.g., desktop, phone, tablet), the issue is likely related to that device category.
20
+ Investigate whether there were recent front-end or ad code changes that could be suppressing impressions on that device type.
21
+ The Ad Code team should verify targeting and rendering conditions. The Ad Ops team should check for any targeting changes in GAM.
22
+ If multiple or all device types are flagged, the issue may lie upstream, not within device-specific rendering or targeting logic.
23
+
24
+ Ad Unit:
25
+ If a single ad unit group (e.g., Sidebar, Content, Footer) is flagged, investigate whether recent changes affected the structure or availability of that unit.
26
+ Escalate the findings to the Ad Code team. The Ad Ops team should check for any targeting changes in GAM.
27
+ If most or all ad unit groups are flagged, the issue is less likely to be specific to a single ad unit and may be campaign- or integration-related.
28
+
29
+ Refresh:
30
+ If a single refresh value (e.g., 1, 2, 3...) is flagged, it may indicate a technical issue affecting impressions during specific refresh cycles.
31
+ Investigate whether recent ad code changes modified refresh logic or behavior.
32
+ Coordinate with the Ad Code team to confirm if affected refresh values correspond with known logic updates.
33
+ The Ad Ops team should check for any targeting changes in GAM.
34
+ If most or all refresh values are flagged, the issue likely lies outside of refresh logic, possibly within broader rendering or integration pipelines.
35
+ """
delivery_main.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import snowflake.connector
6
+ import base64
7
+ from datetime import timedelta, datetime
8
+ from cryptography.hazmat.primitives import serialization
9
+ from cryptography.hazmat.backends import default_backend
10
+ import concurrent.futures
11
+
12
+ # Import SQL query functions
13
+ from delivery_queries import (
14
+ get_main_query,
15
+ get_main_int_sov_query,
16
+ get_bidder_query,
17
+ get_flex_bucket_query,
18
+ get_device_query,
19
+ get_ad_unit_query,
20
+ get_refresh_query,
21
+ )
22
+ from delivery_section_utils import update_section_generic_drop
23
+
24
+ # Import the NEXT_STEPS_INSTRUCTIONS for delivery drops
25
+ from delivery_instructions import NEXT_STEPS_INSTRUCTIONS
26
+
27
+ # Initialize session state
28
+ st.session_state.setdefault("query_run", False)
29
+ st.session_state.setdefault("findings_messages", [])
30
+ st.session_state.setdefault("query_df", None)
31
+ st.session_state.setdefault("agg_df", None)
32
+ st.session_state.setdefault("top_level_drop_time", None)
33
+ st.session_state.setdefault("key_findings_output", None)
34
+
35
+
36
@st.cache_data(show_spinner=False)
def cached_run_query(
    query,
    private_key_b64: str,
    user: str,
    account_identifier: str,
    warehouse: str,
    database: str,
    schema: str,
    role: str,
):
    """Run a Snowflake query and return the result as a DataFrame.

    Connection parameters are passed explicitly (rather than read from module
    globals) so ``st.cache_data`` keys the cache on them; the private key
    travels as base64 text because cache keys must be hashable.

    The cursor and connection are closed even when the query fails, so a
    raising query cannot leak a warehouse connection.
    """
    der = base64.b64decode(private_key_b64)
    conn = snowflake.connector.connect(
        user=user,
        account=account_identifier,
        warehouse=warehouse,
        database=database,
        schema=schema,
        role=role,
        private_key=der,
    )
    try:
        cs = conn.cursor()
        try:
            # Guard against runaway queries holding the warehouse for hours.
            cs.execute("ALTER SESSION SET STATEMENT_TIMEOUT_IN_SECONDS = 1800")
            cs.execute(query)
            rows = cs.fetchall()
            cols = [c[0] for c in cs.description]
            return pd.DataFrame(rows, columns=cols)
        finally:
            cs.close()
    finally:
        conn.close()
67
+
68
+
69
+ def run_drop_query(
70
+ table,
71
+ start_datetime,
72
+ end_datetime,
73
+ message_filter,
74
+ campaign_id,
75
+ private_key_str,
76
+ user,
77
+ account_identifier,
78
+ warehouse,
79
+ database,
80
+ schema,
81
+ role,
82
+ client,
83
+ integration_filter=None,
84
+ ad_format_filter=None,
85
+ ):
86
+ """
87
+ Universal drop analysis for any Integration + Ad_Format.
88
+ """
89
+ # 1) Build SQL statements with filters
90
+ main_sql = get_main_query(
91
+ table,
92
+ start_datetime,
93
+ end_datetime,
94
+ message_filter,
95
+ campaign_id,
96
+ integration_filter,
97
+ ad_format_filter,
98
+ )
99
+ flex_sql = get_flex_bucket_query(
100
+ table,
101
+ start_datetime,
102
+ end_datetime,
103
+ message_filter,
104
+ campaign_id,
105
+ integration_filter,
106
+ ad_format_filter,
107
+ )
108
+ bidder_sql = get_bidder_query(
109
+ table,
110
+ start_datetime,
111
+ end_datetime,
112
+ message_filter,
113
+ campaign_id,
114
+ integration_filter,
115
+ ad_format_filter,
116
+ )
117
+ device_sql = get_device_query(
118
+ table,
119
+ start_datetime,
120
+ end_datetime,
121
+ message_filter,
122
+ campaign_id,
123
+ integration_filter,
124
+ ad_format_filter,
125
+ )
126
+ ad_unit_sql = get_ad_unit_query(
127
+ table,
128
+ start_datetime,
129
+ end_datetime,
130
+ message_filter,
131
+ campaign_id,
132
+ integration_filter,
133
+ ad_format_filter,
134
+ )
135
+ refresh_sql = get_refresh_query(
136
+ table,
137
+ start_datetime,
138
+ end_datetime,
139
+ message_filter,
140
+ campaign_id,
141
+ integration_filter,
142
+ ad_format_filter,
143
+ )
144
+
145
+ # 2) Run top-level query once
146
+ if not st.session_state["query_run"]:
147
+ try:
148
+ t0 = time.time()
149
+ with st.spinner("Running top-level impressions query..."):
150
+ df = cached_run_query(
151
+ main_sql,
152
+ private_key_str,
153
+ user,
154
+ account_identifier,
155
+ warehouse,
156
+ database,
157
+ schema,
158
+ role,
159
+ )
160
+ elapsed = time.time() - t0
161
+ mins, secs = divmod(elapsed, 60)
162
+ st.info(f"Query ran in {int(mins)}m {secs:.2f}s")
163
+
164
+ # Normalize timestamps
165
+ df.columns = [c.upper() for c in df.columns]
166
+ df = df.sort_values(["EST_HOUR", "EST_MINUTE"])
167
+ df["timestamp"] = pd.to_datetime(
168
+ df["EST_DATE"].astype(str)
169
+ + " "
170
+ + df["EST_HOUR"].astype(str).str.zfill(2)
171
+ + ":"
172
+ + df["EST_MINUTE"].astype(str).str.zfill(2)
173
+ )
174
+ df["5min"] = df["timestamp"].dt.floor("5T")
175
+ base_date = (
176
+ df[df["TIMEFRAME"] == "TODAY"]["5min"].iloc[0].normalize()
177
+ if not df[df["TIMEFRAME"] == "TODAY"].empty
178
+ else pd.Timestamp("today").normalize()
179
+ )
180
+ start_hour = int(st.session_state.get("start_hour", 23))
181
+
182
+ def norm(ts):
183
+ return ts + pd.Timedelta(hours=24) if ts.hour < start_hour else ts
184
+
185
+ df["normalized_time"] = (
186
+ base_date + (df["5min"] - df["5min"].dt.normalize())
187
+ ).apply(norm)
188
+
189
+ # Aggregate
190
+ agg_df = df.groupby(["TIMEFRAME", "normalized_time"], as_index=False)[
191
+ "CNT"
192
+ ].sum()
193
+
194
+ # Save to state
195
+ st.session_state.update(
196
+ query_df=df, agg_df=agg_df, query_run=True, top_level_drop_time=None
197
+ )
198
+ except Exception as e:
199
+ st.error(f"Main query error: {e}")
200
+ return
201
+
202
+ else:
203
+ df = st.session_state["query_df"]
204
+ agg_df = st.session_state["agg_df"]
205
+
206
+ # 3) Display top-level
207
+ st.header("Top-Level Impressions Data")
208
+ drop_time = None
209
+ for ts in sorted(agg_df["normalized_time"].unique()):
210
+ today_cnt = agg_df[
211
+ (agg_df["normalized_time"] == ts) & (agg_df["TIMEFRAME"] == "TODAY")
212
+ ]["CNT"]
213
+ other_cnt = agg_df[
214
+ (agg_df["normalized_time"] == ts) & (agg_df["TIMEFRAME"] != "TODAY")
215
+ ]["CNT"]
216
+ if (
217
+ not today_cnt.empty
218
+ and not other_cnt.empty
219
+ and today_cnt.values[0] <= 0.9 * other_cnt.mean()
220
+ ):
221
+ drop_time = ts
222
+ break
223
+
224
+ if drop_time:
225
+ msg = f"Top-Level: Delivery drop detected at {drop_time.strftime('%I:%M %p')}."
226
+ st.warning(msg)
227
+ else:
228
+ msg = "Top-Level: No significant delivery drop detected."
229
+ st.info(msg)
230
+
231
+ # Append message once
232
+ findings_messages = st.session_state.setdefault("findings_messages", [])
233
+ if msg not in findings_messages:
234
+ findings_messages.append(msg)
235
+ st.session_state["top_level_drop_time"] = drop_time
236
+
237
+ with st.expander("Raw Data"):
238
+ st.dataframe(df)
239
+ with st.expander("Aggregated Data"):
240
+ st.dataframe(agg_df)
241
+
242
+ fig = px.line(
243
+ agg_df,
244
+ x="normalized_time",
245
+ y="CNT",
246
+ color="TIMEFRAME",
247
+ labels={"normalized_time": "Time of Day", "CNT": "Impressions"},
248
+ )
249
+ fig.update_xaxes(tickformat="%I:%M %p")
250
+ st.plotly_chart(fig, use_container_width=True)
251
+
252
+ # 4) Share-of-Voice
253
+ st.markdown("<hr>", unsafe_allow_html=True)
254
+ st.header("Share of Voice Analysis")
255
+ sov_sql = get_main_int_sov_query(
256
+ table,
257
+ start_datetime,
258
+ end_datetime,
259
+ message_filter,
260
+ campaign_id,
261
+ ad_format_filter=ad_format_filter,
262
+ )
263
+ try:
264
+ with st.spinner("Running SOV query..."):
265
+ sov_df = cached_run_query(
266
+ sov_sql,
267
+ private_key_str,
268
+ user,
269
+ account_identifier,
270
+ warehouse,
271
+ database,
272
+ schema,
273
+ role,
274
+ )
275
+ # Normalize same as above
276
+ sov_df["timestamp"] = pd.to_datetime(
277
+ sov_df["EST_DATE"].astype(str)
278
+ + " "
279
+ + sov_df["EST_HOUR"].astype(str).str.zfill(2)
280
+ + ":"
281
+ + sov_df["EST_MINUTE"].astype(str).str.zfill(2)
282
+ )
283
+ sov_df["5min"] = sov_df["timestamp"].dt.floor("5T")
284
+ base = pd.Timestamp("today").normalize()
285
+ sov_df["normalized_time"] = (
286
+ base + (sov_df["5min"] - sov_df["5min"].dt.normalize())
287
+ ).apply(lambda ts: ts + pd.Timedelta(hours=24) if ts.hour < start_hour else ts)
288
+
289
+ # Group, exclude, percent, order
290
+ sov_grp = sov_df.groupby(["normalized_time", "INTEGRATION"], as_index=False)[
291
+ "CNT"
292
+ ].sum()
293
+ sov_grp = sov_grp[~sov_grp["INTEGRATION"].str.contains("Ignore|Affiliate|PG")]
294
+ sov_grp["share"] = sov_grp["CNT"] / sov_grp.groupby("normalized_time")[
295
+ "CNT"
296
+ ].transform("sum")
297
+ order = (
298
+ sov_grp.groupby("INTEGRATION")["share"]
299
+ .sum()
300
+ .sort_values(ascending=False)
301
+ .index.tolist()
302
+ )
303
+ fig2 = px.line(
304
+ sov_grp,
305
+ x="normalized_time",
306
+ y="share",
307
+ color="INTEGRATION",
308
+ category_orders={"INTEGRATION": order},
309
+ labels={"share": "Share of Total Impressions"},
310
+ )
311
+ fig2.update_xaxes(tickformat="%I:%M %p")
312
+ fig2.update_yaxes(tickformat=".2%")
313
+ st.plotly_chart(fig2, use_container_width=True)
314
+ except Exception as e:
315
+ st.error(f"SOV error: {e}")
316
+
317
+ # 5) Key Findings via OpenAI <-- CUT starts here
318
+ st.markdown("<hr>", unsafe_allow_html=True)
319
+ st.header("Key Findings and Next Steps")
320
+ key_findings_container = st.container()
321
+ with key_findings_container:
322
+ if st.session_state.get("key_findings_output"):
323
+ st.markdown(
324
+ st.session_state.get("key_findings_output"),
325
+ unsafe_allow_html=True,
326
+ )
327
+ else:
328
+ st.info(
329
+ "Key findings will appear here once additional queries have finished."
330
+ )
331
+
332
+ def generate_key_findings_callback():
333
+ findings = "\n".join(st.session_state.get("findings_messages", []))
334
+ flex_jira_info = st.session_state.get("flex_jira_info", "")
335
+ jira_section = (
336
+ f"\nJira Ticket Information from Flex Bucket section:\n{flex_jira_info}\n"
337
+ if flex_jira_info
338
+ else ""
339
+ )
340
+ prompt = (
341
+ "You are a helpful analyst investigating a drop in ad delivery. "
342
+ "A delivery drop detection dashboard has compiled a list of findings "
343
+ "showing potential drops across different dimensions. Below are the detailed findings "
344
+ "from the dashboard, along with any flagged Jira ticket information. "
345
+ "The NEXT_STEPS_INSTRUCTIONS file contains recommended next steps for each section "
346
+ "depending on the drop(s) flagged in the dashboard:\n\n"
347
+ f"Findings:\n{findings}\n"
348
+ f"{jira_section}\n"
349
+ "Next Steps Instructions:\n"
350
+ f"{NEXT_STEPS_INSTRUCTIONS}\n\n"
351
+ "Using the Findings, Jira section information, and Next Steps Instructions as helpful context, "
352
+ "create a concise summary that identifies the likely cause/causes of any detected delivery drops "
353
+ "and recommends actionable next steps. The summary should be a few sentences long followed by bullet points "
354
+ "with the main findings and recommended next steps. Please output the summary in Markdown format with each bullet "
355
+ "point on a new line, and indent sub-bullets properly. Ensure that each bullet point is on its own line. "
356
+ "There is no need to explicitly mention the Instructions file in the summary; just use it to inform your analysis."
357
+ )
358
+ st.session_state["key_findings"] = prompt
359
+ try:
360
+ response = client.responses.create(
361
+ model="o3-mini",
362
+ instructions="You are a helpful analyst who provides insights and recommends next steps.",
363
+ input=prompt,
364
+ )
365
+ st.session_state["key_findings_output"] = response.output_text.strip()
366
+ except Exception as e:
367
+ st.session_state["key_findings_output"] = f"Error calling OpenAI API: {e}"
368
+
369
+ # Once additional queries complete (below), automatically generate key findings:
370
+ generate_key_findings_callback()
371
+
372
+ # 6) Breakdown dimensions
373
+ st.markdown("<hr>", unsafe_allow_html=True)
374
+ st.header("Specific Dimensions Data")
375
+ st.info("Running breakdown queries...")
376
+ queries = {
377
+ "flex_bucket": flex_sql,
378
+ "bidder": bidder_sql,
379
+ "device": device_sql,
380
+ "ad_unit": ad_unit_sql,
381
+ "refresh": refresh_sql,
382
+ }
383
+ with st.spinner("Running additional queries..."):
384
+ with concurrent.futures.ThreadPoolExecutor() as ex:
385
+ futures = {
386
+ k: ex.submit(
387
+ cached_run_query,
388
+ q,
389
+ private_key_str,
390
+ user,
391
+ account_identifier,
392
+ warehouse,
393
+ database,
394
+ schema,
395
+ role,
396
+ )
397
+ for k, q in queries.items()
398
+ }
399
+ start_ts = {k: time.time() for k in queries}
400
+ conts = {k: st.container() for k in queries}
401
+ while futures:
402
+ done, _ = concurrent.futures.wait(
403
+ futures.values(),
404
+ timeout=0.5,
405
+ return_when=concurrent.futures.FIRST_COMPLETED,
406
+ )
407
+ for fut in done:
408
+ key = next(k for k, v in futures.items() if v is fut)
409
+ df_res = fut.result()
410
+ update_section_generic_drop(
411
+ key, df_res, start_ts, conts[key], drop_time
412
+ )
413
+ del futures[key]
414
+
415
+ # Update the key findings container with the new output.
416
+ with key_findings_container:
417
+ st.markdown(
418
+ st.session_state.get("key_findings_output", ""),
419
+ unsafe_allow_html=True,
420
+ )
delivery_queries.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
+ def _quote_identifier(identifier: str) -> str:
5
+ """Quote SQL identifiers that contain special characters."""
6
+
7
+ def quote_part(part: str) -> str:
8
+ if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", part):
9
+ return part
10
+ return f'"{part}"'
11
+
12
+ return ".".join(quote_part(p) for p in identifier.split("."))
13
+
14
+
15
def get_main_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    integration_filter=None,
    ad_format_filter=None,
):
    """Return the main impression count query filtered by integration and ad format.

    The query unions four identical SELECTs over different time windows:
    'Today' plus the same window shifted back 1, 2 and 3 weeks, so charts
    can compare week-over-week delivery. ``message_filter`` and
    ``campaign_id`` are accepted for signature parity with the other query
    builders but are not referenced here.

    NOTE(review): ``start_datetime``/``end_datetime`` and the optional
    filters are interpolated directly into the SQL text — pass trusted
    values only.
    """
    table = _quote_identifier(table)

    # Build optional filters to apply after the CTE union.
    # Filtering on the calculated aliases (Integration/Ad_Format)
    # in the CTE `WHERE` clause would force Snowflake to interpret
    # those names as existing columns and attempt type coercion,
    # which triggered errors like:
    #     Numeric value 'bciq1rts' is not recognized
    # Instead we apply the filters on the outer SELECT where the
    # aliases are available.
    post_union_filter = ""
    if integration_filter:
        post_union_filter += f" AND Integration = '{integration_filter}'"
    if ad_format_filter:
        post_union_filter += f" AND Ad_Format = '{ad_format_filter}'"

    # Demand-source classification, shared verbatim by every timeframe
    # CTE (previously copy-pasted four times).
    integration_case = """CASE
            WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
            WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
            WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
            WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
            WHEN b.name LIKE '%AdX%' THEN 'AdX'
            WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
            WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
            WHEN c.name LIKE '%AdX%' THEN 'AdX'
            WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
            WHEN b.name LIKE '39_%_%' THEN 'Direct'
            WHEN b.name LIKE '38_%_%' THEN 'Direct'
            WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
            WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
            WHEN b.name LIKE '7_%_%_%' THEN 'PG'
            WHEN b.name LIKE '5_%_%_%' THEN 'PG'
            WHEN LEFT(b.name,1) = '4'
                AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
                AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
                AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
                AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
            WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
            WHEN (body[0]:campaignId IS NULL
                AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
                THEN 'Prebid'
            WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
            WHEN c.id IS NOT NULL THEN 'Prebid'
            ELSE 'OB'
        END AS Integration"""

    ad_format_case = """CASE
            WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
            WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
            ELSE 'Display'
        END AS Ad_Format"""

    def timeframe_cte(name, label, weeks_back):
        # One CTE per timeframe; only the window bounds and the
        # timeframe label differ between the four.
        if weeks_back == 0:
            window = f"BETWEEN '{start_datetime}' AND '{end_datetime}'"
        else:
            days = 7 * weeks_back
            window = (
                f"BETWEEN dateadd(DAY,-{days},'{start_datetime}') "
                f"AND dateadd(DAY,-{days},'{end_datetime}')"
            )
        return f"""{name} AS (
    SELECT
        to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
        {integration_case},
        {ad_format_case},
        COUNT(*) AS CNT,
        '{label}' AS timeframe
    FROM {table} a
    LEFT JOIN ANALYTICS.GAM360.ORDERS b
        ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
    LEFT JOIN ANALYTICS.GAM360.LINEITEM c
        ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
    WHERE convert_timezone('UTC','America/New_York',datetime)
        {window}
        AND message = 'SlotRenderEnded::adImpression'
    GROUP BY ALL
)"""

    # NOTE: "COUNT(*) AS CNT" appears once per CTE (four times total), as
    # before — the get_*_query wrappers rely on str.replace hitting every
    # SELECT list.
    ctes = ",\n".join(
        timeframe_cte(name, label, weeks)
        for name, label, weeks in (
            ("today", "Today", 0),
            ("prev1", "1 Week Ago", 1),
            ("prev2", "2 Weeks Ago", 2),
            ("prev3", "3 Weeks Ago", 3),
        )
    )

    return f"""
WITH {ctes}
SELECT * FROM (
    SELECT * FROM today
    UNION ALL SELECT * FROM prev1
    UNION ALL SELECT * FROM prev2
    UNION ALL SELECT * FROM prev3
)
WHERE 1=1 {post_union_filter}
"""
260
+
261
+
262
def get_bidder_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    integration_filter=None,
    ad_format_filter=None,
):
    """Impression query broken down by header-bidding bidder.

    Delegates to get_main_query and appends the hb_bidder slot-targeting
    value to every SELECT list (the replacement hits each timeframe CTE).
    """
    main_sql = get_main_query(
        table,
        start_datetime,
        end_datetime,
        message_filter,
        campaign_id,
        integration_filter,
        ad_format_filter,
    )
    bidder_column = "body[0]:slotTargeting:hb_bidder[0]::varchar AS hb_bidder"
    return main_sql.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {bidder_column}")
285
+
286
+
287
def get_flex_bucket_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    integration_filter=None,
    ad_format_filter=None,
):
    """Impression query broken down by flex bucket.

    Delegates to get_main_query and appends the ``bucket`` column to
    every SELECT list (the replacement hits each timeframe CTE).
    """
    main_sql = get_main_query(
        table,
        start_datetime,
        end_datetime,
        message_filter,
        campaign_id,
        integration_filter,
        ad_format_filter,
    )
    return main_sql.replace("COUNT(*) AS CNT", "COUNT(*) AS CNT, bucket")
306
+
307
+
308
def get_device_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    integration_filter=None,
    ad_format_filter=None,
):
    """Impression query broken down by device class derived from the user agent."""
    main_sql = get_main_query(
        table,
        start_datetime,
        end_datetime,
        message_filter,
        campaign_id,
        integration_filter,
        ad_format_filter,
    )
    # Bucket the raw useragent string into tablet/desktop/phone/other.
    device_bucket = (
        "CASE "
        "WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' "
        "WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' "
        "WHEN useragent LIKE '%Android%' OR useragent LIKE '%iPhone%' OR useragent LIKE '%Mobi%' THEN 'phone' "
        "ELSE 'other' END AS device"
    )
    return main_sql.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {device_bucket}")
335
+
336
+
337
def get_ad_unit_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    integration_filter=None,
    ad_format_filter=None,
):
    """Impression query broken down by ad-unit group (from slotElementId)."""
    main_sql = get_main_query(
        table,
        start_datetime,
        end_datetime,
        message_filter,
        campaign_id,
        integration_filter,
        ad_format_filter,
    )
    # Collapse slot element IDs into a small set of named ad-unit groups.
    ad_unit_grouping = (
        "CASE "
        "WHEN body[0]:slotElementId::varchar LIKE '%Content%' THEN 'Content' "
        "WHEN body[0]:slotElementId::varchar LIKE '%Footer%' THEN 'Footer' "
        "WHEN body[0]:slotElementId::varchar LIKE '%Recipe%' THEN 'Recipe' "
        "WHEN body[0]:slotElementId::varchar LIKE '%Sidebar%' THEN 'Sidebar' "
        "WHEN body[0]:slotElementId::varchar LIKE '%Header%' THEN 'Header' "
        "WHEN body[0]:slotElementId::varchar LIKE '%Below_Post%' THEN 'Below_Post' "
        "WHEN body[0]:slotElementId::varchar LIKE '%Outstream%' THEN 'Sticky Outstream' "
        "WHEN body[0]:slotElementId::varchar LIKE '%Video%' THEN 'Video' "
        "ELSE 'Other' END AS ad_unit_group"
    )
    return main_sql.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {ad_unit_grouping}")
368
+
369
+
370
def get_refresh_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    integration_filter=None,
    ad_format_filter=None,
):
    """Impression query broken down by the refresh slot-targeting value."""
    main_sql = get_main_query(
        table,
        start_datetime,
        end_datetime,
        message_filter,
        campaign_id,
        integration_filter,
        ad_format_filter,
    )
    refresh_column = "body[0]:slotTargeting:refresh[0]::varchar AS Refresh"
    return main_sql.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {refresh_column}")
390
+
391
+
392
def get_main_int_sov_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    # integration_filter is deliberately absent: SOV is never integration-filtered.
    ad_format_filter=None,
):
    """Return the share-of-voice query filtered only by ad format.

    Covers only the 'Today' window (no week-over-week CTEs).
    ``message_filter`` and ``campaign_id`` are accepted for signature
    parity with the other query builders but are not referenced here.

    NOTE(review): datetimes and ``ad_format_filter`` are interpolated
    directly into the SQL text — pass trusted values only.
    """
    table = _quote_identifier(table)

    # Only apply Ad_Format filtering after the CTE so that the alias
    # can be referenced safely (filtering inside the CTE WHERE clause
    # makes Snowflake treat the alias as a real column).
    post_union_filter = ""
    if ad_format_filter:
        post_union_filter = f" AND Ad_Format = '{ad_format_filter}'"

    return f"""
WITH today AS (
    SELECT
        to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
        CASE
            WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
            WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
            WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
            WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
            WHEN b.name LIKE '%AdX%' THEN 'AdX'
            WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
            WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
            WHEN c.name LIKE '%AdX%' THEN 'AdX'
            WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
            WHEN b.name LIKE '39_%_%' THEN 'Direct'
            WHEN b.name LIKE '38_%_%' THEN 'Direct'
            WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
            WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
            WHEN b.name LIKE '7_%_%_%' THEN 'PG'
            WHEN b.name LIKE '5_%_%_%' THEN 'PG'
            WHEN LEFT(b.name,1) = '4'
                AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
                AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
                AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
                AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
            WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
            WHEN (body[0]:campaignId IS NULL
                AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
                THEN 'Prebid'
            WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
            WHEN c.id IS NOT NULL THEN 'Prebid'
            ELSE 'OB'
        END AS Integration,
        CASE
            WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
            WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
            ELSE 'Display'
        END AS Ad_Format,
        COUNT(*) AS CNT,
        'Today' AS timeframe
    FROM {table} a
    LEFT JOIN ANALYTICS.GAM360.ORDERS b
        ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
    LEFT JOIN ANALYTICS.GAM360.LINEITEM c
        ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
    WHERE convert_timezone('UTC','America/New_York',datetime)
        BETWEEN '{start_datetime}' AND '{end_datetime}'
        AND message = 'SlotRenderEnded::adImpression'
    GROUP BY ALL
)
SELECT * FROM today
WHERE 1=1 {post_union_filter}
"""
delivery_section_utils.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import streamlit as st
5
+
6
# Map the section keys (space-separated) to the DataFrame column to group by.
# Every section currently shares the same 10% drop threshold.
SECTION_CONFIG = {
    section: {"group_col": column, "drop_percent": 0.10}
    for section, column in (
        ("flex bucket", "BUCKET"),
        ("bidder", "HB_BIDDER"),
        ("device", "DEVICE"),
        ("ad unit", "AD_UNIT_GROUP"),
        ("refresh", "REFRESH"),
    )
}
29
+
30
def update_section_generic_drop(key, df, start_times, container, drop_time):
    """Render a generic 5-minute breakdown with drop detection for one section.

    Parameters
    ----------
    key : str
        Section name; may be underscore- or space-separated (e.g.
        'flex_bucket' or 'flex bucket') — it is normalized before looking
        up SECTION_CONFIG.
    df : pandas.DataFrame
        Query result containing EST_DATE/EST_HOUR/EST_MINUTE/CNT/TIMEFRAME
        plus the section's group column (case-insensitive).
    start_times : dict
        Maps section key to the time.time() at which its query was
        launched; used only to report elapsed query time.
    container : streamlit container
        Destination for all rendered output.
    drop_time : datetime-like or None
        The 5-minute interval flagged at the top level; groups whose count
        at that interval falls at least ``drop_percent`` below the
        interval's cross-group average are reported.
    """
    elapsed = time.time() - start_times[key]
    minutes, seconds = divmod(elapsed, 60)

    # Work on a copy: the original assigned df.columns in place, mutating
    # the caller's frame as a side effect.
    df = df.copy()
    df.columns = [c.upper() for c in df.columns]
    df = df.sort_values(["EST_HOUR", "EST_MINUTE"])
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str) + " " +
        df["EST_HOUR"].astype(str).str.zfill(2) + ":" +
        df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    # "5min" is the supported pandas frequency alias; "T"/"5T" is deprecated.
    df["5MIN"] = df["timestamp"].dt.floor("5min")

    # Normalize the lookup key to match SECTION_CONFIG
    lookup = key.replace("_", " ").lower()
    config = SECTION_CONFIG.get(lookup)
    if not config:
        st.error(f"No configuration for section '{key}'.")
        return

    group_col = config["group_col"]
    drop_pct = config["drop_percent"]

    with container:
        st.subheader(f"{lookup.title()} Data")
        st.info(f"Query completed in {int(minutes)}m {seconds:.2f}s")

        # Filter to TODAY (uppercase)
        today_data = df[df["TIMEFRAME"].str.upper() == "TODAY"]
        if today_data.empty:
            st.info("No TODAY data for this section.")
            return

        # Aggregate over 5-min intervals & plot
        agg_today = (
            today_data
            .groupby(["5MIN", group_col], as_index=False)["CNT"]
            .sum()
        )
        title = f"{lookup.title()} Impressions by Time of Day (5‑min)"
        fig = px.line(
            agg_today,
            x="5MIN",
            y="CNT",
            color=group_col,
            title=title,
            labels={"5MIN": "Time", "CNT": "Impressions", group_col: lookup.title()}
        )
        fig.update_xaxes(tickformat="%I:%M %p")
        st.plotly_chart(fig, use_container_width=True)

        # Drop detection at the flagged interval: each group's count is
        # compared against the interval's average across all groups.
        drop_subset = agg_today[agg_today["5MIN"] == drop_time]
        flagged = []
        if not drop_subset.empty:
            avg_cnt = drop_subset["CNT"].mean()
            for grp, cnt in drop_subset.groupby(group_col)["CNT"].sum().items():
                if cnt <= (1 - drop_pct) * avg_cnt:
                    flagged.append(grp)

        drop_str = drop_time.strftime("%I:%M %p") if drop_time else "N/A"
        if flagged:
            st.warning(f"{lookup.title()}: At {drop_str}, these groups dropped: {', '.join(flagged)}.")
        else:
            st.info(f"{lookup.title()}: No significant drop at {drop_str}.")

        # Optional raw TODAY data
        with st.expander(f"Show Raw TODAY {lookup.title()} Data"):
            st.dataframe(today_data)
.gitattributes → gitattributes RENAMED
File without changes
house_ad_instructions.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # instructions.py
2
+
3
# Guidance text for interpreting per-dimension house-ad spike flags.
# NOTE(review): appears to be fed into the key-findings prompt by
# house_ad_main — confirm before editing wording that the model relies on.
NEXT_STEPS_INSTRUCTIONS = """
Flex:
If a single flex bucket is flagged as having a house ad spike, that flex bucket is the likely cause of the house ad spike.
Even if other sections are showing an increase in house ads, the single flex bucket is most likely the cause of the spike.
If other sections are showing increases in house ads, that information should be sent to the ad code team as additional reference for the flagged flex bucket.
The deployment information listed in the flex_section message, especially the deployment time and bucket, should be sent to the ad code team so that they can investigate the issue further.
Make sure to include a hyperlink to the Jira issue in your response.
If multiple flex buckets are flagged as having a house ad spike, the issue could be related to the flagged buckets.
If the majority of flex buckets are being flagged as having a house ad spike, the issue is likely not related to flex buckets.

hb_bidder:
If a single hb_bidder is flagged as having a house ad spike, the issue is likely caused by or related to that hb_bidder.
Even if other sections are showing an increase in house ads, the hb_bidder is most likely the cause of the spike.
The hb_bidder information should be sent to the Rev Ops team to investigate further.
The Ad Ops team should also investigate whether any GAM changes were recently made that could be impacting the hb_bidder.
The Ad Code team should also investigate if there were any recent ad code changes that could be impacting the hb_bidder.
If the majority of hb_bidder values are being flagged as having a house ad spike, the issue is likely not related to hb_bidder values.

hb_deal:
If a single hb_deal is flagged as having a house ad spike, the issue is likely caused by or related to that hb_deal.
Even if other sections are showing an increase in house ads, the hb_deal is most likely the cause of the spike.
The Ad Ops team should also investigate whether any GAM changes, especially changes to protections and/or UPRs, were recently made that could be impacting the hb_deal.
The hb_deal information should be sent to the Sales team to investigate further.
If the majority of hb_deal values are being flagged as having a house ad spike, the issue is likely not related to hb_deal values.

Ad Unit:
If a single ad unit is flagged as having a house ad spike, the issue is likely related to that ad unit.
The ad code team should also investigate if there were any recent ad code changes that could be impacting the ad unit.
If the majority of ad unit values are being flagged as having a house ad spike, the issue is likely not related to ad unit values.

Browser:
If a single browser is flagged as having a house ad spike, the issue is likely related to that browser.
The ad code team should investigate if there were any recent ad code changes that could be impacting the browser.
If the majority of browser values are being flagged as having a house ad spike, the issue is likely not related to browser values.

Device:
If a single device is flagged as having a house ad spike, the issue is likely related to that device.
The ad code team should investigate if there were any recent ad code changes that could be impacting the device.
If the majority of device values are being flagged as having a house ad spike, the issue is likely not related to device values.

Random Integer:
If a single random integer is flagged as having a house ad spike, the issue is likely caused by or related to that random integer.
If multiple random integer values are being flagged as having a house ad spike, the issue could be related to those random integer values.
The Ad Ops team should investigate whether any GAM changes were recently made that could be impacting the random integer value(s).
The ad code team should investigate if there were any recent ad code changes that could be impacting the random integer value(s).
If the majority of random integer values are being flagged as having a house ad spike, the issue is likely not related to random integer values.

hb_pb:
If a single hb_pb value is flagged as having a house ad spike, the issue is likely caused by or related to that hb_pb.
The Ad Ops team should also investigate whether any GAM changes were recently made that could be impacting the hb_pb.
The ad code team should investigate if there were any recent ad code changes that could be impacting the hb_pb.
If the majority of hb_pb values are being flagged as having a house ad spike, the issue is likely not related to hb_pb values.

hb_size:
If a single hb_size value is flagged as having a house ad spike, the issue is likely related to that hb_size.
The ad code team should investigate if there were any recent ad code changes that could be impacting the hb_size.
If the majority of hb_size values are being flagged as having a house ad spike, the issue is likely not related to hb_size values.
"""
house_ad_main.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import snowflake.connector
6
+ import base64
7
+ from datetime import timedelta, datetime
8
+ from cryptography.hazmat.primitives import serialization
9
+ from cryptography.hazmat.backends import default_backend
10
+ import concurrent.futures
11
+
12
+ # Import SQL query functions.
13
+ from house_ad_queries import (
14
+ get_main_query,
15
+ get_flex_query,
16
+ get_bidder_query,
17
+ get_deal_query,
18
+ get_ad_unit_query,
19
+ get_browser_query,
20
+ get_device_query,
21
+ get_random_integer_query,
22
+ get_hb_pb_query,
23
+ get_hb_size_query,
24
+ )
25
+
26
+ # Import the house ad section config.
27
+ from house_ad_section_utils import update_section_generic
28
+
29
+ # Import the NEXT_STEPS_INSTRUCTIONS at the top.
30
+ from house_ad_instructions import NEXT_STEPS_INSTRUCTIONS
31
+
32
# Initialize session state keys at the top so they only get set once.
for _state_key, _state_default in (
    ("query_run", False),
    ("findings_messages", []),
    ("key_findings_output", None),
    ("query_df", None),
    ("agg_df", None),
    ("top_level_spike_time", None),
):
    st.session_state.setdefault(_state_key, _state_default)
39
+
40
+ # --- Helper Functions ---
41
+
42
+ # def load_private_key(key_str):
43
+ # """Load a PEM-formatted private key."""
44
+ # return serialization.load_pem_private_key(
45
+ # key_str.encode("utf-8"),
46
+ # password=None,
47
+ # backend=default_backend()
48
+ # )
49
+
50
+
51
+ # Use caching to avoid re-running the same query on every interaction.
52
@st.cache_data(show_spinner=False)
def cached_run_query(
    query,
    private_key_b64: str,
    user: str,
    account_identifier: str,
    warehouse: str,
    database: str,
    schema: str,
    role: str,
):
    """Connect to Snowflake, execute *query*, and return a DataFrame.

    Cached by st.cache_data to avoid re-running the same query on every
    interaction — all arguments form the cache key.

    Parameters
    ----------
    query : str
        SQL text to execute.
    private_key_b64 : str
        Base64-encoded DER private key used for key-pair authentication.
    user, account_identifier, warehouse, database, schema, role : str
        Snowflake connection parameters.

    Returns
    -------
    pandas.DataFrame
        The full result set, with column names taken from the cursor
        description.
    """
    # Decode the base64-encoded DER key for key-pair authentication.
    der = base64.b64decode(private_key_b64)
    conn = snowflake.connector.connect(
        user=user,
        account=account_identifier,
        warehouse=warehouse,
        database=database,
        schema=schema,
        role=role,
        private_key=der,
    )
    # try/finally ensures the cursor and connection are released even if
    # the query fails (the original leaked both on any exception).
    try:
        cs = conn.cursor()
        try:
            # Long-running breakdown queries: allow up to 30 minutes.
            cs.execute("ALTER SESSION SET STATEMENT_TIMEOUT_IN_SECONDS = 1800")
            cs.execute(query)
            results = cs.fetchall()
            columns = [col[0] for col in cs.description]
        finally:
            cs.close()
    finally:
        conn.close()
    return pd.DataFrame(results, columns=columns)
85
+
86
+
87
# --- Main Function for House Ad Spike Analysis ---


def run_house_ad_spike_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    private_key_str,
    user,
    account_identifier,
    warehouse,
    database,
    schema,
    role,
    client,
):
    """
    Run the house ad spike query along with additional dimensions,
    generate key findings via OpenAI, and display the results.

    Parameters:
        table: Snowflake table the queries read from (quoted by the query builders).
        start_datetime / end_datetime: EST window bounds interpolated into the SQL.
        message_filter / campaign_id: filter values passed to every query builder.
        private_key_str: base64-encoded DER Snowflake key (forwarded to cached_run_query).
        user, account_identifier, warehouse, database, schema, role: Snowflake
            connection parameters (forwarded to cached_run_query).
        client: OpenAI client used to summarize the collected findings.

    Side effects: renders Streamlit UI and reads/writes st.session_state keys
    "query_run", "query_df", "agg_df", "findings_messages",
    "top_level_spike_time", "key_findings", "key_findings_output",
    "flex_jira_info".

    NOTE(review): this body uses `time.time()`, `px` (plotly.express),
    `concurrent.futures`, `NEXT_STEPS_INSTRUCTIONS`, `update_section_generic`
    and the `get_*_query` builders, none of which are imported in the visible
    module header. The header does `from datetime import ... time ...`, which
    shadows the `time` module unless a later `import time` rebinds it —
    confirm the unseen import block covers all of these.
    """
    # --- Generate SQL Queries ---
    # One SQL string per dimension; all share the same window/filters.
    main_sql = get_main_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    flex_sql = get_flex_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    bidder_sql = get_bidder_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    deal_sql = get_deal_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    ad_unit_sql = get_ad_unit_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    browser_sql = get_browser_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    device_sql = get_device_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    random_integer_sql = get_random_integer_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    hb_pb_sql = get_hb_pb_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    hb_size_sql = get_hb_size_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )

    # --- Main Query Execution ---
    # Run query only if it hasn't been run already (guarded by session state,
    # so Streamlit reruns reuse the cached result).
    if not st.session_state["query_run"]:
        try:
            start_main = time.time()
            with st.spinner("Connecting to Snowflake and running top-level query..."):
                df = cached_run_query(
                    main_sql,
                    private_key_str,
                    user,
                    account_identifier,
                    warehouse,
                    database,
                    schema,
                    role,
                )
            elapsed_main = time.time() - start_main
            elapsed_minutes = int(elapsed_main // 60)
            elapsed_seconds = elapsed_main % 60

            st.info(
                f"Top-level SQL query executed in {elapsed_minutes} minute(s) and {elapsed_seconds:.2f} seconds."
            )

            # Process the results: normalize columns, build a timestamp from
            # the EST date/hour/minute parts, and bucket into 5-minute bins.
            df.columns = [col.upper() for col in df.columns]
            df.sort_values(by=["EST_HOUR", "EST_MINUTE"], inplace=True)
            df["timestamp"] = pd.to_datetime(
                df["EST_DATE"].astype(str)
                + " "
                + df["EST_HOUR"].astype(str).str.zfill(2)
                + ":"
                + df["EST_MINUTE"].astype(str).str.zfill(2)
            )
            df["5min"] = df["timestamp"].dt.floor("5T")
            agg_df = df.groupby("5min", as_index=False)["CNT"].sum()

            st.session_state["query_df"] = df
            st.session_state["agg_df"] = agg_df
            st.session_state["query_run"] = True
        except Exception as e:
            st.error(f"Error during main query execution: {e}")
            return
    else:
        # Use stored data from the previous run.
        df = st.session_state.get("query_df")
        agg_df = st.session_state.get("agg_df")

    # --- Display Main Query Results ---
    st.header("Top-Level Data")
    # A 5-minute bucket above this count is considered anomalous.
    top_level_baseline = 30
    agg_df["is_spike"] = agg_df.apply(
        lambda row: row["CNT"] > top_level_baseline, axis=1
    )
    # A "spike" requires two consecutive anomalous buckets; the spike start is
    # anchored one bucket before the second hit (i.e. the first hit's bucket).
    spike_start = None
    consecutive = 0
    for idx, row in agg_df.sort_values("5min").iterrows():
        if row["is_spike"]:
            consecutive += 1
            if consecutive == 2:
                spike_start = row["5min"] - timedelta(minutes=5)
                break
        else:
            consecutive = 0

    if spike_start:
        msg = f"Top-Level: House ad increase detected starting around {spike_start.strftime('%I:%M %p')}."
        st.success(msg)
    else:
        msg = "Top-Level: No large, consistent spike detected in the current data."
        st.info(msg)
    # Append the message only once.
    findings_messages = st.session_state.setdefault("findings_messages", [])
    if msg not in findings_messages:
        findings_messages.append(msg)
    st.session_state["top_level_spike_time"] = spike_start

    with st.expander("Show Raw Data"):
        st.dataframe(df)
    with st.expander("Show Raw 5-Minute Aggregated Data with Spike Alert"):
        st.dataframe(agg_df)

    title_text = "House Ads Count by 5-Minute Interval"
    fig = px.line(
        agg_df,
        x="5min",
        y="CNT",
        title=title_text,
        labels={"5min": "Time", "CNT": "House Ads Count"},
    )
    fig.update_xaxes(tickformat="%I:%M %p")
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("<hr style='border: 3px solid gray;'>", unsafe_allow_html=True)

    # --- Key Findings via OpenAI ---
    st.header("Key Findings and Next Steps")
    # Create a container to hold the key findings output.
    key_findings_container = st.container()

    # Initially display what’s in session_state (if anything) or a placeholder.
    with key_findings_container:
        if st.session_state.get("key_findings_output"):
            st.markdown(
                st.session_state.get("key_findings_output"),
                unsafe_allow_html=True,
            )
        else:
            st.info(
                "Key findings will appear here once additional queries have finished."
            )

    def generate_key_findings_callback():
        # Collect every dashboard finding plus any Jira info surfaced by the
        # flex-bucket section, build the prompt, and store the model's summary
        # in session state (errors are stored as the output string).
        findings = "\n".join(st.session_state.get("findings_messages", []))
        flex_jira_info = st.session_state.get("flex_jira_info", "")
        jira_section = (
            f"\nJira Ticket Information from Flex Bucket section:\n{flex_jira_info}\n"
            if flex_jira_info
            else ""
        )
        prompt = (
            "You are a helpful analyst investigating a spike in house ads. A house ad spike detection dashboard has compiled a list of findings "
            "showing potential spikes across different dimensions. Below are the detailed findings from the dashboard, along with any flagged Jira ticket "
            "information. The NEXT_STEPS_INSTRUCTIONS file contains recommended next steps for each section depending on the spike(s) flagged in the dashboard:\n\n"
            f"Findings:\n{findings}\n"
            f"{jira_section}\n"
            "Next Steps Instructions:\n"
            f"{NEXT_STEPS_INSTRUCTIONS}\n\n"
            "Using the Findings, jira section information, and Next Steps Instructions as helpful context, create a concise summary "
            "that identifies the likely cause/causes of any detected house ad spikes and recommends actionable next steps. The summary "
            "should be a few sentences long followed by bullet points with the main findings and recommended next steps. Please output "
            "the summary in Markdown format with each bullet point on a new line, and indent sub-bullets properly. Ensure that each bullet "
            "point is on its own line. There is no need to explicitly mention the Instructions file in the summary, just use it to "
            "inform your analysis. "
        )
        st.session_state["key_findings"] = prompt
        try:
            response = client.responses.create(
                model="o3-mini",
                instructions="You are a helpful analyst who provides insights and recommends next steps.",
                input=prompt,
            )
            st.session_state["key_findings_output"] = response.output_text.strip()
        except Exception as e:
            st.session_state["key_findings_output"] = f"Error calling OpenAI API: {e}"

    # --- Additional Queries for Specific Dimensions ---
    st.header("Specific Dimensions Data")
    st.info("Checking specific dimensions for house ad spikes...")

    with st.spinner("Running additional queries..."):
        # Fan the per-dimension queries out on a thread pool; each section is
        # rendered as soon as its query completes.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = {}
            start_times = {}
            query_dict = {
                "flex bucket": flex_sql,
                "bidder": bidder_sql,
                "deal": deal_sql,
                "ad_unit": ad_unit_sql,
                "browser": browser_sql,
                "device": device_sql,
                "random_integer": random_integer_sql,
                "hb_pb": hb_pb_sql,
                "hb_size": hb_size_sql,
            }
            for key, query in query_dict.items():
                start_times[key] = time.time()
                futures[key] = executor.submit(
                    cached_run_query,
                    query,
                    private_key_str,
                    user,
                    account_identifier,
                    warehouse,
                    database,
                    schema,
                    role,
                )

            # Pre-create one container per section so results render in a
            # stable order regardless of query completion order.
            containers = {
                "flex bucket": st.container(),
                "bidder": st.container(),
                "deal": st.container(),
                "ad_unit": st.container(),
                "browser": st.container(),
                "device": st.container(),
                "random_integer": st.container(),
                "hb_pb": st.container(),
                "hb_size": st.container(),
            }

            spike_time = st.session_state.get("top_level_spike_time")

            # Poll for completed futures and render each finished section.
            while futures:
                done, _ = concurrent.futures.wait(
                    list(futures.values()),
                    timeout=0.5,
                    return_when=concurrent.futures.FIRST_COMPLETED,
                )
                for future in done:
                    key = [k for k, f in futures.items() if f == future][0]
                    df_result = future.result()
                    update_section_generic(
                        key, df_result, start_times, containers[key], spike_time
                    )
                    del futures[key]

    # Once all additional queries have completed, automatically generate key findings.
    generate_key_findings_callback()

    # Update the key findings container with the new output.
    with key_findings_container:
        st.markdown(
            st.session_state.get("key_findings_output", ""),
            unsafe_allow_html=True,
        )
house_ad_queries.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from functools import wraps
3
+
4
+
5
+ def _quote_identifier(identifier: str) -> str:
6
+ """Quote SQL identifiers that contain special characters."""
7
+
8
+ def quote_part(part: str) -> str:
9
+ if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", part):
10
+ return part
11
+ return f'"{part}"'
12
+
13
+ return ".".join(quote_part(p) for p in identifier.split("."))
14
+
15
+
16
+ def _sanitize_table(func):
17
+ @wraps(func)
18
+ def wrapper(table, *args, **kwargs):
19
+ table = _quote_identifier(table)
20
+ return func(table, *args, **kwargs)
21
+
22
+ return wrapper
23
+
24
+
25
@_sanitize_table
def get_main_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the top-level SQL: house-ad counts per EST date/hour/minute.

    Filters to the given message type and campaign id, and to rows whose first
    hb_pb value is >= 0.15. `table` is quoted by the decorator.
    NOTE(review): the filter values are interpolated directly into the SQL
    string — safe only if they come from trusted inputs; confirm upstream
    validation or parameterize.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
40
+
41
+
42
@_sanitize_table
def get_flex_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the SQL for house-ad counts broken out by flex `bucket`.

    Same window/filters as get_main_query, grouped additionally by bucket.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        bucket,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
58
+
59
+
60
@_sanitize_table
def get_bidder_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the SQL for house-ad counts broken out by the first hb_bidder value.

    Same window/filters as get_main_query, grouped additionally by HB_BIDDER.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_bidder[0]::varchar as HB_BIDDER,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
76
+
77
+
78
@_sanitize_table
def get_deal_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the SQL for house-ad counts broken out by the first hb_deal value.

    Same window/filters as get_main_query, grouped additionally by HB_DEAL.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_deal[0]::varchar as HB_DEAL,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
94
+
95
+
96
# New function for Ad Unit Data
@_sanitize_table
def get_ad_unit_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the SQL for house-ad counts broken out by ad unit.

    The ad unit is the third '/'-separated segment of adUnitPath. Same
    window/filters as get_main_query.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        split(body[0]['adUnitPath'],'/')[2]::varchar as Ad_Unit,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
113
+
114
+
115
# New function for Browser Data
@_sanitize_table
def get_browser_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the SQL for house-ad counts broken out by browser family.

    The browser is classified from the user agent via LIKE matches; the Edge
    check comes first because Edge UAs also contain 'chrome'. Same
    window/filters as get_main_query.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        case
            when lower(useragent) like '%edg%' then 'Edge'
            when (lower(useragent) like '%cros%' or lower(useragent) like '%chrome%' or lower(useragent) like '%crios%') then 'Chrome'
            when lower(useragent) like '%firefox%' then 'Firefox'
            when lower(useragent) like '%applewebkit%' then 'Safari'
            else 'other'
        end as browser,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
138
+
139
+
140
# New function for Device Data
@_sanitize_table
def get_device_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the SQL for house-ad counts broken out by device class.

    Classifies the user agent into desktop/phone/tablet/other via LIKE
    matches. Same window/filters as get_main_query.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        case
            when (useragent like '%Windows%' or useragent like '%Macintosh%') THEN 'desktop'
            when (useragent like '%Android%' or useragent like '%iPhone%' or useragent like '%Mobi%') THEN 'phone'
            when (useragent like '%iPad%' or useragent like '%Tablet%') THEN 'tablet'
            else 'other'
        end as device,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
162
+
163
+
164
# New function for Random Integer Data
@_sanitize_table
def get_random_integer_query(
    table, start_datetime, end_datetime, message_filter, campaign_id
):
    """Build the SQL for house-ad counts broken out by siteTargeting `ri`.

    Same window/filters as get_main_query, grouped additionally by the first
    `ri` (random integer) value.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:siteTargeting:ri[0]::varchar as Random_Integer,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
183
+
184
+
185
# New function for hb_pb Data
@_sanitize_table
def get_hb_pb_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the SQL for house-ad counts broken out by the first hb_pb value.

    Same window/filters as get_main_query, grouped additionally by hb_pb
    (price bucket), kept as varchar for grouping.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_pb[0]::varchar as hb_pb,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
202
+
203
+
204
# New function for hb_size Data
@_sanitize_table
def get_hb_size_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the SQL for house-ad counts broken out by the first hb_size value.

    Same window/filters as get_main_query, grouped additionally by hb_size.
    NOTE(review): filter values are string-interpolated into the SQL; confirm
    upstream validation.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_size[0]::varchar as hb_size,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
house_ad_section_utils.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import streamlit as st
5
+ import os
6
+ import pytz
7
+ import re
8
+ from datetime import timedelta, date, datetime
9
+ from atlassian import Jira
10
+
11
# --- Jira API Configuration for Deployments ---
# Credentials come from the environment; all three must be set for the
# deployment lookup in update_section_generic to work.
JIRA_URL = os.getenv("JIRA_URL")
JIRA_USERNAME = os.getenv("JIRA_USERNAME")
JIRA_API_TOKEN = os.getenv("JIRA_API_TOKEN")

# Initialize your Jira client (the API token is passed as the password).
jira_client = Jira(url=JIRA_URL, username=JIRA_USERNAME, password=JIRA_API_TOKEN)

# Configuration dictionary for sections.
# Per-dimension settings consumed by update_section_generic:
#   group_col       - upper-cased DataFrame column to group counts by
#   chart_title     - Plotly chart title for the section
#   baseline        - per-group count above which a group is flagged at the spike time
#   spike_threshold - number of flagged groups above which the alert reads
#                     "multiple groups" instead of naming them
SECTION_CONFIG = {
    "flex bucket": {
        "group_col": "BUCKET",
        "chart_title": "Flex Bucket House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "bidder": {
        "group_col": "HB_BIDDER",
        "chart_title": "hb_bidder House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "deal": {
        "group_col": "HB_DEAL",
        "chart_title": "hb_deal House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "ad_unit": {
        "group_col": "AD_UNIT",
        "chart_title": "Ad Unit House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "browser": {
        "group_col": "BROWSER",
        "chart_title": "Browser House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 1,
    },
    "device": {
        "group_col": "DEVICE",
        "chart_title": "Device House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 1,
    },
    "random_integer": {
        "group_col": "RANDOM_INTEGER",
        "chart_title": "Random Integer House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "hb_pb": {
        "group_col": "HB_PB",
        "chart_title": "hb_pb House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "hb_size": {
        "group_col": "HB_SIZE",
        "chart_title": "hb_size House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
}
76
+
77
+
78
def parse_deployment_info(comment_text):
    """
    Parses a comment for deployment info if it follows the expected structure:

        Deployed At: <timestamp>
        Bucket: <bucket>
        Traffic: <traffic>
        Branch: <branch>

    Returns a tuple: (deployed_at, bucket, traffic, branch).
    If not all four keys are found, returns four empty strings.
    """
    # Ordered labels; the first label found on a line wins, mirroring the
    # precedence of an if/elif chain. Later lines overwrite earlier values.
    labels = ("Deployed At", "Bucket", "Traffic", "Branch")
    found = {}
    for raw_line in comment_text.splitlines():
        for label in labels:
            token = label + ":"
            if token in raw_line:
                found[label] = raw_line.split(token)[1].strip()
                break
    if set(found) == set(labels):
        return tuple(found[label] for label in labels)
    return "", "", "", ""
110
+
111
+
112
def update_section_generic(key, df, start_times, container, spike_time):
    """
    Updates a section based on the provided key, using the top-level spike time to anchor
    the pre- and post-window comparisons for share-of-voice.

    Parameters:
        key: section name; must be a key of SECTION_CONFIG (e.g. "flex bucket").
        df: raw query result with EST_DATE/EST_HOUR/EST_MINUTE/CNT plus the
            section's grouping column.
        start_times: dict of section -> time.time() when its query started.
        container: the Streamlit container this section renders into.
        spike_time: 5-minute bucket of the top-level spike (may be None).

    Side effects: renders Streamlit widgets, appends to
    st.session_state["findings_messages"], and (for "flex bucket") sets
    st.session_state["flex_jira_info"].

    NOTE(review): if spike_time is None, the `spike_row` filter below yields an
    empty frame and no groups are flagged — confirm that is the intended
    behavior when no top-level spike was found.
    """
    # Compute elapsed time for the query.
    elapsed_section = time.time() - start_times[key]
    minutes_container = int(elapsed_section // 60)
    seconds_container = elapsed_section % 60

    # Standardize column names and create a unified timestamp.
    df.columns = [col.upper() for col in df.columns]
    df.sort_values(by=["EST_HOUR", "EST_MINUTE"], inplace=True)
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str)
        + " "
        + df["EST_HOUR"].astype(str).str.zfill(2)
        + ":"
        + df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    df["5min"] = df["timestamp"].dt.floor("5T")

    # Retrieve configuration for the current section.
    config = SECTION_CONFIG.get(key, {})
    baseline = config.get("baseline", 30)
    group_col = config.get("group_col")
    spike_threshold = config.get("spike_threshold", 3)

    with container:
        st.subheader(f"{key.capitalize()} Data")
        st.info(
            f"{key.capitalize()} query completed in {minutes_container} minute(s) and {seconds_container:.2f} seconds."
        )

        # Group the data by 5-minute intervals and the configured grouping column.
        agg_df = df.groupby(["5min", group_col], as_index=False)["CNT"].sum()

        # Get the data corresponding to the spike time.
        spike_row = agg_df[agg_df["5min"] == spike_time]

        # Flag groups where the count exceeds the baseline.
        flagged_groups = []
        for grp in spike_row[group_col].unique():
            group_count = spike_row[spike_row[group_col] == grp]["CNT"].sum()
            if group_count > baseline:
                flagged_groups.append(grp)

        # Create the chart once.
        fig = px.line(
            agg_df,
            x="5min",
            y="CNT",
            color=group_col,
            title=config.get(
                "chart_title",
                f"{key.capitalize()} House Ads Count by 5-Minute Interval",
            ),
            labels={"5min": "Time", "CNT": "House Ads Count", group_col: key},
        )
        fig.update_xaxes(tickformat="%I:%M %p")

        # Render alert + data + chart. More flagged groups than the threshold
        # is a broad (warning) alert; otherwise name the groups (success).
        if flagged_groups:
            if len(flagged_groups) > spike_threshold:
                msg = f"{key.capitalize()}: House ad increase detected for multiple {key} groups starting around {spike_time.strftime('%I:%M %p')}."
                st.warning(msg)
                with st.expander(f"Show Raw {key.capitalize()} Data"):
                    st.dataframe(df)
                with st.expander("Show Chart"):
                    st.plotly_chart(fig, use_container_width=True)
            else:
                msg = f"{key.capitalize()}: House ad increase detected for {', '.join(flagged_groups)} starting around {spike_time.strftime('%I:%M %p')}."
                st.success(msg)
                with st.expander(f"Show Raw {key.capitalize()} Data"):
                    st.dataframe(df)
                st.plotly_chart(fig, use_container_width=True)
            st.session_state.setdefault("findings_messages", []).append(msg)
        else:
            msg = f"{key.capitalize()}: No significant {key} spikes detected."
            st.info(msg)
            st.session_state.setdefault("findings_messages", []).append(msg)
            with st.expander(f"Show Raw {key.capitalize()} Data"):
                st.dataframe(df)
            with st.expander("Show Chart"):
                st.plotly_chart(fig, use_container_width=True)

        # The flex-bucket section additionally cross-references Jira for
        # deployments that may explain a flagged bucket.
        if key == "flex bucket":
            st.write("### Deployment Information")
            flex_jira_info = ""  # Initialize an empty variable.
            try:
                # Use the selected dashboard date to define the full day range.
                start_date = st.session_state.get("start_date")
                end_date = st.session_state.get("end_date")
                eastern = st.session_state.get("eastern")
                start_datetime = datetime.combine(start_date, datetime.min.time())
                end_datetime = datetime.combine(end_date, datetime.max.time())
                start_str = start_datetime.astimezone(pytz.utc).strftime(
                    "%Y-%m-%d %H:%M"
                )
                end_str = end_datetime.astimezone(pytz.utc).strftime("%Y-%m-%d %H:%M")
                st.info("Fetching deployment information from Jira...")

                # Build a JQL query for the selected date range.
                dashboard_start_str = (
                    f"{start_date.month}/{start_date.day}/{start_date.strftime('%y')}"
                )
                dashboard_end_str = (
                    f"{end_date.month}/{end_date.day}/{end_date.strftime('%y')}"
                )
                jql = (
                    f'comment ~ "Deployed At: {dashboard_start_str}" '
                    f'OR comment ~ "Deployed At: {dashboard_end_str}" '
                    f'AND comment ~ "Bucket:" '
                    f'AND comment ~ "Traffic:" '
                    f'AND comment ~ "Branch:"'
                )

                # --- Pagination: Retrieve all matching issues ---
                startAt = 0
                limit = 50
                deployments_list = []

                while True:
                    response_page = jira_client.jql(
                        jql,
                        fields="key,summary,updated,comment",
                        start=startAt,
                        limit=limit,
                    )
                    issues = response_page.get("issues", [])
                    deployments_list.extend(issues)
                    if len(issues) < limit:
                        break
                    startAt += len(issues)

                deployments = []
                for issue in deployments_list:
                    key_val = issue["key"]
                    summary = issue["fields"]["summary"]
                    updated = issue["fields"]["updated"]
                    key_link = f'<a href="{JIRA_URL}/browse/{key_val}" target="_blank">{key_val}</a>'

                    try:
                        updated_dt = pd.to_datetime(updated, utc=True).astimezone(
                            eastern
                        )
                    except Exception:
                        updated_dt = None

                    comment_field = issue["fields"].get("comment", {})
                    comments = comment_field.get("comments", [])

                    # Look for the first comment in the selected window whose
                    # body starts with "deployed".
                    deployment_found = False
                    deployment_comment = ""
                    if comments:
                        for comment in comments:
                            try:
                                comment_dt = pd.to_datetime(
                                    comment["created"], utc=True
                                ).astimezone(eastern)
                            except Exception:
                                continue
                            # Check if the comment was created on the selected date.
                            if start_date <= comment_dt.date() <= end_date:
                                body = comment["body"].strip()
                                if body.lower().startswith("deployed"):
                                    deployment_found = True
                                    deployment_comment = body
                                    break
                    if deployment_found:
                        dep_at, bucket, traffic, branch = parse_deployment_info(
                            deployment_comment
                        )
                        # Fallback for free-form "Deployed to prod <timestamp>"
                        # comments that don't follow the structured format.
                        if not dep_at and deployment_comment.lower().startswith(
                            "deployed to prod"
                        ):
                            timestamp_text = re.sub(
                                r"(?i)^deployed\s+to\s+prod\s*(at\s*)?",
                                "",
                                deployment_comment,
                            ).strip()
                            if "." in timestamp_text:
                                timestamp_text = timestamp_text.split(".")[0].strip()
                            dep_at = timestamp_text
                            bucket, traffic, branch = "", "", ""
                        if dep_at:
                            try:
                                deployed_dt = pd.to_datetime(
                                    dep_at, format="%m/%d/%y, %I:%M %p", errors="coerce"
                                )
                            except Exception:
                                deployed_dt = None
                            if deployed_dt is not None and deployed_dt is not pd.NaT:
                                deployed_dt = eastern.localize(
                                    deployed_dt.replace(tzinfo=None)
                                )
                                deployments.append(
                                    {
                                        "Deployed Date": deployed_dt.strftime(
                                            "%m/%d/%y"
                                        ),
                                        "Deployed Time": deployed_dt.strftime(
                                            "%I:%M %p"
                                        ),
                                        "Key": key_link,
                                        "Summary": summary,
                                        "Bucket": bucket if bucket else "production",
                                    }
                                )

                if deployments:
                    df_deployments = pd.DataFrame(deployments).reset_index(drop=True)
                    df_deployments["Deployed_dt"] = pd.to_datetime(
                        df_deployments["Deployed Date"]
                        + " "
                        + df_deployments["Deployed Time"],
                        format="%m/%d/%y %I:%M %p",
                        errors="coerce",
                    )
                    df_deployments.sort_values(
                        "Deployed_dt", ascending=False, inplace=True
                    )
                    df_deployments.drop("Deployed_dt", axis=1, inplace=True)

                    # Filter the DataFrame to only show flagged deployments.
                    df_flagged = df_deployments[
                        df_deployments["Bucket"].isin(flagged_groups)
                    ]

                    if not df_flagged.empty:
                        # Build a string containing info for all flagged Jira tickets.
                        tickets_info_list = []
                        for _, row in df_flagged.iterrows():
                            tickets_info_list.append(
                                f"Jira Ticket: {row['Key']} - {row['Summary']}"
                            )
                        flex_jira_info = "\n".join(tickets_info_list)

                        # Reorder columns for display.
                        cols = [
                            "Deployed Date",
                            "Deployed Time",
                            "Key",
                            "Summary",
                            "Bucket",
                        ]
                        df_flagged = df_flagged[cols]
                        styled_df = df_flagged.style.hide(axis="index")
                        st.markdown(
                            styled_df.to_html(escape=False), unsafe_allow_html=True
                        )
                    else:
                        st.info(
                            "No flagged deployments found for the selected criteria."
                        )
                else:
                    st.info("No deployments found for the selected criteria.")
            except Exception as e:
                st.error(f"Error fetching deployments: {e}")

            # Save the Jira info (if any) to session state.
            st.session_state["flex_jira_info"] = flex_jira_info
        st.markdown("<hr style='border: 3px solid gray;'>", unsafe_allow_html=True)
index.html DELETED
@@ -1,19 +0,0 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ snowflake-connector-python
3
+ cryptography
4
+ pandas
5
+ plotly
6
+ atlassian-python-api
7
+ pytz
8
+ openai
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }