Spaces:
Sleeping
Sleeping
github-actions[bot] commited on
Commit ·
08c9602
1
Parent(s): 0fc4a33
sync: automatic content update from github
Browse files- README.md +7 -4
- app.py +347 -0
- changelog.md +5 -0
- delivery_instructions.py +35 -0
- delivery_main.py +420 -0
- delivery_queries.py +466 -0
- delivery_section_utils.py +103 -0
- .gitattributes → gitattributes +0 -0
- house_ad_instructions.py +60 -0
- house_ad_main.py +356 -0
- house_ad_queries.py +220 -0
- house_ad_section_utils.py +373 -0
- index.html +0 -19
- requirements.txt +8 -0
- style.css +0 -28
README.md
CHANGED
|
@@ -1,10 +1,13 @@
|
|
| 1 |
---
|
| 2 |
title: Red Alert Investigations
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
|
|
|
|
|
|
| 7 |
pinned: false
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
title: Red Alert Investigations
|
| 3 |
+
emoji: 📈
|
| 4 |
+
colorFrom: gray
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.43.1
|
| 8 |
+
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
short_description: Automate Red Alert Investigations
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import pytz
|
| 5 |
+
import base64
|
| 6 |
+
import altair as alt
|
| 7 |
+
from datetime import datetime, date, time, timedelta
|
| 8 |
+
from zoneinfo import ZoneInfo
|
| 9 |
+
import snowflake.connector
|
| 10 |
+
from cryptography.hazmat.primitives import serialization
|
| 11 |
+
from cryptography.hazmat.backends import default_backend
|
| 12 |
+
|
| 13 |
+
# --- Secrets and Key Handling ---
|
| 14 |
+
private_key_pem = os.getenv("SNOWFLAKE_PRIVATE_KEY").replace('\\n', "\n").encode()
|
| 15 |
+
private_key_obj = serialization.load_pem_private_key(
|
| 16 |
+
private_key_pem,
|
| 17 |
+
password=None,
|
| 18 |
+
backend=default_backend()
|
| 19 |
+
)
|
| 20 |
+
private_key_der = private_key_obj.private_bytes(
|
| 21 |
+
encoding=serialization.Encoding.DER,
|
| 22 |
+
format=serialization.PrivateFormat.PKCS8,
|
| 23 |
+
encryption_algorithm=serialization.NoEncryption()
|
| 24 |
+
)
|
| 25 |
+
private_key_b64 = base64.b64encode(private_key_der).decode('utf-8')
|
| 26 |
+
|
| 27 |
+
# Connection params
|
| 28 |
+
account_identifier = os.getenv("SNOWFLAKE_ACCOUNT_IDENTIFIER")
|
| 29 |
+
user = os.getenv("SNOWFLAKE_USER")
|
| 30 |
+
warehouse = os.getenv("SNOWFLAKE_WAREHOUSE")
|
| 31 |
+
database = os.getenv("SNOWFLAKE_DATABASE")
|
| 32 |
+
schema = os.getenv("SNOWFLAKE_SCHEMA")
|
| 33 |
+
role = os.getenv("SNOWFLAKE_ROLE")
|
| 34 |
+
table = os.getenv("SNOWFLAKE_TABLE")
|
| 35 |
+
message_filter = os.getenv("SNOWFLAKE_MESSAGE_FILTER")
|
| 36 |
+
campaign_id = os.getenv("SNOWFLAKE_CAMPAIGN_ID")
|
| 37 |
+
|
| 38 |
+
# Import query builders
|
| 39 |
+
from house_ad_main import run_house_ad_spike_query
|
| 40 |
+
from delivery_main import run_drop_query
|
| 41 |
+
from delivery_queries import (
|
| 42 |
+
get_main_query as get_main_delivery_query,
|
| 43 |
+
get_main_int_sov_query,
|
| 44 |
+
get_bidder_query as get_bidder_delivery_query,
|
| 45 |
+
get_flex_bucket_query,
|
| 46 |
+
get_device_query as get_device_delivery_query,
|
| 47 |
+
get_ad_unit_query as get_ad_unit_delivery_query,
|
| 48 |
+
get_refresh_query
|
| 49 |
+
)
|
| 50 |
+
from house_ad_queries import (
|
| 51 |
+
get_main_query as get_main_house_query,
|
| 52 |
+
get_flex_query as get_flex_house_query,
|
| 53 |
+
get_bidder_query as get_bidder_house_query,
|
| 54 |
+
get_deal_query,
|
| 55 |
+
get_ad_unit_query as get_ad_unit_house_query,
|
| 56 |
+
get_browser_query,
|
| 57 |
+
get_device_query as get_device_house_query,
|
| 58 |
+
get_random_integer_query,
|
| 59 |
+
get_hb_pb_query,
|
| 60 |
+
get_hb_size_query
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
# OpenAI (if required)
|
| 64 |
+
from openai import OpenAI
|
| 65 |
+
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 66 |
+
|
| 67 |
+
# Session defaults
|
| 68 |
+
if "deep_values" not in st.session_state:
|
| 69 |
+
st.session_state["deep_values"] = {}
|
| 70 |
+
|
| 71 |
+
# Sidebar filters
|
| 72 |
+
st.sidebar.title("Red Alert Investigations Filters")
|
| 73 |
+
analysis_type = st.sidebar.radio(
|
| 74 |
+
"Analysis Type",
|
| 75 |
+
["House_Ads","Display_Prebid","Display_OB","Display_AdX","Display_HBT_OB","Display_TAM",
|
| 76 |
+
"Video_Prebid","Video_OB","Video_AdX","Video_TAM"]
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
if analysis_type == "House_Ads":
|
| 80 |
+
ad_format_filter = integration_filter = None
|
| 81 |
+
else:
|
| 82 |
+
ad_format_filter, integration_filter = analysis_type.split("_",1)
|
| 83 |
+
|
| 84 |
+
# Time defaults
|
| 85 |
+
now_edt = datetime.now(ZoneInfo("America/New_York"))
|
| 86 |
+
default_start = now_edt - timedelta(hours=3)
|
| 87 |
+
default_end = now_edt
|
| 88 |
+
start_date = st.sidebar.date_input("Start Date", default_start.date())
|
| 89 |
+
start_hour = st.sidebar.selectbox("Start Hour", list(range(24)), index=default_start.hour)
|
| 90 |
+
end_date = st.sidebar.date_input("End Date", default_end.date())
|
| 91 |
+
end_hour = st.sidebar.selectbox("End Hour", list(range(24)), index=default_end.hour)
|
| 92 |
+
start_dt = datetime.combine(start_date, time(start_hour))
|
| 93 |
+
end_dt = datetime.combine(end_date, time(end_hour,59,59))
|
| 94 |
+
start_str = start_dt.strftime('%Y-%m-%d %H:%M:%S')
|
| 95 |
+
end_str = end_dt.strftime('%Y-%m-%d %H:%M:%S')
|
| 96 |
+
|
| 97 |
+
st.session_state["start_date"] = start_date
|
| 98 |
+
st.session_state["end_date"] = end_date
|
| 99 |
+
st.session_state["eastern"] = pytz.timezone("America/New_York")
|
| 100 |
+
|
| 101 |
+
# Data fetch helper
|
| 102 |
+
def fetch_df(sql: str) -> pd.DataFrame:
|
| 103 |
+
conn = snowflake.connector.connect(
|
| 104 |
+
account=account_identifier,
|
| 105 |
+
user=user,
|
| 106 |
+
private_key=private_key_b64,
|
| 107 |
+
warehouse=warehouse,
|
| 108 |
+
database=database,
|
| 109 |
+
schema=schema,
|
| 110 |
+
role=role,
|
| 111 |
+
)
|
| 112 |
+
return pd.read_sql(sql, conn)
|
| 113 |
+
|
| 114 |
+
# Tabs layout
|
| 115 |
+
tab_auto, tab_deep = st.tabs(["Auto-Analysis","Deep Dive"])
|
| 116 |
+
|
| 117 |
+
# Auto-Analysis Tab
|
| 118 |
+
with tab_auto:
|
| 119 |
+
st.title("Red Alert Investigations")
|
| 120 |
+
if analysis_type == "House_Ads":
|
| 121 |
+
st.header("House Ad Analysis")
|
| 122 |
+
if st.button("Run Analysis"):
|
| 123 |
+
st.session_state["query_run"] = False
|
| 124 |
+
run_house_ad_spike_query(
|
| 125 |
+
table, start_str, end_str,
|
| 126 |
+
message_filter, campaign_id,
|
| 127 |
+
private_key_b64, user,
|
| 128 |
+
account_identifier, warehouse,
|
| 129 |
+
database, schema, role,
|
| 130 |
+
client
|
| 131 |
+
)
|
| 132 |
+
else:
|
| 133 |
+
st.header(f"{ad_format_filter} {integration_filter} Analysis")
|
| 134 |
+
if st.button("Run Analysis"):
|
| 135 |
+
st.session_state["query_run"] = False
|
| 136 |
+
run_drop_query(
|
| 137 |
+
table, start_str, end_str,
|
| 138 |
+
message_filter, campaign_id,
|
| 139 |
+
private_key_b64, user,
|
| 140 |
+
account_identifier, warehouse,
|
| 141 |
+
database, schema, role,
|
| 142 |
+
client,
|
| 143 |
+
integration_filter, ad_format_filter
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
with tab_deep:
|
| 147 |
+
st.header("Deep Dive")
|
| 148 |
+
|
| 149 |
+
# 1) Select dimensions
|
| 150 |
+
if analysis_type == "House_Ads":
|
| 151 |
+
all_dims = [
|
| 152 |
+
"Flex Bucket","Bidder","Deal","Ad Unit","Browser",
|
| 153 |
+
"Device","Random Integer","HB Price Buckets","HB Size"
|
| 154 |
+
]
|
| 155 |
+
else:
|
| 156 |
+
all_dims = [
|
| 157 |
+
"Integration SOV","Bidder","Flex Bucket",
|
| 158 |
+
"Device","Ad Unit Group","Refresh"
|
| 159 |
+
]
|
| 160 |
+
to_plot = st.multiselect("1. Select dimensions", all_dims, key="dims")
|
| 161 |
+
|
| 162 |
+
# 2) Fetch unique values per dimension
|
| 163 |
+
if st.button("2. Fetch Values") and to_plot:
|
| 164 |
+
vals = {}
|
| 165 |
+
for dim in to_plot:
|
| 166 |
+
if dim == "Integration SOV" and analysis_type != "House_Ads":
|
| 167 |
+
dfv = fetch_df(get_main_int_sov_query(
|
| 168 |
+
table, start_str, end_str, message_filter,
|
| 169 |
+
campaign_id, ad_format_filter
|
| 170 |
+
))
|
| 171 |
+
col = "Integration"
|
| 172 |
+
elif analysis_type == "House_Ads":
|
| 173 |
+
fn_map = {
|
| 174 |
+
"Flex Bucket": get_flex_house_query,
|
| 175 |
+
"Bidder": get_bidder_house_query,
|
| 176 |
+
"Deal": get_deal_query,
|
| 177 |
+
"Ad Unit": get_ad_unit_house_query,
|
| 178 |
+
"Browser": get_browser_query,
|
| 179 |
+
"Device": get_device_house_query,
|
| 180 |
+
"Random Integer": get_random_integer_query,
|
| 181 |
+
"HB Price Buckets":get_hb_pb_query,
|
| 182 |
+
"HB Size": get_hb_size_query,
|
| 183 |
+
}
|
| 184 |
+
dfv = fetch_df(fn_map[dim](
|
| 185 |
+
table, start_str, end_str, message_filter, campaign_id
|
| 186 |
+
))
|
| 187 |
+
col = [c for c in dfv.columns
|
| 188 |
+
if c not in ("EST_DATE","EST_HOUR","EST_MINUTE","CNT")][0]
|
| 189 |
+
else:
|
| 190 |
+
fn_map = {
|
| 191 |
+
"Bidder": get_bidder_delivery_query,
|
| 192 |
+
"Flex Bucket": get_flex_bucket_query,
|
| 193 |
+
"Device": get_device_delivery_query,
|
| 194 |
+
"Ad Unit Group": get_ad_unit_delivery_query,
|
| 195 |
+
"Refresh": get_refresh_query,
|
| 196 |
+
}
|
| 197 |
+
dfv = fetch_df(fn_map[dim](
|
| 198 |
+
table, start_str, end_str, message_filter,
|
| 199 |
+
campaign_id, integration_filter, ad_format_filter
|
| 200 |
+
))
|
| 201 |
+
col = [c for c in dfv.columns
|
| 202 |
+
if c not in ("EST_DATE","EST_HOUR","EST_MINUTE","CNT")][0]
|
| 203 |
+
vals[dim] = sorted(dfv[col].dropna().unique())
|
| 204 |
+
st.session_state["deep_values"] = vals
|
| 205 |
+
|
| 206 |
+
# 3) Select filters & run the combined query
|
| 207 |
+
if st.session_state.get("deep_values"):
|
| 208 |
+
filters = {}
|
| 209 |
+
for dim, options in st.session_state["deep_values"].items():
|
| 210 |
+
filters[dim] = st.multiselect(
|
| 211 |
+
f"Filter {dim}", options, default=options,
|
| 212 |
+
key=f"fv_{dim}"
|
| 213 |
+
)
|
| 214 |
+
|
| 215 |
+
if st.button("3. Run Deep Dive"):
|
| 216 |
+
# 3a) Build the base CTE
|
| 217 |
+
if analysis_type == "House_Ads":
|
| 218 |
+
base = get_main_house_query(
|
| 219 |
+
table, start_str, end_str, message_filter, campaign_id
|
| 220 |
+
)
|
| 221 |
+
snippet_map = {
|
| 222 |
+
"Flex Bucket": "bucket",
|
| 223 |
+
"Bidder": "body[0]:slotTargeting:hb_bidder[0]::varchar AS BIDDER",
|
| 224 |
+
"Deal": "body[0]:slotTargeting:hb_deal[0]::varchar AS HB_DEAL",
|
| 225 |
+
"Ad Unit": "split(body[0]['adUnitPath'],'/')[2]::varchar AS AD_UNIT",
|
| 226 |
+
"Browser": "CASE WHEN lower(useragent) LIKE '%edg%' THEN 'Edge' WHEN lower(useragent) LIKE '%chrome%' THEN 'Chrome' WHEN lower(useragent) LIKE '%firefox%' THEN 'Firefox' WHEN lower(useragent) LIKE '%safari%' THEN 'Safari' ELSE 'Other' END AS BROWSER",
|
| 227 |
+
"Device": "CASE WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' WHEN useragent LIKE '%Android%' OR useragent LIKE '%Mobi%' THEN 'phone' WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' ELSE 'other' END AS DEVICE",
|
| 228 |
+
"Random Integer": "body[0]:siteTargeting:ri[0]::varchar AS RANDOM_INTEGER",
|
| 229 |
+
"HB Price Buckets": "body[0]:slotTargeting:hb_pb[0]::varchar AS HB_PB",
|
| 230 |
+
"HB Size": "body[0]:slotTargeting:hb_size[0]::varchar AS HB_SIZE",
|
| 231 |
+
}
|
| 232 |
+
else:
|
| 233 |
+
base = get_main_delivery_query(
|
| 234 |
+
table, start_str, end_str,
|
| 235 |
+
message_filter, campaign_id,
|
| 236 |
+
integration_filter, ad_format_filter
|
| 237 |
+
)
|
| 238 |
+
snippet_map = {
|
| 239 |
+
"Integration SOV":"INTEGRATION",
|
| 240 |
+
"Bidder": "body[0]:slotTargeting:hb_bidder[0]::varchar AS HB_BIDDER",
|
| 241 |
+
"Flex Bucket": "bucket",
|
| 242 |
+
"Device": "CASE WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' WHEN useragent LIKE '%Android%' OR useragent LIKE '%Mobi%' THEN 'phone' WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' ELSE 'other' END AS DEVICE",
|
| 243 |
+
"Ad Unit Group": "CASE WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Sticky_Outstream' WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video' ELSE 'Other' END AS AD_UNIT_GROUP",
|
| 244 |
+
"Refresh": "body[0]:slotTargeting:refresh[0]::varchar AS REFRESH",
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
# 3b) Inject all selected dimension snippets, matching both lowercase & uppercase
|
| 248 |
+
select_snippets = [snippet_map[dim] for dim in to_plot]
|
| 249 |
+
dynamic_cte = (
|
| 250 |
+
base
|
| 251 |
+
.replace(
|
| 252 |
+
"count(*) as CNT",
|
| 253 |
+
f"count(*) as CNT, {', '.join(select_snippets)}"
|
| 254 |
+
)
|
| 255 |
+
.replace(
|
| 256 |
+
"COUNT(*) AS CNT",
|
| 257 |
+
f"COUNT(*) AS CNT, {', '.join(select_snippets)}"
|
| 258 |
+
)
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
# 3c) Build WHERE clauses from the filters
|
| 262 |
+
where_clauses = []
|
| 263 |
+
for dim, vals in filters.items():
|
| 264 |
+
alias = snippet_map[dim].split(" AS ")[-1]
|
| 265 |
+
val_list = ", ".join(f"'{v}'" for v in vals)
|
| 266 |
+
where_clauses.append(f"{alias} IN ({val_list})")
|
| 267 |
+
|
| 268 |
+
final_sql = (
|
| 269 |
+
f"SELECT *\n"
|
| 270 |
+
f"FROM (\n{dynamic_cte}\n) sub\n"
|
| 271 |
+
f"WHERE {' AND '.join(where_clauses)}"
|
| 272 |
+
)
|
| 273 |
+
|
| 274 |
+
# 3d) Execute & display
|
| 275 |
+
df_final = fetch_df(final_sql)
|
| 276 |
+
for dim, snippet in snippet_map.items():
|
| 277 |
+
alias = snippet.split(" AS ")[-1] # e.g. "bucket", "BROWSER", etc.
|
| 278 |
+
# find the actual DataFrame column (which will be uppercase)
|
| 279 |
+
match = next((c for c in df_final.columns if c.upper() == alias.upper()), None)
|
| 280 |
+
if match:
|
| 281 |
+
df_final.rename(columns={match: dim}, inplace=True)
|
| 282 |
+
|
| 283 |
+
# Build the minute‐precision datetime index
|
| 284 |
+
df_final["EST_DATETIME"] = (
|
| 285 |
+
pd.to_datetime(df_final["EST_DATE"]) +
|
| 286 |
+
pd.to_timedelta(df_final["EST_HOUR"], unit="h") +
|
| 287 |
+
pd.to_timedelta(df_final["EST_MINUTE"], unit="m")
|
| 288 |
+
)
|
| 289 |
+
|
| 290 |
+
st.subheader("Deep Dive Results")
|
| 291 |
+
st.dataframe(df_final)
|
| 292 |
+
|
| 293 |
+
# Build the Series column off your filtered dims
|
| 294 |
+
df_final["Series"] = (
|
| 295 |
+
df_final[list(filters.keys())]
|
| 296 |
+
.astype(str)
|
| 297 |
+
.agg(":".join, axis=1)
|
| 298 |
+
)
|
| 299 |
+
|
| 300 |
+
# Pivot on EST_DATETIME instead of EST_DATE
|
| 301 |
+
pivot = (
|
| 302 |
+
df_final
|
| 303 |
+
.pivot_table(
|
| 304 |
+
index="EST_DATETIME", # ← minute‐level axis
|
| 305 |
+
columns="Series",
|
| 306 |
+
values="CNT",
|
| 307 |
+
aggfunc="sum"
|
| 308 |
+
)
|
| 309 |
+
.fillna(0)
|
| 310 |
+
.sort_index()
|
| 311 |
+
)
|
| 312 |
+
|
| 313 |
+
pivot.columns = [col.replace(":", "_") for col in pivot.columns]
|
| 314 |
+
|
| 315 |
+
pivot_df = (
|
| 316 |
+
pivot
|
| 317 |
+
.reset_index()
|
| 318 |
+
.melt(id_vars="EST_DATETIME", var_name="Series", value_name="CNT")
|
| 319 |
+
)
|
| 320 |
+
|
| 321 |
+
# Build an Altair line chart:
|
| 322 |
+
chart = (
|
| 323 |
+
alt.Chart(pivot_df)
|
| 324 |
+
.mark_line(point=True)
|
| 325 |
+
.encode(
|
| 326 |
+
x=alt.X(
|
| 327 |
+
"EST_DATETIME:T",
|
| 328 |
+
axis=alt.Axis(
|
| 329 |
+
title="Time (NY)",
|
| 330 |
+
format="%H:%M", # show hour:minute on the axis
|
| 331 |
+
tickCount="hour" # one tick per hour
|
| 332 |
+
)
|
| 333 |
+
),
|
| 334 |
+
y=alt.Y("CNT:Q", title="Count"),
|
| 335 |
+
color=alt.Color("Series:N", title="Dimension"),
|
| 336 |
+
tooltip=[
|
| 337 |
+
alt.Tooltip("EST_DATETIME:T", title="Timestamp", format="%Y-%m-%d %H:%M"),
|
| 338 |
+
alt.Tooltip("Series:N", title="Series"),
|
| 339 |
+
alt.Tooltip("CNT:Q", title="Count"),
|
| 340 |
+
]
|
| 341 |
+
)
|
| 342 |
+
.properties(width=700, height=400)
|
| 343 |
+
.interactive() # allow pan/zoom
|
| 344 |
+
)
|
| 345 |
+
|
| 346 |
+
st.subheader("Deep Dive Trend")
|
| 347 |
+
st.altair_chart(chart, use_container_width=True)
|
changelog.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Changelog
|
| 2 |
+
|
| 3 |
+
- 2025-08-07 19:58 UTC: Cast LineItem IDs to VARCHAR in delivery queries to avoid numeric conversion errors.
|
| 4 |
+
- 2025-08-07 17:28 UTC: Quote table identifiers in queries to support hyphenated table names.
|
| 5 |
+
- 2025-08-07 14:28 UTC: Initialized changelog to track project updates.
|
delivery_instructions.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# instructions.py
|
| 2 |
+
|
| 3 |
+
NEXT_STEPS_INSTRUCTIONS = """
|
| 4 |
+
Flex Bucket:
|
| 5 |
+
If a single flex bucket is flagged as having a delivery drop, that bucket is the most likely source of the issue.
|
| 6 |
+
Check whether there was a recent deployment impacting that bucket—refer to the deployment time and bucket name in the flex_section message.
|
| 7 |
+
Send the flagged flex bucket details, along with deployment context, to the Ad Code team for investigation.
|
| 8 |
+
Include a hyperlink to the related Jira ticket.
|
| 9 |
+
If multiple buckets are flagged, the issue may be shared among them. If most or all are impacted, flex buckets may not be the root cause.
|
| 10 |
+
|
| 11 |
+
Bidder:
|
| 12 |
+
If a single hb_bidder is flagged as having a delivery drop, it is likely the source of the issue.
|
| 13 |
+
Check for any recent changes in GAM related to this bidder—this includes targeting changes, blocking rules, or budget issues.
|
| 14 |
+
Send the flagged bidder information to the Rev Ops team for deeper investigation.
|
| 15 |
+
The Ad Ops and Ad Code teams should also verify if there were any recent changes in GAM setup or ad code logic affecting bidder behavior.
|
| 16 |
+
If most or all bidders are flagged, it’s likely the drop is not specific to a single bidder.
|
| 17 |
+
|
| 18 |
+
Device:
|
| 19 |
+
If a single device type is flagged (e.g., desktop, phone, tablet), the issue is likely related to that device category.
|
| 20 |
+
Investigate whether there were recent front-end or ad code changes that could be suppressing impressions on that device type.
|
| 21 |
+
The Ad Code team should verify targeting and rendering conditions. The Ad Ops team should check for any targeting changes in GAM.
|
| 22 |
+
If multiple or all device types are flagged, the issue may lie upstream, not within device-specific rendering or targeting logic.
|
| 23 |
+
|
| 24 |
+
Ad Unit:
|
| 25 |
+
If a single ad unit group (e.g., Sidebar, Content, Footer) is flagged, investigate whether recent changes affected the structure or availability of that unit.
|
| 26 |
+
Escalate the findings to the Ad Code team. The Ad Ops team should check for any targeting changes in GAM.
|
| 27 |
+
If most or all ad unit groups are flagged, the issue is less likely to be specific to a single ad unit and may be campaign- or integration-related.
|
| 28 |
+
|
| 29 |
+
Refresh:
|
| 30 |
+
If a single refresh value (e.g., 1, 2, 3...) is flagged, it may indicate a technical issue affecting impressions during specific refresh cycles.
|
| 31 |
+
Investigate whether recent ad code changes modified refresh logic or behavior.
|
| 32 |
+
Coordinate with the Ad Code team to confirm if affected refresh values correspond with known logic updates.
|
| 33 |
+
The Ad Ops team should check for any targeting changes in GAM.
|
| 34 |
+
If most or all refresh values are flagged, the issue likely lies outside of refresh logic, possibly within broader rendering or integration pipelines.
|
| 35 |
+
"""
|
delivery_main.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import time
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
import snowflake.connector
|
| 6 |
+
import base64
|
| 7 |
+
from datetime import timedelta, datetime
|
| 8 |
+
from cryptography.hazmat.primitives import serialization
|
| 9 |
+
from cryptography.hazmat.backends import default_backend
|
| 10 |
+
import concurrent.futures
|
| 11 |
+
|
| 12 |
+
# Import SQL query functions
|
| 13 |
+
from delivery_queries import (
|
| 14 |
+
get_main_query,
|
| 15 |
+
get_main_int_sov_query,
|
| 16 |
+
get_bidder_query,
|
| 17 |
+
get_flex_bucket_query,
|
| 18 |
+
get_device_query,
|
| 19 |
+
get_ad_unit_query,
|
| 20 |
+
get_refresh_query,
|
| 21 |
+
)
|
| 22 |
+
from delivery_section_utils import update_section_generic_drop
|
| 23 |
+
|
| 24 |
+
# Import the NEXT_STEPS_INSTRUCTIONS for delivery drops
|
| 25 |
+
from delivery_instructions import NEXT_STEPS_INSTRUCTIONS
|
| 26 |
+
|
| 27 |
+
# Initialize session state
|
| 28 |
+
st.session_state.setdefault("query_run", False)
|
| 29 |
+
st.session_state.setdefault("findings_messages", [])
|
| 30 |
+
st.session_state.setdefault("query_df", None)
|
| 31 |
+
st.session_state.setdefault("agg_df", None)
|
| 32 |
+
st.session_state.setdefault("top_level_drop_time", None)
|
| 33 |
+
st.session_state.setdefault("key_findings_output", None)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@st.cache_data(show_spinner=False)
|
| 37 |
+
def cached_run_query(
|
| 38 |
+
query,
|
| 39 |
+
private_key_b64: str,
|
| 40 |
+
user: str,
|
| 41 |
+
account_identifier: str,
|
| 42 |
+
warehouse: str,
|
| 43 |
+
database: str,
|
| 44 |
+
schema: str,
|
| 45 |
+
role: str,
|
| 46 |
+
):
|
| 47 |
+
"""Run a Snowflake query and return a DataFrame."""
|
| 48 |
+
der = base64.b64decode(private_key_b64)
|
| 49 |
+
conn = snowflake.connector.connect(
|
| 50 |
+
user=user,
|
| 51 |
+
account=account_identifier,
|
| 52 |
+
warehouse=warehouse,
|
| 53 |
+
database=database,
|
| 54 |
+
schema=schema,
|
| 55 |
+
role=role,
|
| 56 |
+
private_key=der,
|
| 57 |
+
)
|
| 58 |
+
cs = conn.cursor()
|
| 59 |
+
cs.execute("ALTER SESSION SET STATEMENT_TIMEOUT_IN_SECONDS = 1800")
|
| 60 |
+
cs.execute(query)
|
| 61 |
+
rows = cs.fetchall()
|
| 62 |
+
cols = [c[0] for c in cs.description]
|
| 63 |
+
df = pd.DataFrame(rows, columns=cols)
|
| 64 |
+
cs.close()
|
| 65 |
+
conn.close()
|
| 66 |
+
return df
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def run_drop_query(
|
| 70 |
+
table,
|
| 71 |
+
start_datetime,
|
| 72 |
+
end_datetime,
|
| 73 |
+
message_filter,
|
| 74 |
+
campaign_id,
|
| 75 |
+
private_key_str,
|
| 76 |
+
user,
|
| 77 |
+
account_identifier,
|
| 78 |
+
warehouse,
|
| 79 |
+
database,
|
| 80 |
+
schema,
|
| 81 |
+
role,
|
| 82 |
+
client,
|
| 83 |
+
integration_filter=None,
|
| 84 |
+
ad_format_filter=None,
|
| 85 |
+
):
|
| 86 |
+
"""
|
| 87 |
+
Universal drop analysis for any Integration + Ad_Format.
|
| 88 |
+
"""
|
| 89 |
+
# 1) Build SQL statements with filters
|
| 90 |
+
main_sql = get_main_query(
|
| 91 |
+
table,
|
| 92 |
+
start_datetime,
|
| 93 |
+
end_datetime,
|
| 94 |
+
message_filter,
|
| 95 |
+
campaign_id,
|
| 96 |
+
integration_filter,
|
| 97 |
+
ad_format_filter,
|
| 98 |
+
)
|
| 99 |
+
flex_sql = get_flex_bucket_query(
|
| 100 |
+
table,
|
| 101 |
+
start_datetime,
|
| 102 |
+
end_datetime,
|
| 103 |
+
message_filter,
|
| 104 |
+
campaign_id,
|
| 105 |
+
integration_filter,
|
| 106 |
+
ad_format_filter,
|
| 107 |
+
)
|
| 108 |
+
bidder_sql = get_bidder_query(
|
| 109 |
+
table,
|
| 110 |
+
start_datetime,
|
| 111 |
+
end_datetime,
|
| 112 |
+
message_filter,
|
| 113 |
+
campaign_id,
|
| 114 |
+
integration_filter,
|
| 115 |
+
ad_format_filter,
|
| 116 |
+
)
|
| 117 |
+
device_sql = get_device_query(
|
| 118 |
+
table,
|
| 119 |
+
start_datetime,
|
| 120 |
+
end_datetime,
|
| 121 |
+
message_filter,
|
| 122 |
+
campaign_id,
|
| 123 |
+
integration_filter,
|
| 124 |
+
ad_format_filter,
|
| 125 |
+
)
|
| 126 |
+
ad_unit_sql = get_ad_unit_query(
|
| 127 |
+
table,
|
| 128 |
+
start_datetime,
|
| 129 |
+
end_datetime,
|
| 130 |
+
message_filter,
|
| 131 |
+
campaign_id,
|
| 132 |
+
integration_filter,
|
| 133 |
+
ad_format_filter,
|
| 134 |
+
)
|
| 135 |
+
refresh_sql = get_refresh_query(
|
| 136 |
+
table,
|
| 137 |
+
start_datetime,
|
| 138 |
+
end_datetime,
|
| 139 |
+
message_filter,
|
| 140 |
+
campaign_id,
|
| 141 |
+
integration_filter,
|
| 142 |
+
ad_format_filter,
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
# 2) Run top-level query once
|
| 146 |
+
if not st.session_state["query_run"]:
|
| 147 |
+
try:
|
| 148 |
+
t0 = time.time()
|
| 149 |
+
with st.spinner("Running top-level impressions query..."):
|
| 150 |
+
df = cached_run_query(
|
| 151 |
+
main_sql,
|
| 152 |
+
private_key_str,
|
| 153 |
+
user,
|
| 154 |
+
account_identifier,
|
| 155 |
+
warehouse,
|
| 156 |
+
database,
|
| 157 |
+
schema,
|
| 158 |
+
role,
|
| 159 |
+
)
|
| 160 |
+
elapsed = time.time() - t0
|
| 161 |
+
mins, secs = divmod(elapsed, 60)
|
| 162 |
+
st.info(f"Query ran in {int(mins)}m {secs:.2f}s")
|
| 163 |
+
|
| 164 |
+
# Normalize timestamps
|
| 165 |
+
df.columns = [c.upper() for c in df.columns]
|
| 166 |
+
df = df.sort_values(["EST_HOUR", "EST_MINUTE"])
|
| 167 |
+
df["timestamp"] = pd.to_datetime(
|
| 168 |
+
df["EST_DATE"].astype(str)
|
| 169 |
+
+ " "
|
| 170 |
+
+ df["EST_HOUR"].astype(str).str.zfill(2)
|
| 171 |
+
+ ":"
|
| 172 |
+
+ df["EST_MINUTE"].astype(str).str.zfill(2)
|
| 173 |
+
)
|
| 174 |
+
df["5min"] = df["timestamp"].dt.floor("5T")
|
| 175 |
+
base_date = (
|
| 176 |
+
df[df["TIMEFRAME"] == "TODAY"]["5min"].iloc[0].normalize()
|
| 177 |
+
if not df[df["TIMEFRAME"] == "TODAY"].empty
|
| 178 |
+
else pd.Timestamp("today").normalize()
|
| 179 |
+
)
|
| 180 |
+
start_hour = int(st.session_state.get("start_hour", 23))
|
| 181 |
+
|
| 182 |
+
def norm(ts):
|
| 183 |
+
return ts + pd.Timedelta(hours=24) if ts.hour < start_hour else ts
|
| 184 |
+
|
| 185 |
+
df["normalized_time"] = (
|
| 186 |
+
base_date + (df["5min"] - df["5min"].dt.normalize())
|
| 187 |
+
).apply(norm)
|
| 188 |
+
|
| 189 |
+
# Aggregate
|
| 190 |
+
agg_df = df.groupby(["TIMEFRAME", "normalized_time"], as_index=False)[
|
| 191 |
+
"CNT"
|
| 192 |
+
].sum()
|
| 193 |
+
|
| 194 |
+
# Save to state
|
| 195 |
+
st.session_state.update(
|
| 196 |
+
query_df=df, agg_df=agg_df, query_run=True, top_level_drop_time=None
|
| 197 |
+
)
|
| 198 |
+
except Exception as e:
|
| 199 |
+
st.error(f"Main query error: {e}")
|
| 200 |
+
return
|
| 201 |
+
|
| 202 |
+
else:
|
| 203 |
+
df = st.session_state["query_df"]
|
| 204 |
+
agg_df = st.session_state["agg_df"]
|
| 205 |
+
|
| 206 |
+
# 3) Display top-level
|
| 207 |
+
st.header("Top-Level Impressions Data")
|
| 208 |
+
drop_time = None
|
| 209 |
+
for ts in sorted(agg_df["normalized_time"].unique()):
|
| 210 |
+
today_cnt = agg_df[
|
| 211 |
+
(agg_df["normalized_time"] == ts) & (agg_df["TIMEFRAME"] == "TODAY")
|
| 212 |
+
]["CNT"]
|
| 213 |
+
other_cnt = agg_df[
|
| 214 |
+
(agg_df["normalized_time"] == ts) & (agg_df["TIMEFRAME"] != "TODAY")
|
| 215 |
+
]["CNT"]
|
| 216 |
+
if (
|
| 217 |
+
not today_cnt.empty
|
| 218 |
+
and not other_cnt.empty
|
| 219 |
+
and today_cnt.values[0] <= 0.9 * other_cnt.mean()
|
| 220 |
+
):
|
| 221 |
+
drop_time = ts
|
| 222 |
+
break
|
| 223 |
+
|
| 224 |
+
if drop_time:
|
| 225 |
+
msg = f"Top-Level: Delivery drop detected at {drop_time.strftime('%I:%M %p')}."
|
| 226 |
+
st.warning(msg)
|
| 227 |
+
else:
|
| 228 |
+
msg = "Top-Level: No significant delivery drop detected."
|
| 229 |
+
st.info(msg)
|
| 230 |
+
|
| 231 |
+
# Append message once
|
| 232 |
+
findings_messages = st.session_state.setdefault("findings_messages", [])
|
| 233 |
+
if msg not in findings_messages:
|
| 234 |
+
findings_messages.append(msg)
|
| 235 |
+
st.session_state["top_level_drop_time"] = drop_time
|
| 236 |
+
|
| 237 |
+
with st.expander("Raw Data"):
|
| 238 |
+
st.dataframe(df)
|
| 239 |
+
with st.expander("Aggregated Data"):
|
| 240 |
+
st.dataframe(agg_df)
|
| 241 |
+
|
| 242 |
+
fig = px.line(
|
| 243 |
+
agg_df,
|
| 244 |
+
x="normalized_time",
|
| 245 |
+
y="CNT",
|
| 246 |
+
color="TIMEFRAME",
|
| 247 |
+
labels={"normalized_time": "Time of Day", "CNT": "Impressions"},
|
| 248 |
+
)
|
| 249 |
+
fig.update_xaxes(tickformat="%I:%M %p")
|
| 250 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 251 |
+
|
| 252 |
+
# 4) Share-of-Voice
|
| 253 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 254 |
+
st.header("Share of Voice Analysis")
|
| 255 |
+
sov_sql = get_main_int_sov_query(
|
| 256 |
+
table,
|
| 257 |
+
start_datetime,
|
| 258 |
+
end_datetime,
|
| 259 |
+
message_filter,
|
| 260 |
+
campaign_id,
|
| 261 |
+
ad_format_filter=ad_format_filter,
|
| 262 |
+
)
|
| 263 |
+
try:
|
| 264 |
+
with st.spinner("Running SOV query..."):
|
| 265 |
+
sov_df = cached_run_query(
|
| 266 |
+
sov_sql,
|
| 267 |
+
private_key_str,
|
| 268 |
+
user,
|
| 269 |
+
account_identifier,
|
| 270 |
+
warehouse,
|
| 271 |
+
database,
|
| 272 |
+
schema,
|
| 273 |
+
role,
|
| 274 |
+
)
|
| 275 |
+
# Normalize same as above
|
| 276 |
+
sov_df["timestamp"] = pd.to_datetime(
|
| 277 |
+
sov_df["EST_DATE"].astype(str)
|
| 278 |
+
+ " "
|
| 279 |
+
+ sov_df["EST_HOUR"].astype(str).str.zfill(2)
|
| 280 |
+
+ ":"
|
| 281 |
+
+ sov_df["EST_MINUTE"].astype(str).str.zfill(2)
|
| 282 |
+
)
|
| 283 |
+
sov_df["5min"] = sov_df["timestamp"].dt.floor("5T")
|
| 284 |
+
base = pd.Timestamp("today").normalize()
|
| 285 |
+
sov_df["normalized_time"] = (
|
| 286 |
+
base + (sov_df["5min"] - sov_df["5min"].dt.normalize())
|
| 287 |
+
).apply(lambda ts: ts + pd.Timedelta(hours=24) if ts.hour < start_hour else ts)
|
| 288 |
+
|
| 289 |
+
# Group, exclude, percent, order
|
| 290 |
+
sov_grp = sov_df.groupby(["normalized_time", "INTEGRATION"], as_index=False)[
|
| 291 |
+
"CNT"
|
| 292 |
+
].sum()
|
| 293 |
+
sov_grp = sov_grp[~sov_grp["INTEGRATION"].str.contains("Ignore|Affiliate|PG")]
|
| 294 |
+
sov_grp["share"] = sov_grp["CNT"] / sov_grp.groupby("normalized_time")[
|
| 295 |
+
"CNT"
|
| 296 |
+
].transform("sum")
|
| 297 |
+
order = (
|
| 298 |
+
sov_grp.groupby("INTEGRATION")["share"]
|
| 299 |
+
.sum()
|
| 300 |
+
.sort_values(ascending=False)
|
| 301 |
+
.index.tolist()
|
| 302 |
+
)
|
| 303 |
+
fig2 = px.line(
|
| 304 |
+
sov_grp,
|
| 305 |
+
x="normalized_time",
|
| 306 |
+
y="share",
|
| 307 |
+
color="INTEGRATION",
|
| 308 |
+
category_orders={"INTEGRATION": order},
|
| 309 |
+
labels={"share": "Share of Total Impressions"},
|
| 310 |
+
)
|
| 311 |
+
fig2.update_xaxes(tickformat="%I:%M %p")
|
| 312 |
+
fig2.update_yaxes(tickformat=".2%")
|
| 313 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 314 |
+
except Exception as e:
|
| 315 |
+
st.error(f"SOV error: {e}")
|
| 316 |
+
|
| 317 |
+
# 5) Key Findings via OpenAI <-- CUT starts here
|
| 318 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 319 |
+
st.header("Key Findings and Next Steps")
|
| 320 |
+
key_findings_container = st.container()
|
| 321 |
+
with key_findings_container:
|
| 322 |
+
if st.session_state.get("key_findings_output"):
|
| 323 |
+
st.markdown(
|
| 324 |
+
st.session_state.get("key_findings_output"),
|
| 325 |
+
unsafe_allow_html=True,
|
| 326 |
+
)
|
| 327 |
+
else:
|
| 328 |
+
st.info(
|
| 329 |
+
"Key findings will appear here once additional queries have finished."
|
| 330 |
+
)
|
| 331 |
+
|
| 332 |
+
def generate_key_findings_callback():
|
| 333 |
+
findings = "\n".join(st.session_state.get("findings_messages", []))
|
| 334 |
+
flex_jira_info = st.session_state.get("flex_jira_info", "")
|
| 335 |
+
jira_section = (
|
| 336 |
+
f"\nJira Ticket Information from Flex Bucket section:\n{flex_jira_info}\n"
|
| 337 |
+
if flex_jira_info
|
| 338 |
+
else ""
|
| 339 |
+
)
|
| 340 |
+
prompt = (
|
| 341 |
+
"You are a helpful analyst investigating a drop in ad delivery. "
|
| 342 |
+
"A delivery drop detection dashboard has compiled a list of findings "
|
| 343 |
+
"showing potential drops across different dimensions. Below are the detailed findings "
|
| 344 |
+
"from the dashboard, along with any flagged Jira ticket information. "
|
| 345 |
+
"The NEXT_STEPS_INSTRUCTIONS file contains recommended next steps for each section "
|
| 346 |
+
"depending on the drop(s) flagged in the dashboard:\n\n"
|
| 347 |
+
f"Findings:\n{findings}\n"
|
| 348 |
+
f"{jira_section}\n"
|
| 349 |
+
"Next Steps Instructions:\n"
|
| 350 |
+
f"{NEXT_STEPS_INSTRUCTIONS}\n\n"
|
| 351 |
+
"Using the Findings, Jira section information, and Next Steps Instructions as helpful context, "
|
| 352 |
+
"create a concise summary that identifies the likely cause/causes of any detected delivery drops "
|
| 353 |
+
"and recommends actionable next steps. The summary should be a few sentences long followed by bullet points "
|
| 354 |
+
"with the main findings and recommended next steps. Please output the summary in Markdown format with each bullet "
|
| 355 |
+
"point on a new line, and indent sub-bullets properly. Ensure that each bullet point is on its own line. "
|
| 356 |
+
"There is no need to explicitly mention the Instructions file in the summary; just use it to inform your analysis."
|
| 357 |
+
)
|
| 358 |
+
st.session_state["key_findings"] = prompt
|
| 359 |
+
try:
|
| 360 |
+
response = client.responses.create(
|
| 361 |
+
model="o3-mini",
|
| 362 |
+
instructions="You are a helpful analyst who provides insights and recommends next steps.",
|
| 363 |
+
input=prompt,
|
| 364 |
+
)
|
| 365 |
+
st.session_state["key_findings_output"] = response.output_text.strip()
|
| 366 |
+
except Exception as e:
|
| 367 |
+
st.session_state["key_findings_output"] = f"Error calling OpenAI API: {e}"
|
| 368 |
+
|
| 369 |
+
# Once additional queries complete (below), automatically generate key findings:
|
| 370 |
+
generate_key_findings_callback()
|
| 371 |
+
|
| 372 |
+
# 6) Breakdown dimensions
|
| 373 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 374 |
+
st.header("Specific Dimensions Data")
|
| 375 |
+
st.info("Running breakdown queries...")
|
| 376 |
+
queries = {
|
| 377 |
+
"flex_bucket": flex_sql,
|
| 378 |
+
"bidder": bidder_sql,
|
| 379 |
+
"device": device_sql,
|
| 380 |
+
"ad_unit": ad_unit_sql,
|
| 381 |
+
"refresh": refresh_sql,
|
| 382 |
+
}
|
| 383 |
+
with st.spinner("Running additional queries..."):
|
| 384 |
+
with concurrent.futures.ThreadPoolExecutor() as ex:
|
| 385 |
+
futures = {
|
| 386 |
+
k: ex.submit(
|
| 387 |
+
cached_run_query,
|
| 388 |
+
q,
|
| 389 |
+
private_key_str,
|
| 390 |
+
user,
|
| 391 |
+
account_identifier,
|
| 392 |
+
warehouse,
|
| 393 |
+
database,
|
| 394 |
+
schema,
|
| 395 |
+
role,
|
| 396 |
+
)
|
| 397 |
+
for k, q in queries.items()
|
| 398 |
+
}
|
| 399 |
+
start_ts = {k: time.time() for k in queries}
|
| 400 |
+
conts = {k: st.container() for k in queries}
|
| 401 |
+
while futures:
|
| 402 |
+
done, _ = concurrent.futures.wait(
|
| 403 |
+
futures.values(),
|
| 404 |
+
timeout=0.5,
|
| 405 |
+
return_when=concurrent.futures.FIRST_COMPLETED,
|
| 406 |
+
)
|
| 407 |
+
for fut in done:
|
| 408 |
+
key = next(k for k, v in futures.items() if v is fut)
|
| 409 |
+
df_res = fut.result()
|
| 410 |
+
update_section_generic_drop(
|
| 411 |
+
key, df_res, start_ts, conts[key], drop_time
|
| 412 |
+
)
|
| 413 |
+
del futures[key]
|
| 414 |
+
|
| 415 |
+
# Update the key findings container with the new output.
|
| 416 |
+
with key_findings_container:
|
| 417 |
+
st.markdown(
|
| 418 |
+
st.session_state.get("key_findings_output", ""),
|
| 419 |
+
unsafe_allow_html=True,
|
| 420 |
+
)
|
delivery_queries.py
ADDED
|
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def _quote_identifier(identifier: str) -> str:
|
| 5 |
+
"""Quote SQL identifiers that contain special characters."""
|
| 6 |
+
|
| 7 |
+
def quote_part(part: str) -> str:
|
| 8 |
+
if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", part):
|
| 9 |
+
return part
|
| 10 |
+
return f'"{part}"'
|
| 11 |
+
|
| 12 |
+
return ".".join(quote_part(p) for p in identifier.split("."))
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_main_query(
|
| 16 |
+
table,
|
| 17 |
+
start_datetime,
|
| 18 |
+
end_datetime,
|
| 19 |
+
message_filter,
|
| 20 |
+
campaign_id,
|
| 21 |
+
integration_filter=None,
|
| 22 |
+
ad_format_filter=None,
|
| 23 |
+
):
|
| 24 |
+
"""Returns the main impression count query filtered by integration and ad format."""
|
| 25 |
+
|
| 26 |
+
table = _quote_identifier(table)
|
| 27 |
+
|
| 28 |
+
# Build optional filters to apply after the CTE union.
|
| 29 |
+
# Filtering on the calculated aliases (Integration/Ad_Format)
|
| 30 |
+
# in the CTE `WHERE` clause would force Snowflake to interpret
|
| 31 |
+
# those names as existing columns and attempt type coercion,
|
| 32 |
+
# which triggered errors like:
|
| 33 |
+
# Numeric value 'bciq1rts' is not recognized
|
| 34 |
+
# Instead we apply the filters on the outer SELECT where the
|
| 35 |
+
# aliases are available.
|
| 36 |
+
post_union_filter = ""
|
| 37 |
+
if integration_filter:
|
| 38 |
+
post_union_filter += f" AND Integration = '{integration_filter}'"
|
| 39 |
+
if ad_format_filter:
|
| 40 |
+
post_union_filter += f" AND Ad_Format = '{ad_format_filter}'"
|
| 41 |
+
|
| 42 |
+
return f"""
|
| 43 |
+
WITH today AS (
|
| 44 |
+
SELECT
|
| 45 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 46 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 47 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 48 |
+
CASE
|
| 49 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 50 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 51 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 52 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 53 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 54 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 55 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 56 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 57 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 58 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 59 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 60 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 61 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 62 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 63 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 64 |
+
WHEN LEFT(b.name,1) = '4'
|
| 65 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 66 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 67 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 68 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 69 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 70 |
+
WHEN (body[0]:campaignId IS NULL
|
| 71 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 72 |
+
THEN 'Prebid'
|
| 73 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 74 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 75 |
+
ELSE 'OB'
|
| 76 |
+
END AS Integration,
|
| 77 |
+
CASE
|
| 78 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 79 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 80 |
+
ELSE 'Display'
|
| 81 |
+
END AS Ad_Format,
|
| 82 |
+
COUNT(*) AS CNT,
|
| 83 |
+
'Today' AS timeframe
|
| 84 |
+
FROM {table} a
|
| 85 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 86 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 87 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 88 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 89 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 90 |
+
BETWEEN '{start_datetime}' AND '{end_datetime}'
|
| 91 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 92 |
+
GROUP BY ALL
|
| 93 |
+
),
|
| 94 |
+
prev1 AS (
|
| 95 |
+
-- 1 Week Ago
|
| 96 |
+
SELECT
|
| 97 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 98 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 99 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 100 |
+
CASE
|
| 101 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 102 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 103 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 104 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 105 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 106 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 107 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 108 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 109 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 110 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 111 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 112 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 113 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 114 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 115 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 116 |
+
WHEN LEFT(b.name,1) = '4'
|
| 117 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 118 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 119 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 120 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 121 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 122 |
+
WHEN (body[0]:campaignId IS NULL
|
| 123 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 124 |
+
THEN 'Prebid'
|
| 125 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 126 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 127 |
+
ELSE 'OB'
|
| 128 |
+
END AS Integration,
|
| 129 |
+
CASE
|
| 130 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 131 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 132 |
+
ELSE 'Display'
|
| 133 |
+
END AS Ad_Format,
|
| 134 |
+
COUNT(*) AS CNT,
|
| 135 |
+
'1 Week Ago' AS timeframe
|
| 136 |
+
FROM {table} a
|
| 137 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 138 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 139 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 140 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 141 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 142 |
+
BETWEEN dateadd(DAY,-7,'{start_datetime}') AND dateadd(DAY,-7,'{end_datetime}')
|
| 143 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 144 |
+
GROUP BY ALL
|
| 145 |
+
),
|
| 146 |
+
prev2 AS (
|
| 147 |
+
-- 2 Weeks Ago
|
| 148 |
+
SELECT
|
| 149 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 150 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 151 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 152 |
+
CASE
|
| 153 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 154 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 155 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 156 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 157 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 158 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 159 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 160 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 161 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 162 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 163 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 164 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 165 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 166 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 167 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 168 |
+
WHEN LEFT(b.name,1) = '4'
|
| 169 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 170 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 171 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 172 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 173 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 174 |
+
WHEN (body[0]:campaignId IS NULL
|
| 175 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 176 |
+
THEN 'Prebid'
|
| 177 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 178 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 179 |
+
ELSE 'OB'
|
| 180 |
+
END AS Integration,
|
| 181 |
+
CASE
|
| 182 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 183 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 184 |
+
ELSE 'Display'
|
| 185 |
+
END AS Ad_Format,
|
| 186 |
+
COUNT(*) AS CNT,
|
| 187 |
+
'2 Weeks Ago' AS timeframe
|
| 188 |
+
FROM {table} a
|
| 189 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 190 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 191 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 192 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 193 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 194 |
+
BETWEEN dateadd(DAY,-14,'{start_datetime}')
|
| 195 |
+
AND dateadd(DAY,-14,'{end_datetime}')
|
| 196 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 197 |
+
GROUP BY ALL
|
| 198 |
+
),
|
| 199 |
+
prev3 AS (
|
| 200 |
+
-- 3 Weeks Ago
|
| 201 |
+
SELECT
|
| 202 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 203 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 204 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 205 |
+
CASE
|
| 206 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 207 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 208 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 209 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 210 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 211 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 212 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 213 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 214 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 215 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 216 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 217 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 218 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 219 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 220 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 221 |
+
WHEN LEFT(b.name,1) = '4'
|
| 222 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 223 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 224 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 225 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 226 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 227 |
+
WHEN (body[0]:campaignId IS NULL
|
| 228 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 229 |
+
THEN 'Prebid'
|
| 230 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 231 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 232 |
+
ELSE 'OB'
|
| 233 |
+
END AS Integration,
|
| 234 |
+
CASE
|
| 235 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 236 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 237 |
+
ELSE 'Display'
|
| 238 |
+
END AS Ad_Format,
|
| 239 |
+
COUNT(*) AS CNT,
|
| 240 |
+
'3 Weeks Ago' AS timeframe
|
| 241 |
+
FROM {table} a
|
| 242 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 243 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 244 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 245 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 246 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 247 |
+
BETWEEN dateadd(DAY,-21,'{start_datetime}')
|
| 248 |
+
AND dateadd(DAY,-21,'{end_datetime}')
|
| 249 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 250 |
+
GROUP BY ALL
|
| 251 |
+
)
|
| 252 |
+
SELECT * FROM (
|
| 253 |
+
SELECT * FROM today
|
| 254 |
+
UNION ALL SELECT * FROM prev1
|
| 255 |
+
UNION ALL SELECT * FROM prev2
|
| 256 |
+
UNION ALL SELECT * FROM prev3
|
| 257 |
+
)
|
| 258 |
+
WHERE 1=1 {post_union_filter}
|
| 259 |
+
"""
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def get_bidder_query(
|
| 263 |
+
table,
|
| 264 |
+
start_datetime,
|
| 265 |
+
end_datetime,
|
| 266 |
+
message_filter,
|
| 267 |
+
campaign_id,
|
| 268 |
+
integration_filter=None,
|
| 269 |
+
ad_format_filter=None,
|
| 270 |
+
):
|
| 271 |
+
base = get_main_query(
|
| 272 |
+
table,
|
| 273 |
+
start_datetime,
|
| 274 |
+
end_datetime,
|
| 275 |
+
message_filter,
|
| 276 |
+
campaign_id,
|
| 277 |
+
integration_filter,
|
| 278 |
+
ad_format_filter,
|
| 279 |
+
)
|
| 280 |
+
# inject hb_bidder field
|
| 281 |
+
return base.replace(
|
| 282 |
+
"COUNT(*) AS CNT",
|
| 283 |
+
"COUNT(*) AS CNT, body[0]:slotTargeting:hb_bidder[0]::varchar AS hb_bidder",
|
| 284 |
+
)
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def get_flex_bucket_query(
|
| 288 |
+
table,
|
| 289 |
+
start_datetime,
|
| 290 |
+
end_datetime,
|
| 291 |
+
message_filter,
|
| 292 |
+
campaign_id,
|
| 293 |
+
integration_filter=None,
|
| 294 |
+
ad_format_filter=None,
|
| 295 |
+
):
|
| 296 |
+
base = get_main_query(
|
| 297 |
+
table,
|
| 298 |
+
start_datetime,
|
| 299 |
+
end_datetime,
|
| 300 |
+
message_filter,
|
| 301 |
+
campaign_id,
|
| 302 |
+
integration_filter,
|
| 303 |
+
ad_format_filter,
|
| 304 |
+
)
|
| 305 |
+
return base.replace("COUNT(*) AS CNT", "COUNT(*) AS CNT, bucket")
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def get_device_query(
|
| 309 |
+
table,
|
| 310 |
+
start_datetime,
|
| 311 |
+
end_datetime,
|
| 312 |
+
message_filter,
|
| 313 |
+
campaign_id,
|
| 314 |
+
integration_filter=None,
|
| 315 |
+
ad_format_filter=None,
|
| 316 |
+
):
|
| 317 |
+
base = get_main_query(
|
| 318 |
+
table,
|
| 319 |
+
start_datetime,
|
| 320 |
+
end_datetime,
|
| 321 |
+
message_filter,
|
| 322 |
+
campaign_id,
|
| 323 |
+
integration_filter,
|
| 324 |
+
ad_format_filter,
|
| 325 |
+
)
|
| 326 |
+
# inject device case
|
| 327 |
+
device_case = (
|
| 328 |
+
"CASE "
|
| 329 |
+
"WHEN useragent LIKE '%iPad%' OR useragent LIKE '%Tablet%' THEN 'tablet' "
|
| 330 |
+
"WHEN useragent LIKE '%Windows%' OR useragent LIKE '%Macintosh%' THEN 'desktop' "
|
| 331 |
+
"WHEN useragent LIKE '%Android%' OR useragent LIKE '%iPhone%' OR useragent LIKE '%Mobi%' THEN 'phone' "
|
| 332 |
+
"ELSE 'other' END AS device"
|
| 333 |
+
)
|
| 334 |
+
return base.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {device_case}")
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def get_ad_unit_query(
|
| 338 |
+
table,
|
| 339 |
+
start_datetime,
|
| 340 |
+
end_datetime,
|
| 341 |
+
message_filter,
|
| 342 |
+
campaign_id,
|
| 343 |
+
integration_filter=None,
|
| 344 |
+
ad_format_filter=None,
|
| 345 |
+
):
|
| 346 |
+
base = get_main_query(
|
| 347 |
+
table,
|
| 348 |
+
start_datetime,
|
| 349 |
+
end_datetime,
|
| 350 |
+
message_filter,
|
| 351 |
+
campaign_id,
|
| 352 |
+
integration_filter,
|
| 353 |
+
ad_format_filter,
|
| 354 |
+
)
|
| 355 |
+
ad_unit_case = (
|
| 356 |
+
"CASE "
|
| 357 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Content%' THEN 'Content' "
|
| 358 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Footer%' THEN 'Footer' "
|
| 359 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Recipe%' THEN 'Recipe' "
|
| 360 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Sidebar%' THEN 'Sidebar' "
|
| 361 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Header%' THEN 'Header' "
|
| 362 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Below_Post%' THEN 'Below_Post' "
|
| 363 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Outstream%' THEN 'Sticky Outstream' "
|
| 364 |
+
"WHEN body[0]:slotElementId::varchar LIKE '%Video%' THEN 'Video' "
|
| 365 |
+
"ELSE 'Other' END AS ad_unit_group"
|
| 366 |
+
)
|
| 367 |
+
return base.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {ad_unit_case}")
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
def get_refresh_query(
|
| 371 |
+
table,
|
| 372 |
+
start_datetime,
|
| 373 |
+
end_datetime,
|
| 374 |
+
message_filter,
|
| 375 |
+
campaign_id,
|
| 376 |
+
integration_filter=None,
|
| 377 |
+
ad_format_filter=None,
|
| 378 |
+
):
|
| 379 |
+
base = get_main_query(
|
| 380 |
+
table,
|
| 381 |
+
start_datetime,
|
| 382 |
+
end_datetime,
|
| 383 |
+
message_filter,
|
| 384 |
+
campaign_id,
|
| 385 |
+
integration_filter,
|
| 386 |
+
ad_format_filter,
|
| 387 |
+
)
|
| 388 |
+
refresh_field = "body[0]:slotTargeting:refresh[0]::varchar AS Refresh"
|
| 389 |
+
return base.replace("COUNT(*) AS CNT", f"COUNT(*) AS CNT, {refresh_field}")
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
def get_main_int_sov_query(
|
| 393 |
+
table,
|
| 394 |
+
start_datetime,
|
| 395 |
+
end_datetime,
|
| 396 |
+
message_filter,
|
| 397 |
+
campaign_id,
|
| 398 |
+
# integration_filter no longer used for SOV
|
| 399 |
+
ad_format_filter=None,
|
| 400 |
+
):
|
| 401 |
+
"""
|
| 402 |
+
Returns the share-of-voice query filtered only by ad format.
|
| 403 |
+
"""
|
| 404 |
+
table = _quote_identifier(table)
|
| 405 |
+
|
| 406 |
+
# Only apply Ad_Format filtering after the CTE so that the alias
|
| 407 |
+
# can be referenced safely.
|
| 408 |
+
post_union_filter = ""
|
| 409 |
+
if ad_format_filter:
|
| 410 |
+
post_union_filter = f" AND Ad_Format = '{ad_format_filter}'"
|
| 411 |
+
|
| 412 |
+
return f"""
|
| 413 |
+
WITH today AS (
|
| 414 |
+
SELECT
|
| 415 |
+
to_date(convert_timezone('UTC','America/New_York',datetime)) AS EST_DATE,
|
| 416 |
+
extract(hour FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_HOUR,
|
| 417 |
+
extract(minute FROM convert_timezone('UTC','America/New_York',datetime)) AS EST_MINUTE,
|
| 418 |
+
CASE
|
| 419 |
+
WHEN body[0]:yieldGroupIds[0]::varchar IN ('397722') THEN 'HBT_OB'
|
| 420 |
+
WHEN body[0]:campaignId::varchar = '2204701358' THEN 'House'
|
| 421 |
+
WHEN b.name LIKE '%Prebid%' THEN 'Prebid'
|
| 422 |
+
WHEN b.name LIKE '%TAM%' OR b.name LIKE '%Amazon%' THEN 'TAM'
|
| 423 |
+
WHEN b.name LIKE '%AdX%' THEN 'AdX'
|
| 424 |
+
WHEN len(body[0]:lineItemId::varchar) > 10 THEN 'AdX'
|
| 425 |
+
WHEN c.name LIKE '%TAM%' OR c.name LIKE '%Amazon%' THEN 'TAM'
|
| 426 |
+
WHEN c.name LIKE '%AdX%' THEN 'AdX'
|
| 427 |
+
WHEN c.name LIKE '%CS%' OR c.name LIKE '%S2S%' THEN 'Prebid'
|
| 428 |
+
WHEN b.name LIKE '39_%_%' THEN 'Direct'
|
| 429 |
+
WHEN b.name LIKE '38_%_%' THEN 'Direct'
|
| 430 |
+
WHEN b.name LIKE '8_%_%_%' AND b.name LIKE '%IX%' THEN 'Prebid'
|
| 431 |
+
WHEN b.name LIKE '8_%_%_%' THEN 'Ignore - House, Test, Pub Deal'
|
| 432 |
+
WHEN b.name LIKE '7_%_%_%' THEN 'PG'
|
| 433 |
+
WHEN b.name LIKE '5_%_%_%' THEN 'PG'
|
| 434 |
+
WHEN LEFT(b.name,1) = '4'
|
| 435 |
+
AND RIGHT(LEFT(b.name,2),1) BETWEEN '0' AND '9'
|
| 436 |
+
AND RIGHT(LEFT(b.name,3),1) BETWEEN '0' AND '9'
|
| 437 |
+
AND RIGHT(LEFT(b.name,4),1) BETWEEN '0' AND '9'
|
| 438 |
+
AND RIGHT(LEFT(b.name,5),1) = '_' THEN 'Affiliate'
|
| 439 |
+
WHEN b.name LIKE '0_%_%_%' THEN 'Ignore'
|
| 440 |
+
WHEN (body[0]:campaignId IS NULL
|
| 441 |
+
AND body[0]:slotTargeting:hb_bidder[0]::varchar IS NOT NULL)
|
| 442 |
+
THEN 'Prebid'
|
| 443 |
+
WHEN body[0]:companyIds IS NOT NULL THEN 'OB'
|
| 444 |
+
WHEN c.id IS NOT NULL THEN 'Prebid'
|
| 445 |
+
ELSE 'OB'
|
| 446 |
+
END AS Integration,
|
| 447 |
+
CASE
|
| 448 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Outstream%' THEN 'Display'
|
| 449 |
+
WHEN split(body[0]['adUnitPath'],'/')[2]::varchar LIKE '%Video%' THEN 'Video'
|
| 450 |
+
ELSE 'Display'
|
| 451 |
+
END AS Ad_Format,
|
| 452 |
+
COUNT(*) AS CNT,
|
| 453 |
+
'Today' AS timeframe
|
| 454 |
+
FROM {table} a
|
| 455 |
+
LEFT JOIN ANALYTICS.GAM360.ORDERS b
|
| 456 |
+
ON a.body[0]:campaignId::VARCHAR = b.ID::VARCHAR
|
| 457 |
+
LEFT JOIN ANALYTICS.GAM360.LINEITEM c
|
| 458 |
+
ON c.id::VARCHAR = a.body[0]:lineItemId::VARCHAR
|
| 459 |
+
WHERE convert_timezone('UTC','America/New_York',datetime)
|
| 460 |
+
BETWEEN '{start_datetime}' AND '{end_datetime}'
|
| 461 |
+
AND message = 'SlotRenderEnded::adImpression'
|
| 462 |
+
GROUP BY ALL
|
| 463 |
+
)
|
| 464 |
+
SELECT * FROM today
|
| 465 |
+
WHERE 1=1 {post_union_filter}
|
| 466 |
+
"""
|
delivery_section_utils.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Map each section key (space-separated) to the DataFrame column to group by.
# All sections currently share the same 10% drop threshold, so the per-section
# dicts are derived from a single column mapping.
_SECTION_GROUP_COLUMNS = {
    "flex bucket": "BUCKET",
    "bidder": "HB_BIDDER",
    "device": "DEVICE",
    "ad unit": "AD_UNIT_GROUP",
    "refresh": "REFRESH",
}

SECTION_CONFIG = {
    section: {"group_col": column, "drop_percent": 0.10}
    for section, column in _SECTION_GROUP_COLUMNS.items()
}
|
| 29 |
+
|
| 30 |
+
def update_section_generic_drop(key, df, start_times, container, drop_time):
    """Render a 5-minute breakdown for one section, with drop detection.

    Args:
        key: Section name; 'flex_bucket' and 'flex bucket' are both accepted
            (underscores are normalized to spaces before the config lookup).
        df: Query result with EST_DATE / EST_HOUR / EST_MINUTE / CNT /
            TIMEFRAME columns (case-insensitive) plus the section's group
            column from SECTION_CONFIG.
        start_times: Mapping of section key -> query start time (epoch
            seconds), used to report the query duration.
        container: Streamlit container to render into.
        drop_time: The flagged 5-minute interval (pandas Timestamp), or None
            when no top-level drop was detected.
    """
    elapsed = time.time() - start_times[key]
    mins, secs = divmod(elapsed, 60)

    # Work on a copy: the original assigned df.columns in place, which
    # mutated the caller's DataFrame as a side effect.
    df = df.copy()

    # Standardize column names & build a per-row timestamp.
    df.columns = [c.upper() for c in df.columns]
    df = df.sort_values(["EST_HOUR", "EST_MINUTE"])
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str) + " " +
        df["EST_HOUR"].astype(str).str.zfill(2) + ":" +
        df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    # "5min", not the deprecated "T" alias, so this keeps working on pandas 2.2+.
    df["5MIN"] = df["timestamp"].dt.floor("5min")

    # Normalize the lookup key to match SECTION_CONFIG.
    lookup = key.replace("_", " ").lower()
    config = SECTION_CONFIG.get(lookup)
    if not config:
        st.error(f"No configuration for section '{key}'.")
        return

    group_col = config["group_col"]
    drop_pct = config["drop_percent"]

    with container:
        st.subheader(f"{lookup.title()} Data")
        st.info(f"Query completed in {int(mins)}m {secs:.2f}s")

        # Only the TODAY timeframe is charted here.
        today_data = df[df["TIMEFRAME"].str.upper() == "TODAY"]
        if today_data.empty:
            st.info("No TODAY data for this section.")
            return

        # Aggregate impressions per (5-minute interval, group) and plot.
        agg_today = (
            today_data
            .groupby(["5MIN", group_col], as_index=False)["CNT"]
            .sum()
        )
        title = f"{lookup.title()} Impressions by Time of Day (5‑min)"
        fig = px.line(
            agg_today,
            x="5MIN",
            y="CNT",
            color=group_col,
            title=title,
            labels={"5MIN": "Time", "CNT": "Impressions", group_col: lookup.title()}
        )
        fig.update_xaxes(tickformat="%I:%M %p")
        st.plotly_chart(fig, use_container_width=True)

        # Drop detection at the flagged interval: a group is flagged when its
        # count is at least drop_pct below the mean count across groups there.
        drop_subset = agg_today[agg_today["5MIN"] == drop_time]
        flagged = []
        if not drop_subset.empty:
            avg_cnt = drop_subset["CNT"].mean()
            for grp, cnt in drop_subset.groupby(group_col)["CNT"].sum().items():
                if cnt <= (1 - drop_pct) * avg_cnt:
                    flagged.append(grp)

        drop_str = drop_time.strftime("%I:%M %p") if drop_time else "N/A"
        if flagged:
            # str() each group label: some sections group on numeric columns,
            # and ", ".join() raises TypeError on non-string values.
            flagged_str = ", ".join(str(g) for g in flagged)
            st.warning(f"{lookup.title()}: At {drop_str}, these groups dropped: {flagged_str}.")
        else:
            st.info(f"{lookup.title()}: No significant drop at {drop_str}.")

        # Optional raw TODAY data for debugging.
        with st.expander(f"Show Raw TODAY {lookup.title()} Data"):
            st.dataframe(today_data)
|
.gitattributes → gitattributes
RENAMED
|
File without changes
|
house_ad_instructions.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# instructions.py
#
# Per-section guidance fed verbatim into the OpenAI prompt that summarizes
# house-ad spike findings (see generate_key_findings_callback in
# house_ad_main.py). Fix: corrected the typo "likley" -> "likely" in the
# Flex section, since this text is read by the model as-is.

NEXT_STEPS_INSTRUCTIONS = """
Flex:
If a single flex bucket is flagged as having a house ad spike, that flex bucket is the likely cause of the house ad spike.
Even if other sections are showing an increase in house ads, the single flex bucket is most likely the cause of the spike.
If other sections are showing increases in house ads, that information should be sent to the ad code team as additional reference for the flagged flex bucket.
The deployment information listed in the flex_section message, especially the deployment time and bucket, should be sent to the ad code team so that they can investigate the issue further.
Make sure to include a hyperlink to the Jira issue in your response.
If multiple flex buckets are flagged as having a house ad spike, the issue could be related to the flagged buckets.
If the majority of flex buckets are being flagged as having a house ad spike, the issue is likely not related to flex buckets.

hb_bidder:
If a single hb_bidder is flagged as having a house ad spike, the issue is likely caused by or related to that hb_bidder.
Even if other sections are showing an increase in house ads, the hb_bidder is most likely the cause of the spike.
The hb_bidder information should be sent to the Rev Ops team to investigate further.
The Ad Ops team should also investigate whether any GAM changes were recently made that could be impacting the hb_bidder.
The Ad Code team should also investigate if there were any recent ad code changes that could be impacting the hb_bidder.
If the majority of hb_bidder values are being flagged as having a house ad spike, the issue is likely not related to hb_bidder values.

hb_deal:
If a single hb_deal is flagged as having a house ad spike, the issue is likely caused by or related to that hb_deal.
Even if other sections are showing an increase in house ads, the hb_deal is most likely the cause of the spike.
The Ad Ops team should also investigate whether any GAM changes, especially changes to protections and/or UPRs, were recently made that could be impacting the hb_deal.
The hb_deal information should be sent to the Sales team to investigate further.
If the majority of hb_deal values are being flagged as having a house ad spike, the issue is likely not related to hb_deal values.

Ad Unit:
If a single ad unit is flagged as having a house ad spike, the issue is likely related to that ad unit.
The ad code team should also investigate if there were any recent ad code changes that could be impacting the ad unit.
If the majority of ad unit values are being flagged as having a house ad spike, the issue is likely not related to ad unit values.

Browser:
If a single browser is flagged as having a house ad spike, the issue is likely related to that browser.
The ad code team should investigate if there were any recent ad code changes that could be impacting the browser.
If the majority of browser values are being flagged as having a house ad spike, the issue is likely not related to browser values.

Device:
If a single device is flagged as having a house ad spike, the issue is likely related to that device.
The ad code team should investigate if there were any recent ad code changes that could be impacting the device.
If the majority of device values are being flagged as having a house ad spike, the issue is likely not related to device values.

Random Integer:
If a single random integer is flagged as having a house ad spike, the issue is likely caused by or related to that random integer.
If multiple random integer values are being flagged as having a house ad spike, the issue could be related to those random integer values.
The Ad Ops team should investigate whether any GAM changes were recently made that could be impacting the random integer value(s).
The ad code team should investigate if there were any recent ad code changes that could be impacting the random integer value(s).
If the majority of random integer values are being flagged as having a house ad spike, the issue is likely not related to random integer values.

hb_pb:
If a single hb_pb value is flagged as having a house ad spike, the issue is likely caused by or related to that hb_pb.
The Ad Ops team should also investigate whether any GAM changes were recently made that could be impacting the hb_pb.
The ad code team should investigate if there were any recent ad code changes that could be impacting the hb_pb.
If the majority of hb_pb values are being flagged as having a house ad spike, the issue is likely not related to hb_pb values.

hb_size:
If a single hb_size value is flagged as having a house ad spike, the issue is likely related to that hb_size.
The ad code team should investigate if there were any recent ad code changes that could be impacting the hb_size.
If the majority of hb_size values are being flagged as having a house ad spike, the issue is likely not related to hb_size values.
"""
|
house_ad_main.py
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import time
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
import snowflake.connector
|
| 6 |
+
import base64
|
| 7 |
+
from datetime import timedelta, datetime
|
| 8 |
+
from cryptography.hazmat.primitives import serialization
|
| 9 |
+
from cryptography.hazmat.backends import default_backend
|
| 10 |
+
import concurrent.futures
|
| 11 |
+
|
| 12 |
+
# Import SQL query functions.
|
| 13 |
+
from house_ad_queries import (
|
| 14 |
+
get_main_query,
|
| 15 |
+
get_flex_query,
|
| 16 |
+
get_bidder_query,
|
| 17 |
+
get_deal_query,
|
| 18 |
+
get_ad_unit_query,
|
| 19 |
+
get_browser_query,
|
| 20 |
+
get_device_query,
|
| 21 |
+
get_random_integer_query,
|
| 22 |
+
get_hb_pb_query,
|
| 23 |
+
get_hb_size_query,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
# Import the house ad section config.
|
| 27 |
+
from house_ad_section_utils import update_section_generic
|
| 28 |
+
|
| 29 |
+
# Import the NEXT_STEPS_INSTRUCTIONS at the top.
|
| 30 |
+
from house_ad_instructions import NEXT_STEPS_INSTRUCTIONS
|
| 31 |
+
|
| 32 |
+
# Initialize session state keys at the top so they only get set once.
# query_run: whether the top-level query has already executed this session.
st.session_state.setdefault("query_run", False)
# findings_messages: accumulated findings text fed into the OpenAI summary.
st.session_state.setdefault("findings_messages", [])
# key_findings_output: cached OpenAI summary (Markdown), once generated.
st.session_state.setdefault("key_findings_output", None)
# query_df: raw top-level query results (DataFrame).
st.session_state.setdefault("query_df", None)
# agg_df: top-level results aggregated into 5-minute buckets.
st.session_state.setdefault("agg_df", None)
# top_level_spike_time: start of the detected top-level spike, or None.
st.session_state.setdefault("top_level_spike_time", None)
|
| 39 |
+
|
| 40 |
+
# --- Helper Functions ---
|
| 41 |
+
|
| 42 |
+
# def load_private_key(key_str):
|
| 43 |
+
# """Load a PEM-formatted private key."""
|
| 44 |
+
# return serialization.load_pem_private_key(
|
| 45 |
+
# key_str.encode("utf-8"),
|
| 46 |
+
# password=None,
|
| 47 |
+
# backend=default_backend()
|
| 48 |
+
# )
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# Use caching to avoid re-running the same query on every interaction.
|
| 52 |
+
@st.cache_data(show_spinner=False)
def cached_run_query(
    query,
    private_key_b64: str,
    user: str,
    account_identifier: str,
    warehouse: str,
    database: str,
    schema: str,
    role: str,
):
    """Connect to Snowflake, execute *query*, and return a DataFrame.

    Cached by Streamlit so identical calls within a session do not re-run.

    Args:
        query: SQL text to execute.
        private_key_b64: Base64-encoded DER private key for key-pair auth.
        user, account_identifier, warehouse, database, schema, role:
            Snowflake connection parameters.

    Returns:
        pandas.DataFrame built from the cursor results, with column names
        taken from the cursor description.
    """
    # Decode the base64-encoded DER key for the connector.
    der = base64.b64decode(private_key_b64)
    conn = snowflake.connector.connect(
        user=user,
        account=account_identifier,
        warehouse=warehouse,
        database=database,
        schema=schema,
        role=role,
        private_key=der,
    )
    # try/finally: the original leaked the cursor and connection whenever
    # the query raised (e.g. SQL error or statement timeout).
    try:
        cs = conn.cursor()
        try:
            # Cap runaway queries at 30 minutes.
            cs.execute("ALTER SESSION SET STATEMENT_TIMEOUT_IN_SECONDS = 1800")
            cs.execute(query)
            results = cs.fetchall()
            columns = [col[0] for col in cs.description]
            return pd.DataFrame(results, columns=columns)
        finally:
            cs.close()
    finally:
        conn.close()
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# --- Main Function for House Ad Spike Analysis ---
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def run_house_ad_spike_query(
    table,
    start_datetime,
    end_datetime,
    message_filter,
    campaign_id,
    private_key_str,
    user,
    account_identifier,
    warehouse,
    database,
    schema,
    role,
    client,
):
    """
    Run the house ad spike query along with additional dimensions,
    generate key findings via OpenAI, and display the results.

    Args:
        table: Fully-qualified log table passed to the query builders.
        start_datetime / end_datetime: EST window (inclusive) for all queries.
        message_filter: Log message value the queries filter on.
        campaign_id: House-ad campaign id the queries filter on.
        private_key_str: Key material forwarded to cached_run_query
            (which expects a base64-encoded DER key).
        user / account_identifier / warehouse / database / schema / role:
            Snowflake connection parameters, forwarded to cached_run_query.
        client: OpenAI client used by the key-findings callback.

    Side effects:
        Renders Streamlit output and reads/writes st.session_state keys:
        query_run, query_df, agg_df, findings_messages, key_findings,
        key_findings_output, top_level_spike_time (and reads flex_jira_info).
    """
    # --- Generate SQL Queries ---
    main_sql = get_main_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    flex_sql = get_flex_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    bidder_sql = get_bidder_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    deal_sql = get_deal_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    ad_unit_sql = get_ad_unit_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    browser_sql = get_browser_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    device_sql = get_device_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    random_integer_sql = get_random_integer_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    hb_pb_sql = get_hb_pb_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )
    hb_size_sql = get_hb_size_query(
        table, start_datetime, end_datetime, message_filter, campaign_id
    )

    # --- Main Query Execution ---
    # Run query only if it hasn't been run already.
    if not st.session_state["query_run"]:
        try:
            start_main = time.time()
            with st.spinner("Connecting to Snowflake and running top-level query..."):
                df = cached_run_query(
                    main_sql,
                    private_key_str,
                    user,
                    account_identifier,
                    warehouse,
                    database,
                    schema,
                    role,
                )
            elapsed_main = time.time() - start_main
            elapsed_minutes = int(elapsed_main // 60)
            elapsed_seconds = elapsed_main % 60

            st.info(
                f"Top-level SQL query executed in {elapsed_minutes} minute(s) and {elapsed_seconds:.2f} seconds."
            )

            # Process the results: normalize column names, rebuild an EST
            # timestamp from the date/hour/minute columns, then bucket to 5 min.
            df.columns = [col.upper() for col in df.columns]
            df.sort_values(by=["EST_HOUR", "EST_MINUTE"], inplace=True)
            df["timestamp"] = pd.to_datetime(
                df["EST_DATE"].astype(str)
                + " "
                + df["EST_HOUR"].astype(str).str.zfill(2)
                + ":"
                + df["EST_MINUTE"].astype(str).str.zfill(2)
            )
            # NOTE(review): "5T" is a deprecated pandas offset alias ("5min"
            # on pandas 2.2+) — confirm the pinned pandas version.
            df["5min"] = df["timestamp"].dt.floor("5T")
            agg_df = df.groupby("5min", as_index=False)["CNT"].sum()

            st.session_state["query_df"] = df
            st.session_state["agg_df"] = agg_df
            st.session_state["query_run"] = True
        except Exception as e:
            st.error(f"Error during main query execution: {e}")
            return
    else:
        # Use stored data.
        df = st.session_state.get("query_df")
        agg_df = st.session_state.get("agg_df")

    # --- Display Main Query Results ---
    st.header("Top-Level Data")
    # NOTE(review): magic baseline — a 5-minute bucket counts as a spike when
    # its CNT exceeds 30; confirm this still matches normal traffic levels.
    top_level_baseline = 30
    agg_df["is_spike"] = agg_df.apply(
        lambda row: row["CNT"] > top_level_baseline, axis=1
    )
    # A spike "starts" when two consecutive 5-minute buckets exceed the
    # baseline; the reported start is the bucket before the second hit.
    spike_start = None
    consecutive = 0
    for idx, row in agg_df.sort_values("5min").iterrows():
        if row["is_spike"]:
            consecutive += 1
            if consecutive == 2:
                spike_start = row["5min"] - timedelta(minutes=5)
                break
        else:
            consecutive = 0

    if spike_start:
        msg = f"Top-Level: House ad increase detected starting around {spike_start.strftime('%I:%M %p')}."
        st.success(msg)
    else:
        msg = "Top-Level: No large, consistent spike detected in the current data."
        st.info(msg)
    # Append the message only once.
    findings_messages = st.session_state.setdefault("findings_messages", [])
    if msg not in findings_messages:
        findings_messages.append(msg)
    st.session_state["top_level_spike_time"] = spike_start

    with st.expander("Show Raw Data"):
        st.dataframe(df)
    with st.expander("Show Raw 5-Minute Aggregated Data with Spike Alert"):
        st.dataframe(agg_df)

    title_text = "House Ads Count by 5-Minute Interval"
    fig = px.line(
        agg_df,
        x="5min",
        y="CNT",
        title=title_text,
        labels={"5min": "Time", "CNT": "House Ads Count"},
    )
    fig.update_xaxes(tickformat="%I:%M %p")
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("<hr style='border: 3px solid gray;'>", unsafe_allow_html=True)

    # --- Key Findings via OpenAI ---
    st.header("Key Findings and Next Steps")
    # Create a container to hold the key findings output.
    key_findings_container = st.container()

    # Initially display what’s in session_state (if anything) or a placeholder.
    with key_findings_container:
        if st.session_state.get("key_findings_output"):
            st.markdown(
                st.session_state.get("key_findings_output"),
                unsafe_allow_html=True,
            )
        else:
            st.info(
                "Key findings will appear here once additional queries have finished."
            )

    def generate_key_findings_callback():
        # Build the OpenAI prompt from accumulated findings plus any Jira
        # info the flex-bucket section stashed in session state.
        findings = "\n".join(st.session_state.get("findings_messages", []))
        flex_jira_info = st.session_state.get("flex_jira_info", "")
        jira_section = (
            f"\nJira Ticket Information from Flex Bucket section:\n{flex_jira_info}\n"
            if flex_jira_info
            else ""
        )
        prompt = (
            "You are a helpful analyst investigating a spike in house ads. A house ad spike detection dashboard has compiled a list of findings "
            "showing potential spikes across different dimensions. Below are the detailed findings from the dashboard, along with any flagged Jira ticket "
            "information. The NEXT_STEPS_INSTRUCTIONS file contains recommended next steps for each section depending on the spike(s) flagged in the dashboard:\n\n"
            f"Findings:\n{findings}\n"
            f"{jira_section}\n"
            "Next Steps Instructions:\n"
            f"{NEXT_STEPS_INSTRUCTIONS}\n\n"
            "Using the Findings, jira section information, and Next Steps Instructions as helpful context, create a concise summary "
            "that identifies the likely cause/causes of any detected house ad spikes and recommends actionable next steps. The summary "
            "should be a few sentences long followed by bullet points with the main findings and recommended next steps. Please output "
            "the summary in Markdown format with each bullet point on a new line, and indent sub-bullets properly. Ensure that each bullet "
            "point is on its own line. There is no need to explicitly mention the Instructions file in the summary, just use it to "
            "inform your analysis. "
        )
        # The raw prompt is kept in session state for debugging/inspection.
        st.session_state["key_findings"] = prompt
        try:
            response = client.responses.create(
                model="o3-mini",
                instructions="You are a helpful analyst who provides insights and recommends next steps.",
                input=prompt,
            )
            st.session_state["key_findings_output"] = response.output_text.strip()
        except Exception as e:
            st.session_state["key_findings_output"] = f"Error calling OpenAI API: {e}"

    # --- Additional Queries for Specific Dimensions ---
    st.header("Specific Dimensions Data")
    st.info("Checking specific dimensions for house ad spikes...")

    with st.spinner("Running additional queries..."):
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = {}
            start_times = {}
            # NOTE(review): these keys are passed straight to
            # update_section_generic — confirm they match the section names
            # it expects (house_ad_section_utils).
            query_dict = {
                "flex bucket": flex_sql,
                "bidder": bidder_sql,
                "deal": deal_sql,
                "ad_unit": ad_unit_sql,
                "browser": browser_sql,
                "device": device_sql,
                "random_integer": random_integer_sql,
                "hb_pb": hb_pb_sql,
                "hb_size": hb_size_sql,
            }
            for key, query in query_dict.items():
                start_times[key] = time.time()
                futures[key] = executor.submit(
                    cached_run_query,
                    query,
                    private_key_str,
                    user,
                    account_identifier,
                    warehouse,
                    database,
                    schema,
                    role,
                )

            containers = {
                "flex bucket": st.container(),
                "bidder": st.container(),
                "deal": st.container(),
                "ad_unit": st.container(),
                "browser": st.container(),
                "device": st.container(),
                "random_integer": st.container(),
                "hb_pb": st.container(),
                "hb_size": st.container(),
            }

            spike_time = st.session_state.get("top_level_spike_time")

            # Poll with a short timeout so each section renders as soon as
            # its query completes, instead of waiting for all of them.
            while futures:
                done, _ = concurrent.futures.wait(
                    list(futures.values()),
                    timeout=0.5,
                    return_when=concurrent.futures.FIRST_COMPLETED,
                )
                for future in done:
                    # Reverse-lookup the section key for this completed future.
                    key = [k for k, f in futures.items() if f == future][0]
                    df_result = future.result()
                    update_section_generic(
                        key, df_result, start_times, containers[key], spike_time
                    )
                    del futures[key]

    # Once all additional queries have completed, automatically generate key findings.
    generate_key_findings_callback()

    # Update the key findings container with the new output.
    with key_findings_container:
        st.markdown(
            st.session_state.get("key_findings_output", ""),
            unsafe_allow_html=True,
        )
|
house_ad_queries.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from functools import wraps
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def _quote_identifier(identifier: str) -> str:
|
| 6 |
+
"""Quote SQL identifiers that contain special characters."""
|
| 7 |
+
|
| 8 |
+
def quote_part(part: str) -> str:
|
| 9 |
+
if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", part):
|
| 10 |
+
return part
|
| 11 |
+
return f'"{part}"'
|
| 12 |
+
|
| 13 |
+
return ".".join(quote_part(p) for p in identifier.split("."))
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _sanitize_table(func):
    """Decorator: quote the table-name argument before the query is built."""

    @wraps(func)
    def wrapper(table, *args, **kwargs):
        # Sanitize only the first positional argument (the table name);
        # everything else is forwarded untouched.
        safe_table = _quote_identifier(table)
        return func(safe_table, *args, **kwargs)

    return wrapper
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@_sanitize_table
def get_main_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the top-level house-ad count query.

    Counts matching impressions per EST date/hour/minute for the given
    campaign within the datetime window. The table name is quoted by
    @_sanitize_table; the other parameters are embedded as SQL string
    literals, so their single quotes are escaped below to prevent them
    from terminating the literal (SQL injection hardening — values
    without quotes produce exactly the same query as before).
    """

    def _esc(value):
        # Standard SQL escaping: double any single quote inside a literal.
        return str(value).replace("'", "''")

    start_datetime = _esc(start_datetime)
    end_datetime = _esc(end_datetime)
    message_filter = _esc(message_filter)
    campaign_id = _esc(campaign_id)
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
@_sanitize_table
def get_flex_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the per-flex-bucket house-ad count query.

    Same filters as get_main_query, additionally grouped by `bucket`.
    NOTE(review): the datetime window, message_filter and campaign_id are
    interpolated into the SQL without escaping — callers must pass trusted
    values (the table name itself is quoted by @_sanitize_table).
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        bucket,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@_sanitize_table
def get_bidder_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the per-header-bidder house-ad count query.

    Same filters as get_main_query, additionally grouped by the first
    hb_bidder slot-targeting value (exposed as HB_BIDDER).
    NOTE(review): parameters are interpolated into the SQL without
    escaping — callers must pass trusted values.
    """
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_bidder[0]::varchar as HB_BIDDER,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
@_sanitize_table
|
| 79 |
+
def get_deal_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per Prebid deal.

    Counts qualifying rows grouped by EST date/hour/minute and the
    ``hb_deal`` slot-targeting value. Same filters as the other section
    queries: EST window, message, campaign id, hb_pb >= 0.15. The table
    name is validated upstream by the ``_sanitize_table`` decorator.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_deal[0]::varchar as HB_DEAL,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# New function for Ad Unit Data
|
| 97 |
+
@_sanitize_table
|
| 98 |
+
def get_ad_unit_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per ad unit.

    Counts qualifying rows grouped by EST date/hour/minute and the third
    '/'-separated segment of ``adUnitPath``. Same filters as the other
    section queries: EST window, message, campaign id, hb_pb >= 0.15.
    The table name is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        split(body[0]['adUnitPath'],'/')[2]::varchar as Ad_Unit,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# New function for Browser Data
|
| 116 |
+
@_sanitize_table
|
| 117 |
+
def get_browser_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per browser family.

    Classifies ``useragent`` into Edge / Chrome / Firefox / Safari / other
    (Edge is checked first because its UA string also contains "chrome";
    "applewebkit" is the Safari fallback) and counts rows per EST
    date/hour/minute and browser. Same filters as the other section
    queries. Table name is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        case
            when lower(useragent) like '%edg%' then 'Edge'
            when (lower(useragent) like '%cros%' or lower(useragent) like '%chrome%' or lower(useragent) like '%crios%') then 'Chrome'
            when lower(useragent) like '%firefox%' then 'Firefox'
            when lower(useragent) like '%applewebkit%' then 'Safari'
            else 'other'
        end as browser,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# New function for Device Data
|
| 141 |
+
@_sanitize_table
|
| 142 |
+
def get_device_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per device class.

    Classifies ``useragent`` into desktop / phone / tablet / other by
    simple substring matching (desktop is checked first, so Windows/Mac
    UAs win over any later match) and counts rows per EST date/hour/minute
    and device. Same filters as the other section queries. Table name is
    validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        case
            when (useragent like '%Windows%' or useragent like '%Macintosh%') THEN 'desktop'
            when (useragent like '%Android%' or useragent like '%iPhone%' or useragent like '%Mobi%') THEN 'phone'
            when (useragent like '%iPad%' or useragent like '%Tablet%') THEN 'tablet'
            else 'other'
        end as device,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
# New function for Random Integer Data
|
| 165 |
+
@_sanitize_table
|
| 166 |
+
def get_random_integer_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per random integer.

    Counts qualifying rows grouped by EST date/hour/minute and the
    ``siteTargeting:ri`` (random integer) value. Same filters as the other
    section queries: EST window, message, campaign id, hb_pb >= 0.15.
    The table name is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:siteTargeting:ri[0]::varchar as Random_Integer,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
# New function for hb_pb Data
|
| 186 |
+
@_sanitize_table
|
| 187 |
+
def get_hb_pb_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per hb_pb bid value.

    Counts qualifying rows grouped by EST date/hour/minute and the
    ``hb_pb`` (price-bucket) slot-targeting value; note the WHERE clause
    still floors hb_pb at 0.15 like every other section query. Table name
    is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_pb[0]::varchar as hb_pb,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# New function for hb_size Data
|
| 205 |
+
@_sanitize_table
|
| 206 |
+
def get_hb_size_query(table, start_datetime, end_datetime, message_filter, campaign_id):
    """Build the Snowflake SQL that counts house-ad rows per creative size.

    Counts qualifying rows grouped by EST date/hour/minute and the
    ``hb_size`` slot-targeting value. Same filters as the other section
    queries: EST window, message, campaign id, hb_pb >= 0.15. Table name
    is validated upstream by ``_sanitize_table``.

    Returns:
        str: the SQL text to execute.
    """
    # Escape embedded single quotes so interpolated values cannot terminate
    # their SQL string literals (injection hardening; no-op for normal input).
    start_datetime, end_datetime, message_filter, campaign_id = (
        str(v).replace("'", "''")
        for v in (start_datetime, end_datetime, message_filter, campaign_id)
    )
    return f"""
    SELECT
        to_date(convert_timezone('UTC', 'America/New_York', datetime)) AS EST_DATE,
        extract(hour FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_HOUR,
        extract(minute FROM convert_timezone('UTC', 'America/New_York', datetime)) AS EST_MINUTE,
        body[0]:slotTargeting:hb_size[0]::varchar as hb_size,
        count(*) as CNT
    FROM {table}
    WHERE convert_timezone('UTC', 'America/New_York', datetime) BETWEEN '{start_datetime}' AND '{end_datetime}'
        and message in ('{message_filter}')
        and body[0]:campaignId::varchar in ('{campaign_id}')
        and body[0]:slotTargeting:hb_pb[0]::DOUBLE >= 0.15
    GROUP BY ALL
    """
|
house_ad_section_utils.py
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
import streamlit as st
|
| 5 |
+
import os
|
| 6 |
+
import pytz
|
| 7 |
+
import re
|
| 8 |
+
from datetime import timedelta, date, datetime
|
| 9 |
+
from atlassian import Jira
|
| 10 |
+
|
| 11 |
+
# --- Jira API Configuration for Deployments ---
# Credentials come from environment variables so no secrets live in the
# repository (on a hosted Space these are supplied as secrets).
JIRA_URL = os.getenv("JIRA_URL")
JIRA_USERNAME = os.getenv("JIRA_USERNAME")
JIRA_API_TOKEN = os.getenv("JIRA_API_TOKEN")

# Initialize your Jira
# Module-level shared client used by update_section_generic's deployment
# lookup. NOTE(review): if the env vars are unset this still constructs a
# client with url=None and only fails on first use -- confirm that is the
# intended failure mode.
jira_client = Jira(url=JIRA_URL, username=JIRA_USERNAME, password=JIRA_API_TOKEN)
|
| 18 |
+
|
| 19 |
+
# Configuration dictionary for sections.
# Each entry keys a dashboard section by name and provides:
#   group_col       -- upper-cased DataFrame column that update_section_generic
#                      groups counts by
#   chart_title     -- title passed to the Plotly line chart
#   baseline        -- per-5-minute count above which a group is flagged
#   spike_threshold -- number of flagged groups above which the alert is
#                      reported as "multiple groups" instead of naming them
SECTION_CONFIG = {
    "flex bucket": {
        "group_col": "BUCKET",
        "chart_title": "Flex Bucket House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "bidder": {
        "group_col": "HB_BIDDER",
        "chart_title": "hb_bidder House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "deal": {
        "group_col": "HB_DEAL",
        "chart_title": "hb_deal House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "ad_unit": {
        "group_col": "AD_UNIT",
        "chart_title": "Ad Unit House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "browser": {
        "group_col": "BROWSER",
        "chart_title": "Browser House Ads Count by 5-Minute Interval",
        "baseline": 40,
        # Lower threshold: browser/device have few groups, so even one
        # flagged group is reported as "multiple" sooner.
        "spike_threshold": 1,
    },
    "device": {
        "group_col": "DEVICE",
        "chart_title": "Device House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 1,
    },
    "random_integer": {
        "group_col": "RANDOM_INTEGER",
        "chart_title": "Random Integer House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "hb_pb": {
        "group_col": "HB_PB",
        "chart_title": "hb_pb House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "hb_size": {
        "group_col": "HB_SIZE",
        "chart_title": "hb_size House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
}
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def parse_deployment_info(comment_text):
    """Extract deployment fields from a structured Jira comment.

    The expected comment layout is one "<Label>: <value>" pair per line:

        Deployed At: <timestamp>
        Bucket: <bucket>
        Traffic: <traffic>
        Branch: <branch>

    Returns:
        tuple[str, str, str, str]: (deployed_at, bucket, traffic, branch)
        when all four labels are present; four empty strings otherwise.
    """
    # Table of labels, in match-priority order, mapped to their parsed values.
    values = {"Deployed At": "", "Bucket": "", "Traffic": "", "Branch": ""}
    seen = set()

    for raw_line in comment_text.splitlines():
        for label in values:
            marker = label + ":"
            if marker in raw_line:
                # Take everything after the first occurrence of the marker.
                values[label] = raw_line.split(marker)[1].strip()
                seen.add(label)
                break  # first matching label wins for this line

    # All-or-nothing contract: partial matches yield empty fields.
    if seen != set(values):
        return "", "", "", ""
    return (
        values["Deployed At"],
        values["Bucket"],
        values["Traffic"],
        values["Branch"],
    )
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def update_section_generic(key, df, start_times, container, spike_time):
    """Render one dashboard section (chart + spike findings) into *container*.

    Looks up SECTION_CONFIG[key] for the grouping column, baseline, and
    spike threshold; buckets *df* into 5-minute intervals; flags groups
    whose count at *spike_time* exceeds the baseline; and, for the
    "flex bucket" section only, cross-references flagged buckets against
    deployment comments fetched from Jira.

    NOTE(review): the original docstring mentioned pre-/post-window
    share-of-voice comparisons, but this implementation only inspects the
    single *spike_time* bucket — confirm which is intended.

    Args:
        key: Section name; must be a key of SECTION_CONFIG.
        df: Query result with EST_DATE/EST_HOUR/EST_MINUTE/CNT plus the
            section's grouping column. Mutated in place (sorted, columns
            upper-cased, "timestamp"/"5min" columns added).
        start_times: Mapping of section key -> time.time() captured when the
            section's query started; used only to report elapsed time.
        container: Streamlit container to render into.
        spike_time: 5-minute-floored timestamp anchoring the spike check.
    """
    # Compute elapsed time for the query.
    elapsed_section = time.time() - start_times[key]
    minutes_container = int(elapsed_section // 60)
    seconds_container = elapsed_section % 60

    # Standardize column names and create a unified timestamp.
    df.columns = [col.upper() for col in df.columns]
    df.sort_values(by=["EST_HOUR", "EST_MINUTE"], inplace=True)
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str)
        + " "
        + df["EST_HOUR"].astype(str).str.zfill(2)
        + ":"
        + df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    # Floor each row to its 5-minute bucket for aggregation.
    df["5min"] = df["timestamp"].dt.floor("5T")

    # Retrieve configuration for the current section.
    config = SECTION_CONFIG.get(key, {})
    baseline = config.get("baseline", 30)
    group_col = config.get("group_col")
    spike_threshold = config.get("spike_threshold", 3)

    with container:
        st.subheader(f"{key.capitalize()} Data")
        st.info(
            f"{key.capitalize()} query completed in {minutes_container} minute(s) and {seconds_container:.2f} seconds."
        )

        # Group the data by 5-minute intervals and the configured grouping column.
        agg_df = df.groupby(["5min", group_col], as_index=False)["CNT"].sum()

        # Get the data corresponding to the spike time.
        spike_row = agg_df[agg_df["5min"] == spike_time]

        # Flag groups where the count exceeds the baseline.
        flagged_groups = []
        for grp in spike_row[group_col].unique():
            group_count = spike_row[spike_row[group_col] == grp]["CNT"].sum()
            if group_count > baseline:
                flagged_groups.append(grp)

        # Create the chart once.
        fig = px.line(
            agg_df,
            x="5min",
            y="CNT",
            color=group_col,
            title=config.get(
                "chart_title",
                f"{key.capitalize()} House Ads Count by 5-Minute Interval",
            ),
            labels={"5min": "Time", "CNT": "House Ads Count", group_col: key},
        )
        fig.update_xaxes(tickformat="%I:%M %p")

        if flagged_groups:
            if len(flagged_groups) > spike_threshold:
                # Too many groups spiked to name them individually.
                msg = f"{key.capitalize()}: House ad increase detected for multiple {key} groups starting around {spike_time.strftime('%I:%M %p')}."
                st.warning(msg)
                with st.expander(f"Show Raw {key.capitalize()} Data"):
                    st.dataframe(df)
                with st.expander("Show Chart"):
                    st.plotly_chart(fig, use_container_width=True)
            else:
                # Few enough flagged groups to list explicitly.
                msg = f"{key.capitalize()}: House ad increase detected for {', '.join(flagged_groups)} starting around {spike_time.strftime('%I:%M %p')}."
                st.success(msg)
                with st.expander(f"Show Raw {key.capitalize()} Data"):
                    st.dataframe(df)
                st.plotly_chart(fig, use_container_width=True)
            # Record the finding for the cross-section summary.
            # NOTE(review): nesting reconstructed from a whitespace-stripped
            # source -- confirm whether this should also run on the warning
            # ("multiple groups") path or only the success path.
            st.session_state.setdefault("findings_messages", []).append(msg)
        else:
            msg = f"{key.capitalize()}: No significant {key} spikes detected."
            st.info(msg)
            st.session_state.setdefault("findings_messages", []).append(msg)
            with st.expander(f"Show Raw {key.capitalize()} Data"):
                st.dataframe(df)
            with st.expander("Show Chart"):
                st.plotly_chart(fig, use_container_width=True)

        # Only the flex-bucket section enriches findings with Jira deployments.
        if key == "flex bucket":
            st.write("### Deployment Information")
            flex_jira_info = ""  # Initialize an empty variable.
            try:
                # Use the selected dashboard date to define the full day range.
                start_date = st.session_state.get("start_date")
                end_date = st.session_state.get("end_date")
                eastern = st.session_state.get("eastern")
                start_datetime = datetime.combine(start_date, datetime.min.time())
                end_datetime = datetime.combine(end_date, datetime.max.time())
                # NOTE(review): start_str/end_str are computed but never used
                # below -- candidates for removal.
                start_str = start_datetime.astimezone(pytz.utc).strftime(
                    "%Y-%m-%d %H:%M"
                )
                end_str = end_datetime.astimezone(pytz.utc).strftime("%Y-%m-%d %H:%M")
                st.info("Fetching deployment information from Jira...")

                # Build a JQL query for the selected date range.
                # Dates are rendered M/D/YY to match the comment format.
                dashboard_start_str = (
                    f"{start_date.month}/{start_date.day}/{start_date.strftime('%y')}"
                )
                dashboard_end_str = (
                    f"{end_date.month}/{end_date.day}/{end_date.strftime('%y')}"
                )
                jql = (
                    f'comment ~ "Deployed At: {dashboard_start_str}" '
                    f'OR comment ~ "Deployed At: {dashboard_end_str}" '
                    f'AND comment ~ "Bucket:" '
                    f'AND comment ~ "Traffic:" '
                    f'AND comment ~ "Branch:"'
                )

                # --- Pagination: Retrieve all matching issues ---
                startAt = 0
                limit = 50
                deployments_list = []

                while True:
                    response_page = jira_client.jql(
                        jql,
                        fields="key,summary,updated,comment",
                        start=startAt,
                        limit=limit,
                    )
                    issues = response_page.get("issues", [])
                    deployments_list.extend(issues)
                    # A short page means we've reached the last page.
                    if len(issues) < limit:
                        break
                    startAt += len(issues)

                deployments = []
                for issue in deployments_list:
                    key_val = issue["key"]
                    summary = issue["fields"]["summary"]
                    updated = issue["fields"]["updated"]
                    # Render the issue key as a clickable link for st.markdown.
                    key_link = f'<a href="{JIRA_URL}/browse/{key_val}" target="_blank">{key_val}</a>'

                    try:
                        updated_dt = pd.to_datetime(updated, utc=True).astimezone(
                            eastern
                        )
                    except Exception:
                        updated_dt = None

                    comment_field = issue["fields"].get("comment", {})
                    comments = comment_field.get("comments", [])

                    deployment_found = False
                    deployment_comment = ""
                    if comments:
                        for comment in comments:
                            try:
                                comment_dt = pd.to_datetime(
                                    comment["created"], utc=True
                                ).astimezone(eastern)
                            except Exception:
                                continue
                            # Check if the comment was created on the selected date.
                            if start_date <= comment_dt.date() <= end_date:
                                body = comment["body"].strip()
                                # Only comments starting with "deployed"
                                # count as deployment records.
                                if body.lower().startswith("deployed"):
                                    deployment_found = True
                                    deployment_comment = body
                                    break
                    if deployment_found:
                        dep_at, bucket, traffic, branch = parse_deployment_info(
                            deployment_comment
                        )
                        # Fallback for the legacy free-form style
                        # "Deployed to prod at <timestamp>." comments.
                        if not dep_at and deployment_comment.lower().startswith(
                            "deployed to prod"
                        ):
                            timestamp_text = re.sub(
                                r"(?i)^deployed\s+to\s+prod\s*(at\s*)?",
                                "",
                                deployment_comment,
                            ).strip()
                            if "." in timestamp_text:
                                timestamp_text = timestamp_text.split(".")[0].strip()
                            dep_at = timestamp_text
                            bucket, traffic, branch = "", "", ""
                        if dep_at:
                            try:
                                deployed_dt = pd.to_datetime(
                                    dep_at, format="%m/%d/%y, %I:%M %p", errors="coerce"
                                )
                            except Exception:
                                deployed_dt = None
                            if deployed_dt is not None and deployed_dt is not pd.NaT:
                                # Parsed naive -> localize to Eastern.
                                deployed_dt = eastern.localize(
                                    deployed_dt.replace(tzinfo=None)
                                )
                                deployments.append(
                                    {
                                        "Deployed Date": deployed_dt.strftime(
                                            "%m/%d/%y"
                                        ),
                                        "Deployed Time": deployed_dt.strftime(
                                            "%I:%M %p"
                                        ),
                                        "Key": key_link,
                                        "Summary": summary,
                                        # Legacy comments carry no bucket; treat
                                        # them as production deploys.
                                        "Bucket": bucket if bucket else "production",
                                    }
                                )

                if deployments:
                    df_deployments = pd.DataFrame(deployments).reset_index(drop=True)
                    # Temporary sort key; dropped after ordering newest-first.
                    df_deployments["Deployed_dt"] = pd.to_datetime(
                        df_deployments["Deployed Date"]
                        + " "
                        + df_deployments["Deployed Time"],
                        format="%m/%d/%y %I:%M %p",
                        errors="coerce",
                    )
                    df_deployments.sort_values(
                        "Deployed_dt", ascending=False, inplace=True
                    )
                    df_deployments.drop("Deployed_dt", axis=1, inplace=True)

                    # Filter the DataFrame to only show flagged deployments.
                    df_flagged = df_deployments[
                        df_deployments["Bucket"].isin(flagged_groups)
                    ]

                    if not df_flagged.empty:
                        # Build a string containing info for all flagged Jira tickets.
                        tickets_info_list = []
                        for _, row in df_flagged.iterrows():
                            tickets_info_list.append(
                                f"Jira Ticket: {row['Key']} - {row['Summary']}"
                            )
                        flex_jira_info = "\n".join(tickets_info_list)

                        # Reorder columns for display.
                        cols = [
                            "Deployed Date",
                            "Deployed Time",
                            "Key",
                            "Summary",
                            "Bucket",
                        ]
                        df_flagged = df_flagged[cols]
                        # escape=False keeps the <a> links clickable.
                        styled_df = df_flagged.style.hide(axis="index")
                        st.markdown(
                            styled_df.to_html(escape=False), unsafe_allow_html=True
                        )
                    else:
                        st.info(
                            "No flagged deployments found for the selected criteria."
                        )
                else:
                    st.info("No deployments found for the selected criteria.")
            except Exception as e:
                # Jira enrichment is best-effort; surface the error without
                # breaking the rest of the dashboard.
                st.error(f"Error fetching deployments: {e}")

            # Save the Jira info (if any) to session state.
            st.session_state["flex_jira_info"] = flex_jira_info
        # Visual divider after every section.
        st.markdown("<hr style='border: 3px solid gray;'>", unsafe_allow_html=True)
|
index.html
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
<!doctype html>
|
| 2 |
-
<html>
|
| 3 |
-
<head>
|
| 4 |
-
<meta charset="utf-8" />
|
| 5 |
-
<meta name="viewport" content="width=device-width" />
|
| 6 |
-
<title>My static Space</title>
|
| 7 |
-
<link rel="stylesheet" href="style.css" />
|
| 8 |
-
</head>
|
| 9 |
-
<body>
|
| 10 |
-
<div class="card">
|
| 11 |
-
<h1>Welcome to your static Space!</h1>
|
| 12 |
-
<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
|
| 13 |
-
<p>
|
| 14 |
-
Also don't forget to check the
|
| 15 |
-
<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
|
| 16 |
-
</p>
|
| 17 |
-
</div>
|
| 18 |
-
</body>
|
| 19 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
snowflake-connector-python
|
| 3 |
+
cryptography
|
| 4 |
+
pandas
|
| 5 |
+
plotly
|
| 6 |
+
atlassian-python-api
|
| 7 |
+
pytz
|
| 8 |
+
openai
|
style.css
DELETED
|
@@ -1,28 +0,0 @@
|
|
| 1 |
-
body {
|
| 2 |
-
padding: 2rem;
|
| 3 |
-
font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
|
| 4 |
-
}
|
| 5 |
-
|
| 6 |
-
h1 {
|
| 7 |
-
font-size: 16px;
|
| 8 |
-
margin-top: 0;
|
| 9 |
-
}
|
| 10 |
-
|
| 11 |
-
p {
|
| 12 |
-
color: rgb(107, 114, 128);
|
| 13 |
-
font-size: 15px;
|
| 14 |
-
margin-bottom: 10px;
|
| 15 |
-
margin-top: 5px;
|
| 16 |
-
}
|
| 17 |
-
|
| 18 |
-
.card {
|
| 19 |
-
max-width: 620px;
|
| 20 |
-
margin: 0 auto;
|
| 21 |
-
padding: 16px;
|
| 22 |
-
border: 1px solid lightgray;
|
| 23 |
-
border-radius: 16px;
|
| 24 |
-
}
|
| 25 |
-
|
| 26 |
-
.card p:last-child {
|
| 27 |
-
margin-bottom: 0;
|
| 28 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|