import time
import pandas as pd
import plotly.express as px
import streamlit as st
import os
import pytz
import re
from datetime import timedelta, date, datetime
from atlassian import Jira

# --- Jira API Configuration for Deployments ---
JIRA_URL = os.getenv("JIRA_URL")
JIRA_USERNAME = os.getenv("JIRA_USERNAME")
JIRA_API_TOKEN = os.getenv("JIRA_API_TOKEN")

# Initialize the Jira client (Atlassian Cloud API tokens are passed through
# the ``password`` field).
jira_client = Jira(url=JIRA_URL, username=JIRA_USERNAME, password=JIRA_API_TOKEN)

# Per-section configuration:
#   group_col       -- DataFrame column to group counts by
#   chart_title     -- title for the section's line chart
#   baseline        -- per-5-minute count above which a group is flagged
#   spike_threshold -- more flagged groups than this => "multiple groups" warning
SECTION_CONFIG = {
    "flex bucket": {
        "group_col": "BUCKET",
        "chart_title": "Flex Bucket House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "bidder": {
        "group_col": "HB_BIDDER",
        "chart_title": "hb_bidder House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "deal": {
        "group_col": "HB_DEAL",
        "chart_title": "hb_deal House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "ad_unit": {
        "group_col": "AD_UNIT",
        "chart_title": "Ad Unit House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "browser": {
        "group_col": "BROWSER",
        "chart_title": "Browser House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 1,
    },
    "device": {
        "group_col": "DEVICE",
        "chart_title": "Device House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 1,
    },
    "random_integer": {
        "group_col": "RANDOM_INTEGER",
        "chart_title": "Random Integer House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "hb_pb": {
        "group_col": "HB_PB",
        "chart_title": "hb_pb House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
    "hb_size": {
        "group_col": "HB_SIZE",
        "chart_title": "hb_size House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": 2,
    },
}


def parse_deployment_info(comment_text):
    """
    Parse a Jira comment for deployment info in the expected structure:

        Deployed At: <timestamp>
        Bucket: <bucket>
        Traffic: <traffic>
        Branch: <branch>

    Returns:
        tuple[str, str, str, str]: ``(deployed_at, bucket, traffic, branch)``.
        If any of the four keys is missing, returns four empty strings.
    """
    deployed_at, bucket, traffic, branch = "", "", "", ""
    keys_found = set()
    for line in comment_text.splitlines():
        if "Deployed At:" in line:
            deployed_at = line.split("Deployed At:")[1].strip()
            keys_found.add("Deployed At")
        elif "Bucket:" in line:
            bucket = line.split("Bucket:")[1].strip()
            keys_found.add("Bucket")
        elif "Traffic:" in line:
            traffic = line.split("Traffic:")[1].strip()
            keys_found.add("Traffic")
        elif "Branch:" in line:
            branch = line.split("Branch:")[1].strip()
            keys_found.add("Branch")
    # All four keys must be present for the comment to count as structured.
    if keys_found == {"Deployed At", "Bucket", "Traffic", "Branch"}:
        return deployed_at, bucket, traffic, branch
    return "", "", "", ""


def _fetch_deployment_issues(jql):
    """Return every Jira issue matching ``jql``, following pagination."""
    start_at = 0
    limit = 50
    issues = []
    while True:
        page = jira_client.jql(
            jql,
            fields="key,summary,updated,comment",
            start=start_at,
            limit=limit,
        )
        batch = page.get("issues", [])
        issues.extend(batch)
        # A short page means we have reached the end of the result set.
        if len(batch) < limit:
            break
        start_at += len(batch)
    return issues


def _extract_deployments(issues, start_date, end_date, eastern):
    """
    Build deployment records from issues whose comments announce a deploy
    within [start_date, end_date] (dates compared in the ``eastern`` zone).

    Returns:
        list[dict]: rows with Deployed Date/Time, Key (HTML link), Summary,
        and Bucket (defaults to "production" when the comment has no Bucket).
    """
    deployments = []
    for issue in issues:
        key_val = issue["key"]
        summary = issue["fields"]["summary"]

        comments = issue["fields"].get("comment", {}).get("comments", [])
        deployment_comment = ""
        for comment in comments:
            try:
                comment_dt = pd.to_datetime(comment["created"], utc=True).astimezone(
                    eastern
                )
            except Exception:
                continue
            # Only consider comments created within the selected date range.
            if start_date <= comment_dt.date() <= end_date:
                body = comment["body"].strip()
                if body.lower().startswith("deployed"):
                    deployment_comment = body
                    break
        if not deployment_comment:
            continue

        dep_at, bucket, traffic, branch = parse_deployment_info(deployment_comment)
        if not dep_at and deployment_comment.lower().startswith("deployed to prod"):
            # Fallback for free-form "Deployed to prod <timestamp>." comments:
            # strip the prefix and any trailing sentence after the first ".".
            timestamp_text = re.sub(
                r"(?i)^deployed\s+to\s+prod\s*(at\s*)?",
                "",
                deployment_comment,
            ).strip()
            if "." in timestamp_text:
                timestamp_text = timestamp_text.split(".")[0].strip()
            dep_at = timestamp_text
            bucket, traffic, branch = "", "", ""
        if not dep_at:
            continue

        # errors="coerce" yields NaT on a malformed timestamp instead of raising.
        deployed_dt = pd.to_datetime(
            dep_at, format="%m/%d/%y, %I:%M %p", errors="coerce"
        )
        if pd.isna(deployed_dt):
            continue
        # Timestamps in comments are written in Eastern local time.
        deployed_dt = eastern.localize(deployed_dt.replace(tzinfo=None))
        deployments.append(
            {
                "Deployed Date": deployed_dt.strftime("%m/%d/%y"),
                "Deployed Time": deployed_dt.strftime("%I:%M %p"),
                # NOTE(review): the table is rendered with escape=False, so this
                # was presumably an HTML anchor that was stripped from the
                # original source; reconstructed as a browse link -- confirm.
                "Key": f'<a href="{JIRA_URL}/browse/{key_val}">{key_val}</a>',
                "Summary": summary,
                "Bucket": bucket if bucket else "production",
            }
        )
    return deployments


def _render_flex_deployments(flagged_groups):
    """
    Fetch Jira deployment comments for the dashboard's selected date range and
    render the deployments whose Bucket matches a flagged group.

    Returns:
        str: newline-joined "Jira Ticket: ..." lines for the flagged
        deployments, or "" when nothing matched.
    """
    start_date = st.session_state.get("start_date")
    end_date = st.session_state.get("end_date")
    eastern = st.session_state.get("eastern")

    st.info("Fetching deployment information from Jira...")

    # Jira comments record dates as M/D/YY (no zero padding).
    dashboard_start_str = (
        f"{start_date.month}/{start_date.day}/{start_date.strftime('%y')}"
    )
    dashboard_end_str = f"{end_date.month}/{end_date.day}/{end_date.strftime('%y')}"
    # Parenthesize the OR: JQL gives AND higher precedence, so without the
    # parentheses the Bucket/Traffic/Branch filters applied only to the
    # second date term.
    jql = (
        f'(comment ~ "Deployed At: {dashboard_start_str}" '
        f'OR comment ~ "Deployed At: {dashboard_end_str}") '
        f'AND comment ~ "Bucket:" '
        f'AND comment ~ "Traffic:" '
        f'AND comment ~ "Branch:"'
    )

    issues = _fetch_deployment_issues(jql)
    deployments = _extract_deployments(issues, start_date, end_date, eastern)
    if not deployments:
        st.info("No deployments found for the selected criteria.")
        return ""

    df_deployments = pd.DataFrame(deployments).reset_index(drop=True)
    # Sort newest-first on a temporary combined datetime column.
    df_deployments["Deployed_dt"] = pd.to_datetime(
        df_deployments["Deployed Date"] + " " + df_deployments["Deployed Time"],
        format="%m/%d/%y %I:%M %p",
        errors="coerce",
    )
    df_deployments.sort_values("Deployed_dt", ascending=False, inplace=True)
    df_deployments.drop("Deployed_dt", axis=1, inplace=True)

    # Only show deployments whose bucket was flagged as spiking.
    df_flagged = df_deployments[df_deployments["Bucket"].isin(flagged_groups)]
    if df_flagged.empty:
        st.info("No flagged deployments found for the selected criteria.")
        return ""

    tickets_info = "\n".join(
        f"Jira Ticket: {row['Key']} - {row['Summary']}"
        for _, row in df_flagged.iterrows()
    )
    cols = ["Deployed Date", "Deployed Time", "Key", "Summary", "Bucket"]
    styled_df = df_flagged[cols].style.hide(axis="index")
    # escape=False keeps the HTML links in the Key column clickable.
    st.markdown(styled_df.to_html(escape=False), unsafe_allow_html=True)
    return tickets_info


def update_section_generic(key, df, start_times, container, spike_time):
    """
    Render one dashboard section: bucket ``df`` into 5-minute intervals, flag
    groups whose count at ``spike_time`` exceeds the section baseline, chart
    the series, and (for the "flex bucket" section) cross-reference flagged
    buckets against Jira deployment comments.

    Args:
        key: Section name; expected to be a key of SECTION_CONFIG.
        df: Raw query result with EST_DATE / EST_HOUR / EST_MINUTE / CNT
            columns plus the section's grouping column (case-insensitive).
        start_times: Mapping of section name -> epoch seconds when that
            section's query started (used to report elapsed time).
        container: Streamlit container to render the section into.
        spike_time: Timestamp of the top-level spike, aligned to a 5-minute
            boundary, used to anchor the flagging comparison.
    """
    # Elapsed query time for this section.
    elapsed_section = time.time() - start_times[key]
    minutes_container = int(elapsed_section // 60)
    seconds_container = elapsed_section % 60

    # Standardize column names and build a unified timestamp column.
    df.columns = [col.upper() for col in df.columns]
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str)
        + " "
        + df["EST_HOUR"].astype(str).str.zfill(2)
        + ":"
        + df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    # Sort on the full timestamp: the previous hour/minute-only sort
    # interleaved rows whenever the range spanned more than one date.
    df.sort_values("timestamp", inplace=True)
    # "5min" replaces the deprecated "5T" frequency alias (pandas 2.2+).
    df["5min"] = df["timestamp"].dt.floor("5min")

    config = SECTION_CONFIG.get(key, {})
    baseline = config.get("baseline", 30)
    group_col = config.get("group_col")
    spike_threshold = config.get("spike_threshold", 3)

    with container:
        st.subheader(f"{key.capitalize()} Data")
        st.info(
            f"{key.capitalize()} query completed in {minutes_container} minute(s) and {seconds_container:.2f} seconds."
        )

        # Group by 5-minute interval and the configured grouping column.
        agg_df = df.groupby(["5min", group_col], as_index=False)["CNT"].sum()

        # Flag groups whose total count at the spike interval exceeds baseline.
        spike_row = agg_df[agg_df["5min"] == spike_time]
        flagged_groups = [
            grp
            for grp in spike_row[group_col].unique()
            if spike_row.loc[spike_row[group_col] == grp, "CNT"].sum() > baseline
        ]

        # Build the chart once; it is shown from whichever branch applies.
        fig = px.line(
            agg_df,
            x="5min",
            y="CNT",
            color=group_col,
            title=config.get(
                "chart_title",
                f"{key.capitalize()} House Ads Count by 5-Minute Interval",
            ),
            labels={"5min": "Time", "CNT": "House Ads Count", group_col: key},
        )
        fig.update_xaxes(tickformat="%I:%M %p")

        if flagged_groups:
            if len(flagged_groups) > spike_threshold:
                msg = f"{key.capitalize()}: House ad increase detected for multiple {key} groups starting around {spike_time.strftime('%I:%M %p')}."
                st.warning(msg)
                with st.expander(f"Show Raw {key.capitalize()} Data"):
                    st.dataframe(df)
                with st.expander("Show Chart"):
                    st.plotly_chart(fig, use_container_width=True)
                # NOTE(review): this branch does not record msg in
                # findings_messages, unlike the other two -- confirm intent.
            else:
                msg = f"{key.capitalize()}: House ad increase detected for {', '.join(flagged_groups)} starting around {spike_time.strftime('%I:%M %p')}."
                st.success(msg)
                with st.expander(f"Show Raw {key.capitalize()} Data"):
                    st.dataframe(df)
                st.plotly_chart(fig, use_container_width=True)
                st.session_state.setdefault("findings_messages", []).append(msg)
        else:
            msg = f"{key.capitalize()}: No significant {key} spikes detected."
            st.info(msg)
            st.session_state.setdefault("findings_messages", []).append(msg)
            with st.expander(f"Show Raw {key.capitalize()} Data"):
                st.dataframe(df)
            with st.expander("Show Chart"):
                st.plotly_chart(fig, use_container_width=True)

        if key == "flex bucket":
            st.write("### Deployment Information")
            flex_jira_info = ""
            try:
                flex_jira_info = _render_flex_deployments(flagged_groups)
            except Exception as e:
                st.error(f"Error fetching deployments: {e}")
            # Save the Jira info (if any) for use elsewhere in the app.
            st.session_state["flex_jira_info"] = flex_jira_info
            # NOTE(review): the original markdown literal was HTML that was
            # stripped from the source; reconstructed as a section divider
            # (unsafe_allow_html implies an HTML tag) -- confirm.
            st.markdown("<hr>", unsafe_allow_html=True)