Spaces:
Sleeping
Sleeping
| import time | |
| import pandas as pd | |
| import plotly.express as px | |
| import streamlit as st | |
| import os | |
| import pytz | |
| import re | |
| from datetime import timedelta, date, datetime | |
| from atlassian import Jira | |
# --- Jira API configuration for deployment lookups ---
# All three settings are read from the environment; each is None when the
# corresponding variable is unset.
JIRA_URL = os.environ.get("JIRA_URL")
JIRA_USERNAME = os.environ.get("JIRA_USERNAME")
JIRA_API_TOKEN = os.environ.get("JIRA_API_TOKEN")

# Module-level Jira client shared by the dashboard; the API token is supplied
# through the client's ``password`` argument.
jira_client = Jira(
    url=JIRA_URL,
    username=JIRA_USERNAME,
    password=JIRA_API_TOKEN,
)
# Per-section settings, keyed by section name. Each entry holds:
#   group_col       - upper-cased column the section's data is grouped by
#   chart_title     - title of the section's line chart
#   baseline        - count a group must exceed at the spike time to be flagged
#   spike_threshold - more flagged groups than this is reported as "multiple"
# Every section currently shares the same baseline of 40 and the same title
# suffix, so the entries are generated from one compact table.
SECTION_CONFIG = {
    section: {
        "group_col": group_col,
        "chart_title": f"{title_prefix} House Ads Count by 5-Minute Interval",
        "baseline": 40,
        "spike_threshold": spike_threshold,
    }
    for section, group_col, title_prefix, spike_threshold in (
        ("flex bucket", "BUCKET", "Flex Bucket", 2),
        ("bidder", "HB_BIDDER", "hb_bidder", 2),
        ("deal", "HB_DEAL", "hb_deal", 2),
        ("ad_unit", "AD_UNIT", "Ad Unit", 2),
        ("browser", "BROWSER", "Browser", 1),
        ("device", "DEVICE", "Device", 1),
        ("random_integer", "RANDOM_INTEGER", "Random Integer", 2),
        ("hb_pb", "HB_PB", "hb_pb", 2),
        ("hb_size", "HB_SIZE", "hb_size", 2),
    )
}
def parse_deployment_info(comment_text):
    """
    Extract the four deployment fields from a structured comment.

    The comment is scanned line by line for the markers ``Deployed At:``,
    ``Bucket:``, ``Traffic:`` and ``Branch:``. A line is attributed to the
    first marker (in that order) that occurs anywhere in it, and the value is
    the stripped text following that marker. When a marker shows up on several
    lines, the last such line wins.

    Returns:
        ``(deployed_at, bucket, traffic, branch)`` when all four markers were
        seen, otherwise four empty strings.
    """
    labels = ("Deployed At", "Bucket", "Traffic", "Branch")
    values = {}
    for raw_line in comment_text.splitlines():
        for label in labels:
            marker = f"{label}:"
            if marker in raw_line:
                values[label] = raw_line.split(marker)[1].strip()
                # Only the first matching marker claims the line.
                break
    if len(values) != len(labels):
        return "", "", "", ""
    return tuple(values[label] for label in labels)
def _build_deployment_jql(start_date, end_date):
    """Return the JQL used to find issues carrying deployment comments.

    Both dates are rendered as ``M/D/YY`` with no zero padding on month or day.
    """
    start_label = f"{start_date.month}/{start_date.day}/{start_date.strftime('%y')}"
    end_label = f"{end_date.month}/{end_date.day}/{end_date.strftime('%y')}"
    # NOTE(review): JQL evaluates AND before OR, so this reads as
    #   start-date clause OR (end-date clause AND Bucket AND Traffic AND Branch).
    # The query is deliberately left unchanged: tightening it would also drop
    # issues whose only deployment comment is the free-form "deployed to prod"
    # style handled by _parse_deployment_comment. Confirm the intended grouping.
    return (
        f'comment ~ "Deployed At: {start_label}" '
        f'OR comment ~ "Deployed At: {end_label}" '
        f'AND comment ~ "Bucket:" '
        f'AND comment ~ "Traffic:" '
        f'AND comment ~ "Branch:"'
    )


def _fetch_matching_issues(jql, page_size=50):
    """Collect every issue matching ``jql`` by paging through the Jira search."""
    collected = []
    offset = 0
    while True:
        page = jira_client.jql(
            jql,
            fields="key,summary,updated,comment",
            start=offset,
            limit=page_size,
        )
        issues = page.get("issues", [])
        collected.extend(issues)
        # A short (or empty) page means there is nothing left to fetch.
        if len(issues) < page_size:
            return collected
        offset += len(issues)


def _find_deployment_comment(comments, start_date, end_date, eastern):
    """Return the first comment created within the date range whose body starts
    with "deployed" (case-insensitive), or "" when there is none."""
    for comment in comments:
        try:
            created = pd.to_datetime(comment["created"], utc=True).astimezone(
                eastern
            )
        except (KeyError, ValueError, TypeError):
            # Best effort: skip comments whose creation time cannot be read.
            continue
        if not start_date <= created.date() <= end_date:
            continue
        body = comment["body"].strip()
        if body.lower().startswith("deployed"):
            return body
    return ""


def _parse_deployment_comment(comment_body):
    """Return ``(deployed_at_text, bucket)`` extracted from a deployment comment.

    Structured comments are handled by ``parse_deployment_info``. Otherwise a
    comment starting with "deployed to prod" is treated as free-form: the text
    after that prefix (and an optional "at"), up to the first ".", is taken as
    the timestamp and the bucket is left empty.
    """
    dep_at, bucket, _traffic, _branch = parse_deployment_info(comment_body)
    if dep_at or not comment_body.lower().startswith("deployed to prod"):
        return dep_at, bucket
    remainder = re.sub(
        r"(?i)^deployed\s+to\s+prod\s*(at\s*)?",
        "",
        comment_body,
    ).strip()
    return remainder.partition(".")[0].strip(), ""


def _deployment_row(issue, start_date, end_date, eastern):
    """Build the display row for one issue, or None when the issue has no
    in-range deployment comment with a parseable timestamp."""
    fields = issue["fields"]
    comments = (fields.get("comment") or {}).get("comments") or []
    body = _find_deployment_comment(comments, start_date, end_date, eastern)
    if not body:
        return None
    dep_at, bucket = _parse_deployment_comment(body)
    if not dep_at:
        return None
    try:
        deployed_dt = pd.to_datetime(
            dep_at, format="%m/%d/%y, %I:%M %p", errors="coerce"
        )
    except (ValueError, TypeError):
        return None
    # errors="coerce" yields NaT for text that does not match the format.
    if pd.isna(deployed_dt):
        return None
    deployed_dt = eastern.localize(deployed_dt.replace(tzinfo=None))
    key_val = issue["key"]
    return {
        "Deployed Date": deployed_dt.strftime("%m/%d/%y"),
        "Deployed Time": deployed_dt.strftime("%I:%M %p"),
        "Key": f'<a href="{JIRA_URL}/browse/{key_val}" target="_blank">{key_val}</a>',
        "Summary": fields["summary"],
        # Comments without a bucket are attributed to "production".
        "Bucket": bucket if bucket else "production",
    }


def _load_deployments(start_date, end_date, eastern):
    """Query Jira and return the deployments as a DataFrame sorted newest
    first, or None when no deployment could be extracted."""
    jql = _build_deployment_jql(start_date, end_date)
    st.info("Fetching deployment information from Jira...")
    rows = []
    for issue in _fetch_matching_issues(jql):
        row = _deployment_row(issue, start_date, end_date, eastern)
        if row is not None:
            rows.append(row)
    if not rows:
        return None
    deployments = pd.DataFrame(rows).reset_index(drop=True)
    # Temporary datetime column used only to order the rows.
    deployments["Deployed_dt"] = pd.to_datetime(
        deployments["Deployed Date"] + " " + deployments["Deployed Time"],
        format="%m/%d/%y %I:%M %p",
        errors="coerce",
    )
    deployments.sort_values("Deployed_dt", ascending=False, inplace=True)
    deployments.drop("Deployed_dt", axis=1, inplace=True)
    return deployments


def _show_deployment_table(df_flagged):
    """Render the flagged deployments as an HTML table with clickable keys."""
    import html

    columns = ["Deployed Date", "Deployed Time", "Key", "Summary", "Bucket"]
    table = df_flagged[columns].copy()
    # The table is emitted as raw HTML so the "Key" links work; summaries come
    # from Jira and are escaped so they cannot inject markup into the page.
    table["Summary"] = table["Summary"].map(lambda text: html.escape(str(text)))
    styled = table.style.hide(axis="index")
    st.markdown(styled.to_html(escape=False), unsafe_allow_html=True)


def update_section_generic(key, df, start_times, container, spike_time):
    """
    Render one dashboard section: timing note, spike findings, raw data and chart.

    The data is aggregated into 5-minute intervals per group (the group column
    comes from ``SECTION_CONFIG[key]``). Groups whose count in the interval
    equal to ``spike_time`` exceeds the section's baseline are flagged, and the
    resulting message is appended to ``st.session_state["findings_messages"]``.
    For the "flex bucket" section, Jira deployments whose bucket is among the
    flagged groups are also listed, and a text summary of them is stored in
    ``st.session_state["flex_jira_info"]``.

    Args:
        key: Section name; looked up in ``SECTION_CONFIG`` and ``start_times``.
        df: Query result with EST_DATE, EST_HOUR, EST_MINUTE, CNT and the
            section's group column (any letter case). Modified in place:
            columns are upper-cased, rows sorted, and ``timestamp`` / ``5min``
            columns added.
        start_times: Mapping of section name to the ``time.time()`` value
            recorded when that section's query started.
        container: Streamlit container the section is rendered into.
        spike_time: Timestamp of the 5-minute interval to inspect; must
            support ``strftime``.

    NOTE(review): the original indentation was lost; everything after the
    configuration lookup (including the closing divider) is assumed to render
    inside ``container`` - confirm.
    """
    # Time elapsed since this section's query was started.
    elapsed_minutes, elapsed_seconds = divmod(time.time() - start_times[key], 60)

    # Standardize column names and create a unified timestamp.
    df.columns = [col.upper() for col in df.columns]
    df.sort_values(by=["EST_HOUR", "EST_MINUTE"], inplace=True)
    df["timestamp"] = pd.to_datetime(
        df["EST_DATE"].astype(str)
        + " "
        + df["EST_HOUR"].astype(str).str.zfill(2)
        + ":"
        + df["EST_MINUTE"].astype(str).str.zfill(2)
    )
    # "5min" is used instead of "5T": the "T" alias is deprecated in pandas 2.2+.
    df["5min"] = df["timestamp"].dt.floor("5min")

    # Retrieve configuration for the current section.
    config = SECTION_CONFIG.get(key, {})
    baseline = config.get("baseline", 30)
    group_col = config.get("group_col")
    spike_threshold = config.get("spike_threshold", 3)
    section_name = key.capitalize()

    with container:
        st.subheader(f"{section_name} Data")
        st.info(
            f"{section_name} query completed in {int(elapsed_minutes)} minute(s) and {elapsed_seconds:.2f} seconds."
        )

        # One row per (5-minute interval, group) with the summed count.
        agg_df = df.groupby(["5min", group_col], as_index=False)["CNT"].sum()

        # agg_df is already unique per interval and group, so the spike
        # interval's rows can be compared against the baseline directly.
        spike_rows = agg_df[agg_df["5min"] == spike_time]
        flagged_groups = spike_rows.loc[
            spike_rows["CNT"] > baseline, group_col
        ].tolist()

        fig = px.line(
            agg_df,
            x="5min",
            y="CNT",
            color=group_col,
            title=config.get(
                "chart_title",
                f"{section_name} House Ads Count by 5-Minute Interval",
            ),
            labels={"5min": "Time", "CNT": "House Ads Count", group_col: key},
        )
        fig.update_xaxes(tickformat="%I:%M %p")

        # The chart is shown inline only when a small, named set of groups spiked.
        chart_inline = False
        if not flagged_groups:
            msg = f"{section_name}: No significant {key} spikes detected."
            st.info(msg)
        elif len(flagged_groups) > spike_threshold:
            msg = f"{section_name}: House ad increase detected for multiple {key} groups starting around {spike_time.strftime('%I:%M %p')}."
            st.warning(msg)
        else:
            # Group values may be numeric, so stringify them before joining.
            group_names = ", ".join(str(grp) for grp in flagged_groups)
            msg = f"{section_name}: House ad increase detected for {group_names} starting around {spike_time.strftime('%I:%M %p')}."
            st.success(msg)
            chart_inline = True

        with st.expander(f"Show Raw {section_name} Data"):
            st.dataframe(df)
        if chart_inline:
            st.plotly_chart(fig, use_container_width=True)
        else:
            with st.expander("Show Chart"):
                st.plotly_chart(fig, use_container_width=True)
        st.session_state.setdefault("findings_messages", []).append(msg)

        if key == "flex bucket":
            st.write("### Deployment Information")
            flex_jira_info = ""
            try:
                df_deployments = _load_deployments(
                    st.session_state.get("start_date"),
                    st.session_state.get("end_date"),
                    st.session_state.get("eastern"),
                )
                if df_deployments is None:
                    st.info("No deployments found for the selected criteria.")
                else:
                    # Keep only deployments whose bucket was flagged above.
                    df_flagged = df_deployments[
                        df_deployments["Bucket"].isin(flagged_groups)
                    ]
                    if df_flagged.empty:
                        st.info(
                            "No flagged deployments found for the selected criteria."
                        )
                    else:
                        # One line per flagged Jira ticket.
                        flex_jira_info = "\n".join(
                            f"Jira Ticket: {ticket} - {summary}"
                            for ticket, summary in zip(
                                df_flagged["Key"], df_flagged["Summary"]
                            )
                        )
                        _show_deployment_table(df_flagged)
            except Exception as e:
                # UI boundary: any Jira or parsing failure is reported in the
                # section instead of aborting the dashboard.
                st.error(f"Error fetching deployments: {e}")
            # Save the Jira info (if any) to session state.
            st.session_state["flex_jira_info"] = flex_jira_info

        st.markdown("<hr style='border: 3px solid gray;'>", unsafe_allow_html=True)