Spaces:

phitoduck
/

cloudwatch-simulator

Sleeping

App Files Files Community

phitoduck committed on Jul 27, 2024

Commit

bc28645

1 Parent(s): 755ac75

works

Browse files

Files changed (2) hide show

streamlit_app.py +20 -9
utils.py +20 -17

streamlit_app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import matplotlib.pyplot as plt
 from datetime import datetime, time, date
 from typing import List, Dict, Any, Tuple
 from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
 # Constants
 HARD_CODED_DATE = date(2024, 7, 26)
@@ -84,9 +85,11 @@ def summary_by_period_form() -> None:
     period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
     if not st.session_state.df.empty:
         st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
 def alarm_state_form() -> None:
-    threshold_input = st.number_input("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
     datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
     evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
     aggregation_function_input = st.selectbox(
@@ -183,7 +186,7 @@ def display_key_tables() -> None:
     # Symbols
     st.write("#### Symbols")
     symbol_data = {
-        "Symbol": ["X", "-", "0"],
         "Meaning": [
             "Breaching data point: This data point exceeds the threshold.",
             "Missing data point: This data point is missing or not reported.",
@@ -194,14 +197,22 @@ def display_key_tables() -> None:
     st.table(symbol_df)
     # Columns
-    st.write("#### Columns")
     column_data = {
-        "Column": ["MISSING", "IGNORE", "BREACHING", "NOT BREACHING"],
-        "Meaning": [
-            "Action to take when all data points are missing. Possible values: INSUFFICIENT_DATA, Retain current state, ALARM, OK.",
-            "Action to take when data points are missing but ignored. Possible values: Retain current state, ALARM, OK.",
-            "Action to take when missing data points are treated as breaching. Possible values: ALARM, OK.",
-            "Action to take when missing data points are treated as not breaching. Possible values: ALARM, OK."
         ]
     }
     column_df = pd.DataFrame(column_data)

 from datetime import datetime, time, date
 from typing import List, Dict, Any, Tuple
 from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
+from textwrap import dedent
 # Constants
 HARD_CODED_DATE = date(2024, 7, 26)
     period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
     if not st.session_state.df.empty:
         st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
+    else:
+        st.warning("No data available to aggregate.")
 def alarm_state_form() -> None:
+    threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
     datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
     evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
     aggregation_function_input = st.selectbox(
     # Symbols
     st.write("#### Symbols")
     symbol_data = {
+        "Symbol": ["🔴", "⚫️", "🟢"],
         "Meaning": [
             "Breaching data point: This data point exceeds the threshold.",
             "Missing data point: This data point is missing or not reported.",
     st.table(symbol_df)
     # Columns
+    st.write(dedent("""\
+    #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
+    Sometimes, no metric events may have been reported during a given time period. In this case,
+    you must decide how you will treat missing data points. Ignore it? Or consider it a failure.
+    Here are the 4 supported strategies in AWS:
+    """))
     column_data = {
+        "Strategy": ["missing", "ignore", "breaching", "notBreaching"],
+        "Explanation": [
+            "If all data points in the alarm evaluation range are missing, the alarm transitions to INSUFFICIENT_DATA. Possible values: INSUFFICIENT_DATA, Retain current state, ALARM, OK.",
+            "The current alarm state is maintained. Possible values: Retain current state, ALARM, OK.",
+            "Missing data points are treated as \"bad\" and breaching the threshold. Possible values: ALARM, OK.",
+            "Missing data points are treated as \"good\" and within the threshold. Possible values: ALARM, OK."
         ]
     }
     column_df = pd.DataFrame(column_data)

utils.py CHANGED Viewed

@@ -42,8 +42,7 @@ def calculate_percentile(
     freq: str,
     percentile: float
 ) -> pd.DataFrame:
-    percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"]\
-                                    .quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
     percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
     return percentile_df
@@ -51,13 +50,16 @@ def aggregate_data(
     df: pd.DataFrame,
     period_length: str
 ) -> pd.DataFrame:
     aggregation_funcs = {
-        'p50': lambda x: np.percentile(x.dropna(), 50),
-        'p95': lambda x: np.percentile(x.dropna(), 95),
-        'p99': lambda x: np.percentile(x.dropna(), 99),
-        'max': lambda x: np.max(x.dropna()),
-        'min': lambda x: np.min(x.dropna()),
-        'average': lambda x: np.mean(x.dropna())
     }
     summary_df = df.groupby(pd.Grouper(key='Timestamp', freq=period_length)).agg(
@@ -111,30 +113,31 @@ def evaluate_alarm_state(
         for dp in chunk:
             if dp is None:
-                data_point_repr += '-'
             elif check_condition(dp, threshold, alarm_condition):
-                data_point_repr += 'X'
             else:
-                data_point_repr += '0'
         if len(chunk) < evaluation_range:
-            data_point_repr += '-' * (evaluation_range - len(chunk))
-        if data_point_repr.count('-') > (evaluation_range - datapoints_to_alarm):
-            num_dp_that_must_be_filled = datapoints_to_alarm - sum([data_point_repr.count('0'), data_point_repr.count('X')])
         data_table_dict["DataPoints"].append(data_point_repr)
         data_table_dict["# of data points that must be filled"].append(num_dp_that_must_be_filled)
         if num_dp_that_must_be_filled > 0:
-            data_table_dict["MISSING"].append("INSUFFICIENT_DATA" if data_point_repr.count('-') == evaluation_range else "Retain current state")
             data_table_dict["IGNORE"].append("Retain current state")
             data_table_dict["BREACHING"].append("ALARM")
             data_table_dict["NOT BREACHING"].append("OK")
         else:
             data_table_dict["MISSING"].append("OK")
             data_table_dict["IGNORE"].append("Retain current state")
-            data_table_dict["BREACHING"].append("ALARM" if 'X' * datapoints_to_alarm in data_point_repr else "OK")
-            data_table_dict["NOT BREACHING"].append("ALARM" if '0' * datapoints_to_alarm not in data_point_repr else "OK")
     return pd.DataFrame(data_table_dict)

     freq: str,
     percentile: float
 ) -> pd.DataFrame:
+    percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"]                                    .quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
     percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
     return percentile_df
     df: pd.DataFrame,
     period_length: str
 ) -> pd.DataFrame:
+    if df.empty:
+        return pd.DataFrame()  # Return an empty DataFrame if input is empty
     aggregation_funcs = {
+        'p50': lambda x: np.percentile(x.dropna(), 50) if not x.dropna().empty else np.nan,
+        'p95': lambda x: np.percentile(x.dropna(), 95) if not x.dropna().empty else np.nan,
+        'p99': lambda x: np.percentile(x.dropna(), 99) if not x.dropna().empty else np.nan,
+        'max': lambda x: np.max(x.dropna()) if not x.dropna().empty else np.nan,
+        'min': lambda x: np.min(x.dropna()) if not x.dropna().empty else np.nan,
+        'average': lambda x: np.mean(x.dropna()) if not x.dropna().empty else np.nan
     }
     summary_df = df.groupby(pd.Grouper(key='Timestamp', freq=period_length)).agg(
         for dp in chunk:
             if dp is None:
+                dp_symbol = '⚫️'
             elif check_condition(dp, threshold, alarm_condition):
+                dp_symbol = '🔴'
             else:
+                dp_symbol = '🟢'
+            data_point_repr += dp_symbol
         if len(chunk) < evaluation_range:
+            data_point_repr += '⚫️' * (evaluation_range - len(chunk))
+        if data_point_repr.count('⚫️') > (evaluation_range - datapoints_to_alarm):
+            num_dp_that_must_be_filled = datapoints_to_alarm - sum([data_point_repr.count('🟢'), data_point_repr.count('🔴')])
         data_table_dict["DataPoints"].append(data_point_repr)
         data_table_dict["# of data points that must be filled"].append(num_dp_that_must_be_filled)
         if num_dp_that_must_be_filled > 0:
+            data_table_dict["MISSING"].append("INSUFFICIENT_DATA" if data_point_repr.count('⚫️') == evaluation_range else "Retain current state")
             data_table_dict["IGNORE"].append("Retain current state")
             data_table_dict["BREACHING"].append("ALARM")
             data_table_dict["NOT BREACHING"].append("OK")
         else:
             data_table_dict["MISSING"].append("OK")
             data_table_dict["IGNORE"].append("Retain current state")
+            data_table_dict["BREACHING"].append("ALARM" if '🔴' * datapoints_to_alarm in data_point_repr else "OK")
+            data_table_dict["NOT BREACHING"].append("ALARM" if '🟢' * datapoints_to_alarm not in data_point_repr else "OK")
     return pd.DataFrame(data_table_dict)