Spaces:
Sleeping
Sleeping
works
Browse files
- streamlit_app.py +20 -9
- utils.py +20 -17
streamlit_app.py
CHANGED
|
@@ -4,6 +4,7 @@ import matplotlib.pyplot as plt
|
|
| 4 |
from datetime import datetime, time, date
|
| 5 |
from typing import List, Dict, Any, Tuple
|
| 6 |
from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
|
|
|
|
| 7 |
|
| 8 |
# Constants
|
| 9 |
HARD_CODED_DATE = date(2024, 7, 26)
|
|
@@ -84,9 +85,11 @@ def summary_by_period_form() -> None:
|
|
| 84 |
period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
|
| 85 |
if not st.session_state.df.empty:
|
| 86 |
st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
|
|
|
|
|
|
|
| 87 |
|
| 88 |
def alarm_state_form() -> None:
|
| 89 |
-
threshold_input = st.
|
| 90 |
datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
|
| 91 |
evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
|
| 92 |
aggregation_function_input = st.selectbox(
|
|
@@ -183,7 +186,7 @@ def display_key_tables() -> None:
|
|
| 183 |
# Symbols
|
| 184 |
st.write("#### Symbols")
|
| 185 |
symbol_data = {
|
| 186 |
-
"Symbol": ["
|
| 187 |
"Meaning": [
|
| 188 |
"Breaching data point: This data point exceeds the threshold.",
|
| 189 |
"Missing data point: This data point is missing or not reported.",
|
|
@@ -194,14 +197,22 @@ def display_key_tables() -> None:
|
|
| 194 |
st.table(symbol_df)
|
| 195 |
|
| 196 |
# Columns
|
| 197 |
-
st.write("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
column_data = {
|
| 199 |
-
"
|
| 200 |
-
"
|
| 201 |
-
"
|
| 202 |
-
"
|
| 203 |
-
"
|
| 204 |
-
"
|
| 205 |
]
|
| 206 |
}
|
| 207 |
column_df = pd.DataFrame(column_data)
|
|
|
|
| 4 |
from datetime import datetime, time, date
|
| 5 |
from typing import List, Dict, Any, Tuple
|
| 6 |
from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
|
| 7 |
+
from textwrap import dedent
|
| 8 |
|
| 9 |
# Constants
|
| 10 |
HARD_CODED_DATE = date(2024, 7, 26)
|
|
|
|
| 85 |
period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
|
| 86 |
if not st.session_state.df.empty:
|
| 87 |
st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
|
| 88 |
+
else:
|
| 89 |
+
st.warning("No data available to aggregate.")
|
| 90 |
|
| 91 |
def alarm_state_form() -> None:
|
| 92 |
+
threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
|
| 93 |
datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
|
| 94 |
evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
|
| 95 |
aggregation_function_input = st.selectbox(
|
|
|
|
| 186 |
# Symbols
|
| 187 |
st.write("#### Symbols")
|
| 188 |
symbol_data = {
|
| 189 |
+
"Symbol": ["🔴", "⚫️", "🟢"],
|
| 190 |
"Meaning": [
|
| 191 |
"Breaching data point: This data point exceeds the threshold.",
|
| 192 |
"Missing data point: This data point is missing or not reported.",
|
|
|
|
| 197 |
st.table(symbol_df)
|
| 198 |
|
| 199 |
# Columns
|
| 200 |
+
st.write(dedent("""\
|
| 201 |
+
#### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
|
| 202 |
+
|
| 203 |
+
Sometimes, no metric events may have been reported during a given time period. In this case,
|
| 204 |
+
you must decide how you will treat missing data points. Ignore it? Or consider it a failure.
|
| 205 |
+
|
| 206 |
+
Here are the 4 supported strategies in AWS:
|
| 207 |
+
"""))
|
| 208 |
+
|
| 209 |
column_data = {
|
| 210 |
+
"Strategy": ["missing", "ignore", "breaching", "notBreaching"],
|
| 211 |
+
"Explanation": [
|
| 212 |
+
"If all data points in the alarm evaluation range are missing, the alarm transitions to INSUFFICIENT_DATA. Possible values: INSUFFICIENT_DATA, Retain current state, ALARM, OK.",
|
| 213 |
+
"The current alarm state is maintained. Possible values: Retain current state, ALARM, OK.",
|
| 214 |
+
"Missing data points are treated as \"bad\" and breaching the threshold. Possible values: ALARM, OK.",
|
| 215 |
+
"Missing data points are treated as \"good\" and within the threshold. Possible values: ALARM, OK."
|
| 216 |
]
|
| 217 |
}
|
| 218 |
column_df = pd.DataFrame(column_data)
|
utils.py
CHANGED
|
@@ -42,8 +42,7 @@ def calculate_percentile(
|
|
| 42 |
freq: str,
|
| 43 |
percentile: float
|
| 44 |
) -> pd.DataFrame:
|
| 45 |
-
percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"]
|
| 46 |
-
.quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
|
| 47 |
percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
|
| 48 |
return percentile_df
|
| 49 |
|
|
@@ -51,13 +50,16 @@ def aggregate_data(
|
|
| 51 |
df: pd.DataFrame,
|
| 52 |
period_length: str
|
| 53 |
) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
| 54 |
aggregation_funcs = {
|
| 55 |
-
'p50': lambda x: np.percentile(x.dropna(), 50),
|
| 56 |
-
'p95': lambda x: np.percentile(x.dropna(), 95),
|
| 57 |
-
'p99': lambda x: np.percentile(x.dropna(), 99),
|
| 58 |
-
'max': lambda x: np.max(x.dropna()),
|
| 59 |
-
'min': lambda x: np.min(x.dropna()),
|
| 60 |
-
'average': lambda x: np.mean(x.dropna())
|
| 61 |
}
|
| 62 |
|
| 63 |
summary_df = df.groupby(pd.Grouper(key='Timestamp', freq=period_length)).agg(
|
|
@@ -111,30 +113,31 @@ def evaluate_alarm_state(
|
|
| 111 |
|
| 112 |
for dp in chunk:
|
| 113 |
if dp is None:
|
| 114 |
-
|
| 115 |
elif check_condition(dp, threshold, alarm_condition):
|
| 116 |
-
|
| 117 |
else:
|
| 118 |
-
|
|
|
|
| 119 |
|
| 120 |
if len(chunk) < evaluation_range:
|
| 121 |
-
data_point_repr += '
|
| 122 |
|
| 123 |
-
if data_point_repr.count('
|
| 124 |
-
num_dp_that_must_be_filled = datapoints_to_alarm - sum([data_point_repr.count('
|
| 125 |
|
| 126 |
data_table_dict["DataPoints"].append(data_point_repr)
|
| 127 |
data_table_dict["# of data points that must be filled"].append(num_dp_that_must_be_filled)
|
| 128 |
|
| 129 |
if num_dp_that_must_be_filled > 0:
|
| 130 |
-
data_table_dict["MISSING"].append("INSUFFICIENT_DATA" if data_point_repr.count('
|
| 131 |
data_table_dict["IGNORE"].append("Retain current state")
|
| 132 |
data_table_dict["BREACHING"].append("ALARM")
|
| 133 |
data_table_dict["NOT BREACHING"].append("OK")
|
| 134 |
else:
|
| 135 |
data_table_dict["MISSING"].append("OK")
|
| 136 |
data_table_dict["IGNORE"].append("Retain current state")
|
| 137 |
-
data_table_dict["BREACHING"].append("ALARM" if '
|
| 138 |
-
data_table_dict["NOT BREACHING"].append("ALARM" if '
|
| 139 |
|
| 140 |
return pd.DataFrame(data_table_dict)
|
|
|
|
| 42 |
freq: str,
|
| 43 |
percentile: float
|
| 44 |
) -> pd.DataFrame:
|
| 45 |
+
percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"] .quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
|
|
|
|
| 46 |
percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
|
| 47 |
return percentile_df
|
| 48 |
|
|
|
|
| 50 |
df: pd.DataFrame,
|
| 51 |
period_length: str
|
| 52 |
) -> pd.DataFrame:
|
| 53 |
+
if df.empty:
|
| 54 |
+
return pd.DataFrame() # Return an empty DataFrame if input is empty
|
| 55 |
+
|
| 56 |
aggregation_funcs = {
|
| 57 |
+
'p50': lambda x: np.percentile(x.dropna(), 50) if not x.dropna().empty else np.nan,
|
| 58 |
+
'p95': lambda x: np.percentile(x.dropna(), 95) if not x.dropna().empty else np.nan,
|
| 59 |
+
'p99': lambda x: np.percentile(x.dropna(), 99) if not x.dropna().empty else np.nan,
|
| 60 |
+
'max': lambda x: np.max(x.dropna()) if not x.dropna().empty else np.nan,
|
| 61 |
+
'min': lambda x: np.min(x.dropna()) if not x.dropna().empty else np.nan,
|
| 62 |
+
'average': lambda x: np.mean(x.dropna()) if not x.dropna().empty else np.nan
|
| 63 |
}
|
| 64 |
|
| 65 |
summary_df = df.groupby(pd.Grouper(key='Timestamp', freq=period_length)).agg(
|
|
|
|
| 113 |
|
| 114 |
for dp in chunk:
|
| 115 |
if dp is None:
|
| 116 |
+
dp_symbol = '⚫️'
|
| 117 |
elif check_condition(dp, threshold, alarm_condition):
|
| 118 |
+
dp_symbol = '🔴'
|
| 119 |
else:
|
| 120 |
+
dp_symbol = '🟢'
|
| 121 |
+
data_point_repr += dp_symbol
|
| 122 |
|
| 123 |
if len(chunk) < evaluation_range:
|
| 124 |
+
data_point_repr += '⚫️' * (evaluation_range - len(chunk))
|
| 125 |
|
| 126 |
+
if data_point_repr.count('⚫️') > (evaluation_range - datapoints_to_alarm):
|
| 127 |
+
num_dp_that_must_be_filled = datapoints_to_alarm - sum([data_point_repr.count('🟢'), data_point_repr.count('🔴')])
|
| 128 |
|
| 129 |
data_table_dict["DataPoints"].append(data_point_repr)
|
| 130 |
data_table_dict["# of data points that must be filled"].append(num_dp_that_must_be_filled)
|
| 131 |
|
| 132 |
if num_dp_that_must_be_filled > 0:
|
| 133 |
+
data_table_dict["MISSING"].append("INSUFFICIENT_DATA" if data_point_repr.count('⚫️') == evaluation_range else "Retain current state")
|
| 134 |
data_table_dict["IGNORE"].append("Retain current state")
|
| 135 |
data_table_dict["BREACHING"].append("ALARM")
|
| 136 |
data_table_dict["NOT BREACHING"].append("OK")
|
| 137 |
else:
|
| 138 |
data_table_dict["MISSING"].append("OK")
|
| 139 |
data_table_dict["IGNORE"].append("Retain current state")
|
| 140 |
+
data_table_dict["BREACHING"].append("ALARM" if '🔴' * datapoints_to_alarm in data_point_repr else "OK")
|
| 141 |
+
data_table_dict["NOT BREACHING"].append("ALARM" if '🟢' * datapoints_to_alarm not in data_point_repr else "OK")
|
| 142 |
|
| 143 |
return pd.DataFrame(data_table_dict)
|