Upload 5 files
Browse files- src/package/ARLTheoretical.py +13 -0
- src/package/app.py +112 -38
- src/package/cusum.py +102 -88
src/package/ARLTheoretical.py
CHANGED
|
@@ -36,6 +36,19 @@ def get_ref_value_k(h: float, ARL_0: float) -> float:
|
|
| 36 |
|
| 37 |
return k
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
def get_ref_value(
|
| 41 |
h: float, list_ARL_0: list[float]
|
|
|
|
| 36 |
|
| 37 |
return k
|
| 38 |
|
| 39 |
+
def get_threshold_h(k: float, ARL_0: float) -> float:
    """
    Compute the normalized threshold h that corresponds to a given k and ARL_0.

    Args:
        k (float): Normalized reference value.
        ARL_0 (float): ARL0 value.

    Returns:
        float: Normalized threshold h, rounded to 4 decimal places.
    """
    # spc.xcusum_crit_ is queried with sided="one"; mu0, hs and r are fixed here.
    critical_value = spc.xcusum_crit_(k, ARL_0, mu0=0, hs=0, sided="one", r=30)
    rounded = np.round(critical_value, decimals=4)
    # tolist()[0] unwraps the first element of the rounded result.
    return rounded.tolist()[0]
|
| 52 |
|
| 53 |
def get_ref_value(
|
| 54 |
h: float, list_ARL_0: list[float]
|
src/package/app.py
CHANGED
|
@@ -11,7 +11,13 @@ import pandas as pd
|
|
| 11 |
import gradio as gr
|
| 12 |
import tomli
|
| 13 |
from cusum import CUSUM
|
| 14 |
-
from ARLTheoretical import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
from utils import (
|
| 16 |
populate_summary_table_ARL0_k,
|
| 17 |
populate_summary_table_ARL1_k,
|
|
@@ -81,7 +87,7 @@ def populate_table(h: str) -> tuple[gt.GT, gt.GT]:
|
|
| 81 |
), populate_summary_table_ARL1_k(summary_table_df_ARL1_k, dict_ARL0_k, h)
|
| 82 |
|
| 83 |
|
| 84 |
-
def calculate_reference_value_k(h: str, arl_0: str) ->
|
| 85 |
"""
|
| 86 |
Gets the reference value for given h and ARL_0.
|
| 87 |
|
|
@@ -90,7 +96,7 @@ def calculate_reference_value_k(h: str, arl_0: str) -> float:
|
|
| 90 |
arl_0 (str): ARL0 value.
|
| 91 |
|
| 92 |
Returns:
|
| 93 |
-
|
| 94 |
"""
|
| 95 |
h = float(h)
|
| 96 |
arl_0 = float(arl_0)
|
|
@@ -98,7 +104,27 @@ def calculate_reference_value_k(h: str, arl_0: str) -> float:
|
|
| 98 |
k = get_ref_value_k(h=h, ARL_0=arl_0)
|
| 99 |
k = "{:.2f}".format(k)
|
| 100 |
|
| 101 |
-
return k, k, k
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def calculate_arl1_h_k_mu1(h: str, k: str, mu1: str) -> float:
|
|
@@ -171,7 +197,7 @@ with gr.Blocks(
|
|
| 171 |
|
| 172 |
gr.Markdown(f"""
|
| 173 |
### AIM-CU Input:
|
| 174 |
-
AI output (e.g.
|
| 175 |
""") # noqa: F541
|
| 176 |
|
| 177 |
with gr.Row():
|
|
@@ -181,7 +207,7 @@ with gr.Blocks(
|
|
| 181 |
""") # noqa: F541
|
| 182 |
|
| 183 |
gr.Markdown(f"""
|
| 184 |
-
### Upload the AI output.
|
| 185 |
""") # noqa: F541
|
| 186 |
|
| 187 |
# load the CSV file with specificities across days
|
|
@@ -191,7 +217,11 @@ with gr.Blocks(
|
|
| 191 |
|
| 192 |
with gr.Row():
|
| 193 |
with gr.Column():
|
| 194 |
-
init_days = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
with gr.Column():
|
| 196 |
button_calculate_incontrol_params = gr.Button(
|
| 197 |
"Calculate parameters"
|
|
@@ -213,16 +243,9 @@ with gr.Blocks(
|
|
| 213 |
Parameter choices for detecting change and detection delay estimates (theoretical calculations).
|
| 214 |
""") # noqa: F541
|
| 215 |
|
| 216 |
-
gr.Markdown(f"""
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
h_phase1 = gr.Textbox(
|
| 221 |
-
label="h value =",
|
| 222 |
-
placeholder="h = normalized threshold, default = 4. Range: between 4 and 5 ([4, 5])",
|
| 223 |
-
value="3",
|
| 224 |
-
autofocus=True,
|
| 225 |
-
)
|
| 226 |
|
| 227 |
dataframe_gt_ref_value = gr.HTML(
|
| 228 |
label="Reference Values for an intended ARL0 with normalized threshold h",
|
|
@@ -231,44 +254,81 @@ with gr.Blocks(
|
|
| 231 |
)
|
| 232 |
|
| 233 |
gr.Markdown(f"""
|
| 234 |
-
### Calculate
|
| 235 |
""") # noqa: F541
|
| 236 |
|
| 237 |
-
with gr.
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
dataframe_gt_ARL0 = gr.HTML(
|
| 247 |
-
label="Estimate of steady state ARL (
|
| 248 |
show_label=True,
|
| 249 |
visible=False,
|
| 250 |
)
|
| 251 |
|
| 252 |
gr.Markdown(f"""
|
| 253 |
-
### Calculate ARL<sub>1</sub> for
|
| 254 |
""") # noqa: F541
|
| 255 |
|
| 256 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
k_phase1 = gr.Textbox(
|
| 258 |
label="k value =", placeholder="k", value="0.2996"
|
| 259 |
)
|
|
|
|
|
|
|
|
|
|
| 260 |
mu1 = gr.Textbox(
|
| 261 |
-
label="Shift in mean value =",
|
| 262 |
placeholder="Shift in mean value",
|
| 263 |
value="1.2",
|
| 264 |
)
|
| 265 |
|
| 266 |
-
button_calculate_ARL_1 = gr.Button("Calculate
|
| 267 |
|
| 268 |
-
output_ARL_1 = gr.Textbox(label="Calculated
|
| 269 |
|
| 270 |
button_populate_table = gr.Button(
|
| 271 |
-
"Populate Reference Values and
|
| 272 |
)
|
| 273 |
|
| 274 |
gr.Markdown(f"""
|
|
@@ -283,10 +343,6 @@ with gr.Blocks(
|
|
| 283 |
- Enter h and k values.
|
| 284 |
- Get CUSUM plots.
|
| 285 |
""") # noqa: F541
|
| 286 |
-
|
| 287 |
-
table_param_description = gr.Dataframe(
|
| 288 |
-
value=pd.read_csv("../../assets/params.csv")
|
| 289 |
-
)
|
| 290 |
with gr.Column():
|
| 291 |
gr.Markdown(f"""
|
| 292 |
### Monitoring:
|
|
@@ -301,7 +357,7 @@ with gr.Blocks(
|
|
| 301 |
h_phase2 = gr.Textbox(
|
| 302 |
label="h value =",
|
| 303 |
placeholder="normalized threshold, default = 4. Range: between 4 and 5 ([4, 5])",
|
| 304 |
-
value="
|
| 305 |
)
|
| 306 |
|
| 307 |
k_phase2 = gr.Textbox(
|
|
@@ -312,11 +368,17 @@ with gr.Blocks(
|
|
| 312 |
|
| 313 |
button_csv_metric = gr.Button("Show CUSUM plots")
|
| 314 |
|
|
|
|
|
|
|
| 315 |
plot_avg_metric = gr.Plot(
|
| 316 |
label="AI model performance",
|
| 317 |
visible=False,
|
| 318 |
)
|
| 319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
|
| 321 |
button_calculate_incontrol_params.click(
|
| 322 |
fn=set_init_days,
|
|
@@ -341,9 +403,21 @@ with gr.Blocks(
|
|
| 341 |
fn=lambda: gr.update(visible=True), inputs=[], outputs=dataframe_gt_ARL0
|
| 342 |
)
|
| 343 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
# Calculate specific k for ARL_0
|
| 345 |
button_calculate_k.click(
|
| 346 |
-
fn=calculate_reference_value_k,
|
|
|
|
|
|
|
| 347 |
)
|
| 348 |
button_calculate_k.click(
|
| 349 |
fn=lambda: gr.update(visible=True), inputs=[], outputs=output_k
|
|
@@ -352,7 +426,7 @@ with gr.Blocks(
|
|
| 352 |
# Calculate specific ARL_1 for value h, value k and shift in mean
|
| 353 |
button_calculate_ARL_1.click(
|
| 354 |
fn=calculate_arl1_h_k_mu1,
|
| 355 |
-
inputs=[
|
| 356 |
outputs=[output_ARL_1],
|
| 357 |
)
|
| 358 |
button_calculate_ARL_1.click(
|
|
|
|
| 11 |
import gradio as gr
|
| 12 |
import tomli
|
| 13 |
from cusum import CUSUM
|
| 14 |
+
from ARLTheoretical import (
|
| 15 |
+
get_ref_value,
|
| 16 |
+
get_ref_value_k,
|
| 17 |
+
get_ARL_1,
|
| 18 |
+
get_ARL_1_h_mu1_k,
|
| 19 |
+
get_threshold_h,
|
| 20 |
+
)
|
| 21 |
from utils import (
|
| 22 |
populate_summary_table_ARL0_k,
|
| 23 |
populate_summary_table_ARL1_k,
|
|
|
|
| 87 |
), populate_summary_table_ARL1_k(summary_table_df_ARL1_k, dict_ARL0_k, h)
|
| 88 |
|
| 89 |
|
| 90 |
+
def calculate_reference_value_k(h: str, arl_0: str) -> tuple[str, str, str, str]:
|
| 91 |
"""
|
| 92 |
Gets the reference value for given h and ARL_0.
|
| 93 |
|
|
|
|
| 96 |
arl_0 (str): ARL0 value.
|
| 97 |
|
| 98 |
Returns:
|
| 99 |
+
tuple: Reference value k formatted to two decimals (for output_k, k_phase1, k_phase2), followed by the threshold h (for h_phase2, h_for_arl1).
|
| 100 |
"""
|
| 101 |
h = float(h)
|
| 102 |
arl_0 = float(arl_0)
|
|
|
|
| 104 |
k = get_ref_value_k(h=h, ARL_0=arl_0)
|
| 105 |
k = "{:.2f}".format(k)
|
| 106 |
|
| 107 |
+
return k, k, k, h, h
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def calculate_threshold_h(k: str, arl_0: str) -> tuple[str, str, str, str, str, str]:
    """
    Gets the threshold h for given k and ARL_0.

    Args:
        k (str): Normalized reference value.
        arl_0 (str): ARL0 value.

    Returns:
        tuple[str, str, str, str, str, str]: Six values, one per UI output, in the
        order h (output_h), h (h_phase1), h (h_phase2), k (k_phase2),
        h (h_for_arl1), k (k_phase1). h is formatted to two decimals; k is
        returned exactly as it was entered.
    """
    # The textbox inputs arrive as strings; convert before the numeric calculation.
    k_val = float(k)
    arl_0_val = float(arl_0)

    h_val = get_threshold_h(k=k_val, ARL_0=arl_0_val)
    h = "{:.2f}".format(h_val)

    # The original k string is echoed back so the k textboxes show the user's input.
    return h, h, h, k, h, k
|
| 128 |
|
| 129 |
|
| 130 |
def calculate_arl1_h_k_mu1(h: str, k: str, mu1: str) -> float:
|
|
|
|
| 197 |
|
| 198 |
gr.Markdown(f"""
|
| 199 |
### AIM-CU Input:
|
| 200 |
+
AI output metric (e.g. AUROC, F1-score, Sensitivity, Test Positive Rate, etc.)
|
| 201 |
""") # noqa: F541
|
| 202 |
|
| 203 |
with gr.Row():
|
|
|
|
| 207 |
""") # noqa: F541
|
| 208 |
|
| 209 |
gr.Markdown(f"""
|
| 210 |
+
### Upload the AI output metric.
|
| 211 |
""") # noqa: F541
|
| 212 |
|
| 213 |
# load the CSV file with specificities across days
|
|
|
|
| 217 |
|
| 218 |
with gr.Row():
|
| 219 |
with gr.Column():
|
| 220 |
+
init_days = gr.Textbox(
|
| 221 |
+
label="Number of baseline observations",
|
| 222 |
+
placeholder="30",
|
| 223 |
+
value="30",
|
| 224 |
+
)
|
| 225 |
with gr.Column():
|
| 226 |
button_calculate_incontrol_params = gr.Button(
|
| 227 |
"Calculate parameters"
|
|
|
|
| 243 |
Parameter choices for detecting change and detection delay estimates (theoretical calculations).
|
| 244 |
""") # noqa: F541
|
| 245 |
|
| 246 |
+
# gr.Markdown(f"""
|
| 247 |
+
# ### Enter h value:
|
| 248 |
+
# """) # noqa: F541
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
dataframe_gt_ref_value = gr.HTML(
|
| 251 |
label="Reference Values for an intended ARL0 with normalized threshold h",
|
|
|
|
| 254 |
)
|
| 255 |
|
| 256 |
gr.Markdown(f"""
|
| 257 |
+
### Calculate parameters:
|
| 258 |
""") # noqa: F541
|
| 259 |
|
| 260 |
+
with gr.Tabs():
|
| 261 |
+
with gr.Tab("Specify k first"):
|
| 262 |
+
gr.Markdown(f"""
|
| 263 |
+
Calculate threshold h for specific values of k and ARL<sub>0</sub>:
|
| 264 |
+
""") # noqa: F541
|
| 265 |
+
|
| 266 |
+
with gr.Row():
|
| 267 |
+
k_for_h = gr.Textbox(
|
| 268 |
+
label="k value =", placeholder="k", value="0.5"
|
| 269 |
+
)
|
| 270 |
+
arl_0_for_h = gr.Textbox(
|
| 271 |
+
label="ARL₀ value =", placeholder="ARL₀", value="100"
|
| 272 |
+
)
|
| 273 |
+
|
| 274 |
+
button_calculate_h = gr.Button("Calculate h")
|
| 275 |
+
|
| 276 |
+
output_h = gr.Textbox(label="Calculated h =", visible=False)
|
| 277 |
+
|
| 278 |
+
with gr.Tab("Specify h first"):
|
| 279 |
+
gr.Markdown(f"""
|
| 280 |
+
Calculate reference value k for specific values of h and ARL<sub>0</sub>:
|
| 281 |
+
""") # noqa: F541
|
| 282 |
|
| 283 |
+
with gr.Row():
|
| 284 |
+
h_phase1 = gr.Textbox(
|
| 285 |
+
label="h value =",
|
| 286 |
+
placeholder="h = normalized threshold, default = 4. Range: between 4 and 5 ([4, 5])",
|
| 287 |
+
value="4",
|
| 288 |
+
autofocus=True,
|
| 289 |
+
)
|
| 290 |
|
| 291 |
+
arl_0 = gr.Textbox(
|
| 292 |
+
label="ARL₀ value =", placeholder="ARL₀", value="100"
|
| 293 |
+
)
|
| 294 |
+
|
| 295 |
+
button_calculate_k = gr.Button("Calculate k")
|
| 296 |
+
|
| 297 |
+
output_k = gr.Textbox(label="Calculated k =", visible=False)
|
| 298 |
|
| 299 |
dataframe_gt_ARL0 = gr.HTML(
|
| 300 |
+
label="Estimate of steady state ARL (ARL₁ based on the computed reference values and intended zero-state ARL (ARL₀) with normalized threshold h)",
|
| 301 |
show_label=True,
|
| 302 |
visible=False,
|
| 303 |
)
|
| 304 |
|
| 305 |
gr.Markdown(f"""
|
| 306 |
+
### Calculate ARL<sub>1</sub> for threshold h, reference value k, and shift in mean:
|
| 307 |
""") # noqa: F541
|
| 308 |
|
| 309 |
with gr.Row():
|
| 310 |
+
h_for_arl1 = gr.Textbox(
|
| 311 |
+
label="h value =", placeholder="h", value="4"
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
k_phase1 = gr.Textbox(
|
| 315 |
label="k value =", placeholder="k", value="0.2996"
|
| 316 |
)
|
| 317 |
+
|
| 318 |
+
# example: if std_in=0.03 and shift in mean (in original data)=0.045, then the value that the user enters will be 0.045/0.03=1.5
|
| 319 |
+
# Shift in mean value is the absolute difference of in-control mean and test mean
|
| 320 |
mu1 = gr.Textbox(
|
| 321 |
+
label="Shift in mean value (expressed in term of in-control standard deviation) =",
|
| 322 |
placeholder="Shift in mean value",
|
| 323 |
value="1.2",
|
| 324 |
)
|
| 325 |
|
| 326 |
+
button_calculate_ARL_1 = gr.Button("Calculate ARL₁")
|
| 327 |
|
| 328 |
+
output_ARL_1 = gr.Textbox(label="Calculated ARL₁ =", visible=False)
|
| 329 |
|
| 330 |
button_populate_table = gr.Button(
|
| 331 |
+
"Populate Reference Values and ARL₁ tables for the given h value"
|
| 332 |
)
|
| 333 |
|
| 334 |
gr.Markdown(f"""
|
|
|
|
| 343 |
- Enter h and k values.
|
| 344 |
- Get CUSUM plots.
|
| 345 |
""") # noqa: F541
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
with gr.Column():
|
| 347 |
gr.Markdown(f"""
|
| 348 |
### Monitoring:
|
|
|
|
| 357 |
h_phase2 = gr.Textbox(
|
| 358 |
label="h value =",
|
| 359 |
placeholder="normalized threshold, default = 4. Range: between 4 and 5 ([4, 5])",
|
| 360 |
+
value="4",
|
| 361 |
)
|
| 362 |
|
| 363 |
k_phase2 = gr.Textbox(
|
|
|
|
| 368 |
|
| 369 |
button_csv_metric = gr.Button("Show CUSUM plots")
|
| 370 |
|
| 371 |
+
plot_cusum_chart = gr.Plot(label="CUSUM Chart", visible=False)
|
| 372 |
+
|
| 373 |
plot_avg_metric = gr.Plot(
|
| 374 |
label="AI model performance",
|
| 375 |
visible=False,
|
| 376 |
)
|
| 377 |
+
|
| 378 |
+
with gr.Row():
|
| 379 |
+
table_param_description = gr.Dataframe(
|
| 380 |
+
value=pd.read_csv("../../assets/params.csv"),
|
| 381 |
+
)
|
| 382 |
|
| 383 |
button_calculate_incontrol_params.click(
|
| 384 |
fn=set_init_days,
|
|
|
|
| 403 |
fn=lambda: gr.update(visible=True), inputs=[], outputs=dataframe_gt_ARL0
|
| 404 |
)
|
| 405 |
|
| 406 |
+
# Calculate specific h for k and ARL_0
|
| 407 |
+
button_calculate_h.click(
|
| 408 |
+
fn=calculate_threshold_h,
|
| 409 |
+
inputs=[k_for_h, arl_0_for_h],
|
| 410 |
+
outputs=[output_h, h_phase1, h_phase2, k_phase2, h_for_arl1, k_phase1],
|
| 411 |
+
)
|
| 412 |
+
button_calculate_h.click(
|
| 413 |
+
fn=lambda: gr.update(visible=True), inputs=[], outputs=output_h
|
| 414 |
+
)
|
| 415 |
+
|
| 416 |
# Calculate specific k for ARL_0
|
| 417 |
button_calculate_k.click(
|
| 418 |
+
fn=calculate_reference_value_k,
|
| 419 |
+
inputs=[h_phase1, arl_0],
|
| 420 |
+
outputs=[output_k, k_phase1, k_phase2, h_phase2, h_for_arl1],
|
| 421 |
)
|
| 422 |
button_calculate_k.click(
|
| 423 |
fn=lambda: gr.update(visible=True), inputs=[], outputs=output_k
|
|
|
|
| 426 |
# Calculate specific ARL_1 for value h, value k and shift in mean
|
| 427 |
button_calculate_ARL_1.click(
|
| 428 |
fn=calculate_arl1_h_k_mu1,
|
| 429 |
+
inputs=[h_for_arl1, k_phase1, mu1],
|
| 430 |
outputs=[output_ARL_1],
|
| 431 |
)
|
| 432 |
button_calculate_ARL_1.click(
|
src/package/cusum.py
CHANGED
|
@@ -111,7 +111,7 @@ class CUSUM:
|
|
| 111 |
self.set_timeline(self.data)
|
| 112 |
|
| 113 |
def compute_cusum(
|
| 114 |
-
self, x: list[float], mu_0: float,
|
| 115 |
) -> tuple[list[float], list[float], list[float]]:
|
| 116 |
"""
|
| 117 |
Compute CUSUM for the observations in x
|
|
@@ -119,7 +119,7 @@ class CUSUM:
|
|
| 119 |
Args:
|
| 120 |
x (list[float]): Performance metric to be monitored
|
| 121 |
mu_0 (float) : In-control mean of the observations/performance metric
|
| 122 |
-
|
| 123 |
|
| 124 |
Returns:
|
| 125 |
tuple[list[float], list[float], list[float]]: Positive cumulative sum, negative cumulative sum, and CUSUM
|
|
@@ -130,10 +130,10 @@ class CUSUM:
|
|
| 130 |
# S_hi : sum of positive changes --------------------------
|
| 131 |
self.S_hi = np.zeros(num_rows, dtype=float)
|
| 132 |
self.S_hi[0] = 0.0 # starts with 0
|
| 133 |
-
# Increase in mean = x-mu-
|
| 134 |
mean_hi = np.zeros(num_rows, dtype=float)
|
| 135 |
|
| 136 |
-
# Decrease in mean = mu-
|
| 137 |
mean_lo = np.zeros(num_rows, dtype=float)
|
| 138 |
# S_lo : sum of negative changes --------------------------
|
| 139 |
self.S_lo = np.zeros(num_rows, dtype=float)
|
|
@@ -144,9 +144,9 @@ class CUSUM:
|
|
| 144 |
|
| 145 |
for i in range(0, num_rows):
|
| 146 |
x_mean[i] = x[i] - mu_0 # x - mean
|
| 147 |
-
mean_hi[i] = x[i] - mu_0 -
|
| 148 |
self.S_hi[i] = max(0, self.S_hi[i - 1] + mean_hi[i])
|
| 149 |
-
mean_lo[i] = mu_0 -
|
| 150 |
self.S_lo[i] = max(0, self.S_lo[i - 1] + mean_lo[i])
|
| 151 |
cusum[i] = cusum[i - 1] + x_mean[i]
|
| 152 |
|
|
@@ -171,9 +171,8 @@ class CUSUM:
|
|
| 171 |
normalized_ref_value (float, optional): Normalized reference value for detecting a unit standard deviation change in mean of the process. Defaults to 0.5.
|
| 172 |
normalized_threshold (float, optional): Normalized threshold. Defaults to 4.
|
| 173 |
"""
|
| 174 |
-
self.pre_change_days = self.init_days # This is the number of baseline observations that we assume to be in-control - user enters or default = 30
|
| 175 |
|
| 176 |
-
ref_val = normalized_ref_value
|
| 177 |
control_limit = normalized_threshold
|
| 178 |
|
| 179 |
DetectionTimes = np.array([], dtype=int)
|
|
@@ -188,80 +187,65 @@ class CUSUM:
|
|
| 188 |
self.AvgDD = np.array([]) # Average Detection Delay
|
| 189 |
|
| 190 |
self.H = control_limit * self.in_std # Threhold
|
| 191 |
-
|
| 192 |
|
| 193 |
x = np.array(self.data)
|
| 194 |
|
| 195 |
-
# Call compute CUSUM function with x (observations), in-control mean (mu) and
|
| 196 |
-
self.S_hi, self.S_lo, cusum = self.compute_cusum(x, self.in_mu,
|
| 197 |
-
|
| 198 |
-
# Check the variations in self.S_hi and self.S_lo to determine whether there was a change in the data
|
| 199 |
-
S_hi_last_known_zero = np.where(self.S_hi == 0)[
|
| 200 |
-
|
| 201 |
-
] # Find all the indices where self.S_hi was 0
|
| 202 |
-
S_hi_start_of_change = (
|
| 203 |
-
|
| 204 |
-
) # Fetch the last entry where self.S_hi was 0
|
| 205 |
-
|
| 206 |
-
S_lo_last_known_zero = np.where(self.S_lo == 0)[
|
| 207 |
-
|
| 208 |
-
] # Find all the indices where self.S_lo was 0
|
| 209 |
-
S_lo_start_of_change = (
|
| 210 |
-
|
| 211 |
-
) # Fetch the last entry where self.S_lo was 0
|
| 212 |
-
|
| 213 |
-
# Display the print messages in the UI
|
| 214 |
-
if (S_lo_start_of_change < S_hi_start_of_change) and (
|
| 215 |
-
|
| 216 |
-
): # check if the changes in the next 10 observations exceed the threshold
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
elif (S_hi_start_of_change < S_lo_start_of_change) and (
|
| 223 |
-
|
| 224 |
-
):
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
else:
|
| 228 |
-
print(
|
| 229 |
-
|
| 230 |
-
# False positives and Total alarms
|
| 231 |
-
falsePos = 0
|
| 232 |
-
alarms = 0
|
| 233 |
-
avddd = 0 # this is the delay from the paper: td-ts (z_k-v) where v is the changepoint and z_k is the time of detection
|
| 234 |
-
|
| 235 |
-
for i in range(0, self.pre_change_days):
|
| 236 |
-
if (self.S_hi[i] > self.H) or (self.S_lo[i] > self.H):
|
| 237 |
-
falsePos += 1 # False Positives
|
| 238 |
-
DetectionTimes = np.append(
|
| 239 |
-
DetectionTimes, i + 1
|
| 240 |
-
) # time at which a false positive is detected
|
| 241 |
-
Dj = np.append(Dj, 1)
|
| 242 |
-
Zj = np.append(Zj, min(i, self.pre_change_days))
|
| 243 |
-
break
|
| 244 |
-
|
| 245 |
-
# If there is no false positive, Zj = pre_change_days, Dj = 0
|
| 246 |
-
if falsePos == 0:
|
| 247 |
-
Dj = np.append(Dj, 0)
|
| 248 |
-
Zj = np.append(Zj, self.pre_change_days)
|
| 249 |
-
|
| 250 |
-
# Delay to detect the first changepoint
|
| 251 |
-
# delay = 0
|
| 252 |
-
for i in range(self.pre_change_days, self.total_days):
|
| 253 |
-
if (self.S_hi[i] > self.H) or (self.S_lo[i] > self.H):
|
| 254 |
-
alarms += 1 # True Positive: break after detecting one TP
|
| 255 |
-
cj = np.append(cj, 1)
|
| 256 |
-
zj = np.append(zj, min(i, self.total_days) - self.pre_change_days)
|
| 257 |
-
break
|
| 258 |
-
|
| 259 |
-
# If there is no true detection, zj = total simulation days, cj = 0
|
| 260 |
-
if alarms == 0:
|
| 261 |
-
cj = np.append(cj, 0)
|
| 262 |
-
zj = np.append(zj, self.total_days)
|
| 263 |
-
|
| 264 |
-
self.AvgDD = np.append(self.AvgDD, avddd) # ADD estimate from the paper
|
| 265 |
|
| 266 |
def plot_input_metric_plotly_raw(self) -> go.Figure:
|
| 267 |
"""
|
|
@@ -364,7 +348,7 @@ class CUSUM:
|
|
| 364 |
fig = go.Figure()
|
| 365 |
|
| 366 |
font_size_title = 20
|
| 367 |
-
font_size_legend =
|
| 368 |
|
| 369 |
# add subplots
|
| 370 |
fig.add_trace(
|
|
@@ -382,7 +366,7 @@ class CUSUM:
|
|
| 382 |
x=x2,
|
| 383 |
y=y2,
|
| 384 |
mode="markers",
|
| 385 |
-
name=f"""
|
| 386 |
marker=dict(color="coral", size=10),
|
| 387 |
opacity=0.4,
|
| 388 |
),
|
|
@@ -403,7 +387,7 @@ class CUSUM:
|
|
| 403 |
x=[min(x2), max(x2)],
|
| 404 |
y=[mean_y2, mean_y2],
|
| 405 |
mode="lines",
|
| 406 |
-
name="
|
| 407 |
line=dict(color="coral", dash="dash"),
|
| 408 |
),
|
| 409 |
)
|
|
@@ -414,16 +398,19 @@ class CUSUM:
|
|
| 414 |
x=[self.pre_change_days, self.pre_change_days],
|
| 415 |
y=[np.min(self.data), np.max(self.data)],
|
| 416 |
mode="lines",
|
| 417 |
-
name="
|
| 418 |
line=dict(color="grey", dash="dash"),
|
| 419 |
-
# textfont=dict(size=18)
|
| 420 |
),
|
| 421 |
)
|
| 422 |
|
| 423 |
fig.update_layout(
|
| 424 |
title={
|
| 425 |
-
"text": "
|
| 426 |
"font": {"size": font_size_title, "weight": "bold"},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
},
|
| 428 |
xaxis_title={
|
| 429 |
"text": "Time",
|
|
@@ -434,12 +421,24 @@ class CUSUM:
|
|
| 434 |
"font": {"size": font_size_legend, "weight": "bold"},
|
| 435 |
},
|
| 436 |
xaxis=dict(dtick=20),
|
|
|
|
|
|
|
| 437 |
)
|
| 438 |
|
| 439 |
fig.update_layout(plot_bgcolor=self.config["color"]["blue_005"])
|
| 440 |
|
| 441 |
fig.update_layout(
|
| 442 |
-
legend=dict(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
)
|
| 444 |
|
| 445 |
if self.config["control"]["save_figure"] == "true":
|
|
@@ -478,7 +477,7 @@ class CUSUM:
|
|
| 478 |
fig = go.Figure()
|
| 479 |
|
| 480 |
font_size_title = 20
|
| 481 |
-
font_size_legend =
|
| 482 |
|
| 483 |
fig.add_trace(
|
| 484 |
go.Scatter(
|
|
@@ -514,6 +513,10 @@ class CUSUM:
|
|
| 514 |
title={
|
| 515 |
"text": "CUSUM Chart",
|
| 516 |
"font": {"size": font_size_title, "weight": "bold"},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
},
|
| 518 |
xaxis_title={
|
| 519 |
"text": "Time",
|
|
@@ -524,9 +527,10 @@ class CUSUM:
|
|
| 524 |
"font": {"size": font_size_legend, "weight": "bold"},
|
| 525 |
},
|
| 526 |
xaxis=dict(dtick=20),
|
|
|
|
|
|
|
| 527 |
)
|
| 528 |
|
| 529 |
-
|
| 530 |
fig.add_shape(
|
| 531 |
type="rect",
|
| 532 |
x0=0, x1=self.pre_change_days,
|
|
@@ -550,7 +554,17 @@ class CUSUM:
|
|
| 550 |
)
|
| 551 |
|
| 552 |
fig.update_layout(
|
| 553 |
-
legend=dict(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
)
|
| 555 |
|
| 556 |
if self.config["control"]["save_figure"] == "true":
|
|
|
|
| 111 |
self.set_timeline(self.data)
|
| 112 |
|
| 113 |
def compute_cusum(
|
| 114 |
+
self, x: list[float], mu_0: float, ref_val: float
|
| 115 |
) -> tuple[list[float], list[float], list[float]]:
|
| 116 |
"""
|
| 117 |
Compute CUSUM for the observations in x
|
|
|
|
| 119 |
Args:
|
| 120 |
x (list[float]): Performance metric to be monitored
|
| 121 |
mu_0 (float) : In-control mean of the observations/performance metric
|
| 122 |
+
ref_val (float) : Reference value related to the magnitude of change that one is interested in detecting
|
| 123 |
|
| 124 |
Returns:
|
| 125 |
tuple[list[float], list[float], list[float]]: Positive cumulative sum, negative cumulative sum, and CUSUM
|
|
|
|
| 130 |
# S_hi : sum of positive changes --------------------------
|
| 131 |
self.S_hi = np.zeros(num_rows, dtype=float)
|
| 132 |
self.S_hi[0] = 0.0 # starts with 0
|
| 133 |
+
# Increase in mean = x-mu-ref_val ----------------------------
|
| 134 |
mean_hi = np.zeros(num_rows, dtype=float)
|
| 135 |
|
| 136 |
+
# Decrease in mean = mu-ref_val-x----------------------------
|
| 137 |
mean_lo = np.zeros(num_rows, dtype=float)
|
| 138 |
# S_lo : sum of negative changes --------------------------
|
| 139 |
self.S_lo = np.zeros(num_rows, dtype=float)
|
|
|
|
| 144 |
|
| 145 |
for i in range(0, num_rows):
|
| 146 |
x_mean[i] = x[i] - mu_0 # x - mean
|
| 147 |
+
mean_hi[i] = x[i] - mu_0 - ref_val
|
| 148 |
self.S_hi[i] = max(0, self.S_hi[i - 1] + mean_hi[i])
|
| 149 |
+
mean_lo[i] = mu_0 - ref_val - x[i]
|
| 150 |
self.S_lo[i] = max(0, self.S_lo[i - 1] + mean_lo[i])
|
| 151 |
cusum[i] = cusum[i - 1] + x_mean[i]
|
| 152 |
|
|
|
|
| 171 |
normalized_ref_value (float, optional): Normalized reference value for detecting a unit standard deviation change in mean of the process. Defaults to 0.5.
|
| 172 |
normalized_threshold (float, optional): Normalized threshold. Defaults to 4.
|
| 173 |
"""
|
| 174 |
+
self.pre_change_days = None # self.init_days # This is the number of baseline observations that we assume to be in-control - user enters or default = 30
|
| 175 |
|
|
|
|
| 176 |
control_limit = normalized_threshold
|
| 177 |
|
| 178 |
DetectionTimes = np.array([], dtype=int)
|
|
|
|
| 187 |
self.AvgDD = np.array([]) # Average Detection Delay
|
| 188 |
|
| 189 |
self.H = control_limit * self.in_std # Threhold
|
| 190 |
+
ref_val = normalized_ref_value * self.in_std # Reference value
|
| 191 |
|
| 192 |
x = np.array(self.data)
|
| 193 |
|
| 194 |
+
# Call compute CUSUM function with x (observations), in-control mean (mu) and ref_val (drift or reference value)
|
| 195 |
+
self.S_hi, self.S_lo, cusum = self.compute_cusum(x, self.in_mu, ref_val)
|
| 196 |
+
|
| 197 |
+
# # Check the variations in self.S_hi and self.S_lo to determine whether there was a change in the data
|
| 198 |
+
# S_hi_last_known_zero = np.where(self.S_hi == 0)[
|
| 199 |
+
# 0
|
| 200 |
+
# ] # Find all the indices where self.S_hi was 0
|
| 201 |
+
# S_hi_start_of_change = (
|
| 202 |
+
# S_hi_last_known_zero[-1] + 1
|
| 203 |
+
# ) # Fetch the last entry where self.S_hi was 0
|
| 204 |
+
|
| 205 |
+
# S_lo_last_known_zero = np.where(self.S_lo == 0)[
|
| 206 |
+
# 0
|
| 207 |
+
# ] # Find all the indices where self.S_lo was 0
|
| 208 |
+
# S_lo_start_of_change = (
|
| 209 |
+
# S_lo_last_known_zero[-1] + 1
|
| 210 |
+
# ) # Fetch the last entry where self.S_lo was 0
|
| 211 |
+
|
| 212 |
+
# # Display the print messages in the UI
|
| 213 |
+
# if (S_lo_start_of_change < S_hi_start_of_change) and (
|
| 214 |
+
# self.S_lo[S_lo_start_of_change + 10] > self.H
|
| 215 |
+
# ): # check if the changes in the next 10 observations exceed the threshold
|
| 216 |
+
# print(
|
| 217 |
+
# f"Detected change point with respect to S_lo is: {S_lo_start_of_change}"
|
| 218 |
+
# ) # Use this change-point to generate histograms
|
| 219 |
+
# self.pre_change_days = S_lo_start_of_change
|
| 220 |
+
|
| 221 |
+
# elif (S_hi_start_of_change < S_lo_start_of_change) and (
|
| 222 |
+
# self.S_hi[S_hi_start_of_change + 10] > self.H
|
| 223 |
+
# ):
|
| 224 |
+
# print(f"Detected change point with respect to S_hi is: {S_hi_start_of_change}")
|
| 225 |
+
# self.pre_change_days = S_hi_start_of_change
|
| 226 |
+
# else:
|
| 227 |
+
# print(f"No change")
|
| 228 |
+
|
| 229 |
+
# Find first occurrence where threshold is exceeded
|
| 230 |
+
S_hi_exceeds = np.where(self.S_hi > self.H)[0]
|
| 231 |
+
S_lo_exceeds = np.where(self.S_lo > self.H)[0]
|
| 232 |
+
|
| 233 |
+
# Take whichever comes first
|
| 234 |
+
if len(S_hi_exceeds) > 0 and len(S_lo_exceeds) > 0:
|
| 235 |
+
if S_hi_exceeds[0] < S_lo_exceeds[0]:
|
| 236 |
+
self.pre_change_days = S_hi_exceeds[0]
|
| 237 |
+
print(f"(both exceed threshold) Detected upward shift at: {S_hi_exceeds[0]}")
|
| 238 |
+
else:
|
| 239 |
+
self.pre_change_days = S_lo_exceeds[0]
|
| 240 |
+
print(f"(both exceed threshold) Detected downward shift at: {S_lo_exceeds[0]}")
|
| 241 |
+
elif len(S_hi_exceeds) > 0:
|
| 242 |
+
self.pre_change_days = S_hi_exceeds[0]
|
| 243 |
+
print(f"Detected upward shift at: {S_hi_exceeds[0]}")
|
| 244 |
+
elif len(S_lo_exceeds) > 0:
|
| 245 |
+
self.pre_change_days = S_lo_exceeds[0]
|
| 246 |
+
print(f"Detected downward shift at: {S_lo_exceeds[0]}")
|
| 247 |
else:
|
| 248 |
+
print("No change detected")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
def plot_input_metric_plotly_raw(self) -> go.Figure:
|
| 251 |
"""
|
|
|
|
| 348 |
fig = go.Figure()
|
| 349 |
|
| 350 |
font_size_title = 20
|
| 351 |
+
font_size_legend = 14
|
| 352 |
|
| 353 |
# add subplots
|
| 354 |
fig.add_trace(
|
|
|
|
| 366 |
x=x2,
|
| 367 |
y=y2,
|
| 368 |
mode="markers",
|
| 369 |
+
name=f"""Test data""",
|
| 370 |
marker=dict(color="coral", size=10),
|
| 371 |
opacity=0.4,
|
| 372 |
),
|
|
|
|
| 387 |
x=[min(x2), max(x2)],
|
| 388 |
y=[mean_y2, mean_y2],
|
| 389 |
mode="lines",
|
| 390 |
+
name="Test mean",
|
| 391 |
line=dict(color="coral", dash="dash"),
|
| 392 |
),
|
| 393 |
)
|
|
|
|
| 398 |
x=[self.pre_change_days, self.pre_change_days],
|
| 399 |
y=[np.min(self.data), np.max(self.data)],
|
| 400 |
mode="lines",
|
| 401 |
+
name="Detected change point",
|
| 402 |
line=dict(color="grey", dash="dash"),
|
|
|
|
| 403 |
),
|
| 404 |
)
|
| 405 |
|
| 406 |
fig.update_layout(
|
| 407 |
title={
|
| 408 |
+
"text": "AI model metric versus time",
|
| 409 |
"font": {"size": font_size_title, "weight": "bold"},
|
| 410 |
+
"x": 0.5,
|
| 411 |
+
"xanchor": "center",
|
| 412 |
+
"y": 0.98,
|
| 413 |
+
"yanchor": "top"
|
| 414 |
},
|
| 415 |
xaxis_title={
|
| 416 |
"text": "Time",
|
|
|
|
| 421 |
"font": {"size": font_size_legend, "weight": "bold"},
|
| 422 |
},
|
| 423 |
xaxis=dict(dtick=20),
|
| 424 |
+
autosize=True,
|
| 425 |
+
margin=dict(l=60, r=50, t=50, b=50),
|
| 426 |
)
|
| 427 |
|
| 428 |
fig.update_layout(plot_bgcolor=self.config["color"]["blue_005"])
|
| 429 |
|
| 430 |
fig.update_layout(
|
| 431 |
+
legend=dict(
|
| 432 |
+
orientation="h",
|
| 433 |
+
yanchor="bottom",
|
| 434 |
+
y=1.02,
|
| 435 |
+
xanchor="center",
|
| 436 |
+
x=0.5,
|
| 437 |
+
font=dict(size=14),
|
| 438 |
+
bgcolor="rgba(255, 255, 255, 0.9)",
|
| 439 |
+
bordercolor="rgba(0, 124, 186, 0.5)",
|
| 440 |
+
borderwidth=1
|
| 441 |
+
)
|
| 442 |
)
|
| 443 |
|
| 444 |
if self.config["control"]["save_figure"] == "true":
|
|
|
|
| 477 |
fig = go.Figure()
|
| 478 |
|
| 479 |
font_size_title = 20
|
| 480 |
+
font_size_legend = 14
|
| 481 |
|
| 482 |
fig.add_trace(
|
| 483 |
go.Scatter(
|
|
|
|
| 513 |
title={
|
| 514 |
"text": "CUSUM Chart",
|
| 515 |
"font": {"size": font_size_title, "weight": "bold"},
|
| 516 |
+
"x": 0.5,
|
| 517 |
+
"xanchor": "center",
|
| 518 |
+
"y": 0.98,
|
| 519 |
+
"yanchor": "top"
|
| 520 |
},
|
| 521 |
xaxis_title={
|
| 522 |
"text": "Time",
|
|
|
|
| 527 |
"font": {"size": font_size_legend, "weight": "bold"},
|
| 528 |
},
|
| 529 |
xaxis=dict(dtick=20),
|
| 530 |
+
autosize=True,
|
| 531 |
+
margin=dict(l=60, r=50, t=50, b=50), # Increased bottom margin for legend
|
| 532 |
)
|
| 533 |
|
|
|
|
| 534 |
fig.add_shape(
|
| 535 |
type="rect",
|
| 536 |
x0=0, x1=self.pre_change_days,
|
|
|
|
| 554 |
)
|
| 555 |
|
| 556 |
fig.update_layout(
|
| 557 |
+
legend=dict(
|
| 558 |
+
orientation="h",
|
| 559 |
+
yanchor="bottom",
|
| 560 |
+
y=1.02,
|
| 561 |
+
xanchor="center",
|
| 562 |
+
x=0.5,
|
| 563 |
+
font=dict(size=14),
|
| 564 |
+
bgcolor="rgba(255, 255, 255, 0.9)",
|
| 565 |
+
bordercolor="rgba(0, 124, 186, 0.5)",
|
| 566 |
+
borderwidth=1
|
| 567 |
+
)
|
| 568 |
)
|
| 569 |
|
| 570 |
if self.config["control"]["save_figure"] == "true":
|