Add RCA group classification, tags, and hints to multi-RAT top anomalies table with helper functions for slug generation, numeric formatting, and metadata construction based on KPI groups, status, baseline/recent values, and multi-RAT impact
Browse files
process_kpi/kpi_health_check/multi_rat.py
CHANGED
|
@@ -1,5 +1,74 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def compute_multirat_views(
|
| 5 |
status_df: pd.DataFrame,
|
|
@@ -142,6 +211,22 @@ def compute_multirat_views(
|
|
| 142 |
ascending=[False, False, False, False],
|
| 143 |
)
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
top_cols = [
|
| 146 |
c
|
| 147 |
for c in [
|
|
@@ -151,7 +236,11 @@ def compute_multirat_views(
|
|
| 151 |
"site_code",
|
| 152 |
"City",
|
| 153 |
"KPI",
|
|
|
|
|
|
|
|
|
|
| 154 |
"status",
|
|
|
|
| 155 |
"baseline_median",
|
| 156 |
"recent_median",
|
| 157 |
"bad_days_recent",
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
+
from process_kpi.kpi_health_check.kpi_groups import classify_kpi
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _slug(value: str) -> str:
|
| 7 |
+
s = str(value or "").strip().lower()
|
| 8 |
+
out = []
|
| 9 |
+
prev_underscore = False
|
| 10 |
+
for ch in s:
|
| 11 |
+
if ch.isalnum():
|
| 12 |
+
out.append(ch)
|
| 13 |
+
prev_underscore = False
|
| 14 |
+
else:
|
| 15 |
+
if not prev_underscore:
|
| 16 |
+
out.append("_")
|
| 17 |
+
prev_underscore = True
|
| 18 |
+
return "".join(out).strip("_")
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _fmt_num(value) -> str:
|
| 22 |
+
try:
|
| 23 |
+
v = pd.to_numeric(value, errors="coerce")
|
| 24 |
+
if pd.isna(v):
|
| 25 |
+
return "NA"
|
| 26 |
+
return f"{float(v):.3g}"
|
| 27 |
+
except Exception: # noqa: BLE001
|
| 28 |
+
return "NA"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _build_rca_tags(row: dict) -> str:
    """Build the comma-separated RCA tag string for one anomaly row.

    Tags are emitted in this order:

    1. the slug of ``rca_group`` (``"Other"`` when the group is falsy);
    2. a status tag: ``persistent`` for ``PERSISTENT_DEGRADED``,
       ``degraded`` for ``DEGRADED``, otherwise the slug of the status
       (nothing when the status is empty);
    3. ``missing_baseline`` / ``missing_recent`` when the matching median
       does not coerce to a number;
    4. ``multi_rat`` when ``impacted_rats`` is numeric and at least 2.

    Empty tags are dropped before joining.
    """
    group_name = str(row.get("rca_group") or "Other")
    labels: list[str] = [_slug(group_name) if group_name else "other"]

    status_key = str(row.get("status") or "").strip().upper()
    short_status = {
        "PERSISTENT_DEGRADED": "persistent",
        "DEGRADED": "degraded",
    }
    if status_key in short_status:
        labels.append(short_status[status_key])
    elif status_key:
        labels.append(_slug(status_key))

    median_flags = (
        ("baseline_median", "missing_baseline"),
        ("recent_median", "missing_recent"),
    )
    for column, label in median_flags:
        if pd.isna(pd.to_numeric(row.get(column), errors="coerce")):
            labels.append(label)

    rat_count = pd.to_numeric(row.get("impacted_rats"), errors="coerce")
    if pd.notna(rat_count) and float(rat_count) >= 2:
        labels.append("multi_rat")

    return ",".join(filter(None, labels))
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _int_or_zero(value) -> int:
    """Coerce *value* to ``int``, returning 0 when it is missing or non-numeric."""
    try:
        number = pd.to_numeric(value, errors="coerce")
        # NaN is truthy, so ``nan or 0`` stays NaN and ``int(nan)`` raises;
        # test for it explicitly instead of relying on ``or``.
        if pd.isna(number):
            return 0
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # Unsupported object types, array-like cells, or +/-inf.
        return 0


def _build_rca_hint(row: dict) -> str:
    """Build the one-line, human-readable RCA hint for one anomaly row.

    The hint has the form
    ``"<group> | <RAT> | <KPI> | <status> | baseline=<b> recent=<r> | streak=<s>d bad=<n>d"``.

    Args:
        row: Mapping of column name to value. Reads ``rca_group``, ``KPI``,
            ``RAT``, ``status``, ``baseline_median``, ``recent_median``,
            ``max_streak_recent`` and ``bad_days_recent``; any may be absent.

    Returns:
        The formatted hint. A falsy group becomes ``"Other"``; medians are
        rendered by ``_fmt_num``; missing, NaN or non-numeric day counters
        are rendered as ``0`` instead of raising.
    """
    group = str(row.get("rca_group") or "Other")
    kpi = str(row.get("KPI") or "")
    rat = str(row.get("RAT") or "")
    status = str(row.get("status") or "")
    baseline_s = _fmt_num(row.get("baseline_median"))
    recent_s = _fmt_num(row.get("recent_median"))
    streak = _int_or_zero(row.get("max_streak_recent"))
    bad = _int_or_zero(row.get("bad_days_recent"))
    return (
        f"{group} | {rat} | {kpi} | {status} | "
        f"baseline={baseline_s} recent={recent_s} | streak={streak}d bad={bad}d"
    )
|
| 71 |
+
|
| 72 |
|
| 73 |
def compute_multirat_views(
|
| 74 |
status_df: pd.DataFrame,
|
|
|
|
| 211 |
ascending=[False, False, False, False],
|
| 212 |
)
|
| 213 |
|
| 214 |
+
try:
|
| 215 |
+
top = pd.merge(top, impacted, on="site_code", how="left")
|
| 216 |
+
top["impacted_rats"] = (
|
| 217 |
+
pd.to_numeric(top["impacted_rats"], errors="coerce").fillna(0).astype(int)
|
| 218 |
+
)
|
| 219 |
+
except Exception: # noqa: BLE001
|
| 220 |
+
top["impacted_rats"] = 0
|
| 221 |
+
|
| 222 |
+
top["rca_group"] = top["KPI"].apply(classify_kpi)
|
| 223 |
+
try:
|
| 224 |
+
top["rca_hint"] = top.apply(lambda r: _build_rca_hint(r.to_dict()), axis=1)
|
| 225 |
+
top["rca_tags"] = top.apply(lambda r: _build_rca_tags(r.to_dict()), axis=1)
|
| 226 |
+
except Exception: # noqa: BLE001
|
| 227 |
+
top["rca_hint"] = ""
|
| 228 |
+
top["rca_tags"] = ""
|
| 229 |
+
|
| 230 |
top_cols = [
|
| 231 |
c
|
| 232 |
for c in [
|
|
|
|
| 236 |
"site_code",
|
| 237 |
"City",
|
| 238 |
"KPI",
|
| 239 |
+
"rca_group",
|
| 240 |
+
"rca_tags",
|
| 241 |
+
"rca_hint",
|
| 242 |
"status",
|
| 243 |
+
"impacted_rats",
|
| 244 |
"baseline_median",
|
| 245 |
"recent_median",
|
| 246 |
"bad_days_recent",
|