DavMelchi committed on
Commit
e87272d
·
1 Parent(s): 3dc462c

Add RCA group classification, tags, and hints to multi-RAT top anomalies table with helper functions for slug generation, numeric formatting, and metadata construction based on KPI groups, status, baseline/recent values, and multi-RAT impact

Browse files
process_kpi/kpi_health_check/multi_rat.py CHANGED
@@ -1,5 +1,74 @@
1
  import pandas as pd
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def compute_multirat_views(
5
  status_df: pd.DataFrame,
@@ -142,6 +211,22 @@ def compute_multirat_views(
142
  ascending=[False, False, False, False],
143
  )
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  top_cols = [
146
  c
147
  for c in [
@@ -151,7 +236,11 @@ def compute_multirat_views(
151
  "site_code",
152
  "City",
153
  "KPI",
 
 
 
154
  "status",
 
155
  "baseline_median",
156
  "recent_median",
157
  "bad_days_recent",
 
1
  import pandas as pd
2
 
3
+ from process_kpi.kpi_health_check.kpi_groups import classify_kpi
4
+
5
+
6
def _slug(value: str) -> str:
    """Turn *value* into a lowercase, underscore-separated identifier.

    Falsy input is treated as an empty string. The text is stripped and
    lowercased, every run of non-alphanumeric characters becomes a single
    underscore, and no underscore is left at either end of the result.
    """
    text = str(value or "").strip().lower()
    # Blank out every separator character, then let ``split`` collapse the
    # runs and drop the leading/trailing ones before re-joining.
    spaced = "".join(ch if ch.isalnum() else " " for ch in text)
    return "_".join(spaced.split())
19
+
20
+
21
def _fmt_num(value) -> str:
    """Render *value* with three significant digits, or ``"NA"``.

    The value is coerced with ``pd.to_numeric(..., errors="coerce")``; a
    result that is missing/NaN, or any error raised while converting or
    formatting, yields the literal string ``"NA"``.
    """
    try:
        number = pd.to_numeric(value, errors="coerce")
        return "NA" if pd.isna(number) else format(float(number), ".3g")
    except Exception:  # noqa: BLE001
        # Best-effort formatter: never let an odd cell value propagate.
        return "NA"
29
+
30
+
31
def _build_rca_tags(row: dict) -> str:
    """Compose the comma-separated RCA tag string for one row.

    Tags are emitted in this order:

    * the slug of ``rca_group`` (``"Other"`` when the value is falsy);
    * a status tag: ``persistent`` for ``PERSISTENT_DEGRADED``,
      ``degraded`` for ``DEGRADED``, otherwise the slug of the status
      (nothing when the status is empty);
    * ``missing_baseline`` / ``missing_recent`` when the respective median
      does not coerce to a number;
    * ``multi_rat`` when ``impacted_rats`` is numeric and at least 2.

    Empty tags are dropped before joining.
    """
    group_name = str(row.get("rca_group") or "Other")
    labels: list[str] = [_slug(group_name) if group_name else "other"]

    status_key = str(row.get("status") or "").strip().upper()
    if status_key:
        short_names = {
            "PERSISTENT_DEGRADED": "persistent",
            "DEGRADED": "degraded",
        }
        known = short_names.get(status_key)
        labels.append(_slug(status_key) if known is None else known)

    median_checks = (
        ("baseline_median", "missing_baseline"),
        ("recent_median", "missing_recent"),
    )
    for column, label in median_checks:
        if pd.isna(pd.to_numeric(row.get(column), errors="coerce")):
            labels.append(label)

    rat_count = pd.to_numeric(row.get("impacted_rats"), errors="coerce")
    if pd.notna(rat_count) and float(rat_count) >= 2:
        labels.append("multi_rat")

    return ",".join(filter(None, labels))
56
+
57
+
58
def _int_or_zero(value) -> int:
    """Coerce *value* to ``int``; missing or non-numeric values become 0."""
    try:
        number = pd.to_numeric(value, errors="coerce")
        # NaN is truthy, so ``nan or 0`` stays NaN and ``int(nan)`` raises;
        # test for it explicitly instead.
        if pd.isna(number):
            return 0
        return int(number)
    except (TypeError, ValueError, OverflowError):
        return 0


def _build_rca_hint(row: dict) -> str:
    """Build a one-line, human-readable RCA summary for one row.

    The result has the form
    ``"<group> | <RAT> | <KPI> | <status> | baseline=<b> recent=<r> | streak=<n>d bad=<m>d"``.

    ``rca_group`` falls back to ``"Other"`` and ``KPI`` / ``RAT`` /
    ``status`` to an empty string when falsy. The medians are rendered by
    ``_fmt_num``. ``max_streak_recent`` and ``bad_days_recent`` are shown as
    integers, with 0 substituted when the value is absent, NaN or not
    numeric, so that one incomplete row cannot raise.
    """
    group = str(row.get("rca_group") or "Other")
    kpi = str(row.get("KPI") or "")
    rat = str(row.get("RAT") or "")
    status = str(row.get("status") or "")
    baseline_s = _fmt_num(row.get("baseline_median"))
    recent_s = _fmt_num(row.get("recent_median"))
    streak = _int_or_zero(row.get("max_streak_recent"))
    bad = _int_or_zero(row.get("bad_days_recent"))
    return (
        f"{group} | {rat} | {kpi} | {status} | "
        f"baseline={baseline_s} recent={recent_s} | streak={streak}d bad={bad}d"
    )
71
+
72
 
73
  def compute_multirat_views(
74
  status_df: pd.DataFrame,
 
211
  ascending=[False, False, False, False],
212
  )
213
 
214
+ try:
215
+ top = pd.merge(top, impacted, on="site_code", how="left")
216
+ top["impacted_rats"] = (
217
+ pd.to_numeric(top["impacted_rats"], errors="coerce").fillna(0).astype(int)
218
+ )
219
+ except Exception: # noqa: BLE001
220
+ top["impacted_rats"] = 0
221
+
222
+ top["rca_group"] = top["KPI"].apply(classify_kpi)
223
+ try:
224
+ top["rca_hint"] = top.apply(lambda r: _build_rca_hint(r.to_dict()), axis=1)
225
+ top["rca_tags"] = top.apply(lambda r: _build_rca_tags(r.to_dict()), axis=1)
226
+ except Exception: # noqa: BLE001
227
+ top["rca_hint"] = ""
228
+ top["rca_tags"] = ""
229
+
230
  top_cols = [
231
  c
232
  for c in [
 
236
  "site_code",
237
  "City",
238
  "KPI",
239
+ "rca_group",
240
+ "rca_tags",
241
+ "rca_hint",
242
  "status",
243
+ "impacted_rats",
244
  "baseline_median",
245
  "recent_median",
246
  "bad_days_recent",