DavMelchi committed
Commit 4ef0ce6 · Parent: 97cf335

Add KPI Health Check Panel v2 (~3640 lines of UI components, caching logic, and health-check engine integration): multi-RAT analysis, interactive drill-down plots, complaint-site tracking, snapshot comparison, preset/profile management, geographic map visualization, correlation analysis, and Excel export with alert-pack generation.

panel_app/kpi_health_check_panel_v2.py ADDED
The diff for this file is too large to render.
 
panel_app/panel_portal.py CHANGED
@@ -10,7 +10,11 @@ if ROOT_DIR not in sys.path:
 pn.extension("plotly", "tabulator")
 
 # Import pages (kept as modules, not nested templates)
-from panel_app import kpi_health_check_panel, trafic_analysis_panel
+from panel_app import (
+    kpi_health_check_panel,
+    kpi_health_check_panel_v2,
+    trafic_analysis_panel,
+)
 
 PAGES = {
     "📊 Global Traffic Analysis": {
@@ -21,6 +25,10 @@ PAGES = {
         "get_components": kpi_health_check_panel.get_page_components,
         "description": "Détection KPI dégradés/persistants/résolus + drill-down + export.",
     },
+    "⚡ KPI Health Check (V2)": {
+        "get_components": kpi_health_check_panel_v2.get_page_components,
+        "description": "Version optimisée (cache disque + moteur health-check vectorisé).",
+    },
 }
 
 HOME_PAGE = "🏠 Gallery"
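
For context, each PAGES entry points at a module-level get_page_components callable. A minimal sketch of the contract a page module would satisfy — the (sidebar, main) return shape and the widget contents here are illustrative assumptions, not the actual panel code:

# Hypothetical page module (e.g. panel_app/my_page.py) illustrating the
# get_page_components contract assumed by panel_portal.py's PAGES registry.
import panel as pn


def get_page_components():
    # Assumed return shape: components the portal can slot into its template.
    sidebar = pn.Column(pn.pane.Markdown("### Filters"))
    main = pn.Column(pn.pane.Markdown("Page body"))
    return sidebar, main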
panel_app/panel_v2_backend.py ADDED
@@ -0,0 +1,119 @@
+import hashlib
+import os
+from dataclasses import dataclass
+
+import pandas as pd
+
+
+def _project_root() -> str:
+    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def cache_root() -> str:
+    root = _project_root()
+    path = os.path.join(root, ".cache", "panel_app_v2")
+    os.makedirs(path, exist_ok=True)
+    return path
+
+
+def _safe_str(value: object) -> str:
+    try:
+        return str(value or "")
+    except Exception:
+        return ""
+
+
+def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str:
+    h = hashlib.blake2b(digest_size=16)
+    if file_bytes:
+        h.update(file_bytes)
+    name = _safe_str(filename)
+    if name:
+        h.update(name.encode("utf-8", errors="ignore"))
+    ex = _safe_str(extra)
+    if ex:
+        h.update(ex.encode("utf-8", errors="ignore"))
+    return h.hexdigest()
+
+
+def _has_pyarrow() -> bool:
+    try:
+        import pyarrow  # noqa: F401
+
+        return True
+    except Exception:
+        return False
+
+
+def _has_duckdb() -> bool:
+    try:
+        import duckdb  # noqa: F401
+
+        return True
+    except Exception:
+        return False
+
+
+def write_table(df: pd.DataFrame, path_no_ext: str) -> str:
+    if _has_pyarrow():
+        path = path_no_ext + ".parquet"
+        df.to_parquet(path, index=False)
+        return path
+    path = path_no_ext + ".pkl"
+    df.to_pickle(path)
+    return path
+
+
+def read_table(path: str) -> pd.DataFrame:
+    if not path or not os.path.exists(path):
+        return pd.DataFrame()
+    p = str(path).lower()
+    if p.endswith(".parquet"):
+        return pd.read_parquet(path)
+    return pd.read_pickle(path)
+
+
+@dataclass(frozen=True)
+class CachedDataset:
+    dataset_id: str
+    rat: str
+    granularity: str
+
+    def base_dir(self) -> str:
+        return os.path.join(
+            cache_root(),
+            self.dataset_id,
+            f"rat={self.rat}",
+            f"granularity={self.granularity}",
+        )
+
+    def daily_table_base(self) -> str:
+        return os.path.join(self.base_dir(), "daily")
+
+    def meta_path(self) -> str:
+        return os.path.join(self.base_dir(), "meta.json")
+
+
+def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None:
+    base = dataset.daily_table_base()
+    candidates = [base + ".parquet", base + ".pkl"]
+    for p in candidates:
+        if os.path.exists(p):
+            try:
+                df = read_table(p)
+                return df if isinstance(df, pd.DataFrame) else pd.DataFrame()
+            except Exception:
+                return pd.DataFrame()
+    return None
+
+
+def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str:
+    os.makedirs(dataset.base_dir(), exist_ok=True)
+    return write_table(daily, dataset.daily_table_base())
+
+
+def ensure_duckdb_available() -> None:
+    if not _has_duckdb():
+        raise RuntimeError(
+            "DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb"
+        )
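
Putting the new backend together: the panel can fingerprint an uploaded file, key a CachedDataset on that hash, and re-parse only on a cache miss. A minimal usage sketch under those assumptions (the CSV payload and dataset field values are illustrative):

import io

import pandas as pd

from panel_app.panel_v2_backend import (
    CachedDataset,
    fingerprint_bytes,
    save_cached_daily,
    try_load_cached_daily,
)

raw = b"site_code,date_only,kpi_a\n1001,2025-01-01,99.2\n"  # stand-in for uploaded bytes
dataset = CachedDataset(
    dataset_id=fingerprint_bytes(raw, filename="export.csv", extra="v1"),
    rat="LTE",
    granularity="Daily",
)

daily = try_load_cached_daily(dataset)  # None the first time this payload is seen
if daily is None:
    daily = pd.read_csv(io.BytesIO(raw))  # parse once...
    save_cached_daily(dataset, daily)  # ...then persist as Parquet (or pickle)

Note that write_table silently falls back from Parquet to pickle when pyarrow is absent, while DuckDB is a hard requirement only for the SQL path guarded by ensure_duckdb_available().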
process_kpi/kpi_health_check/engine_v2.py ADDED
@@ -0,0 +1,315 @@
+from __future__ import annotations
+
+from datetime import timedelta
+
+import numpy as np
+import pandas as pd
+
+from process_kpi.kpi_health_check.engine import window_bounds_period
+
+
+def _to_datetime_series(s: pd.Series) -> pd.Series:
+    try:
+        return pd.to_datetime(s, errors="coerce")
+    except Exception:
+        return pd.to_datetime(pd.Series([], dtype="datetime64[ns]"), errors="coerce")
+
+
+def _vector_is_bad(
+    value: pd.Series,
+    baseline: pd.Series,
+    direction: str,
+    rel_threshold_pct: float,
+    sla: float | None,
+) -> pd.Series:
+    v = pd.to_numeric(value, errors="coerce")
+    b = pd.to_numeric(baseline, errors="coerce")
+
+    bad = pd.Series(False, index=v.index)
+
+    if sla is not None and not (isinstance(sla, float) and np.isnan(sla)):
+        if str(direction) == "higher_is_better":
+            bad = bad | (v < float(sla))
+        else:
+            bad = bad | (v > float(sla))
+
+    thr = float(rel_threshold_pct) / 100.0
+    has_b = b.notna()
+    if bool(has_b.any()):
+        if str(direction) == "higher_is_better":
+            bad = bad | (v < (b - b.abs() * thr))
+        else:
+            bad = bad | (v > (b + b.abs() * thr))
+
+    bad = bad & v.notna()
+    return bad
+
+
+def evaluate_health_check(
+    daily: pd.DataFrame,
+    rat: str,
+    rules_df: pd.DataFrame,
+    baseline_days_n: int,
+    recent_days_n: int,
+    rel_threshold_pct: float,
+    min_consecutive_days: int,
+    granularity: str = "Daily",
+) -> tuple[pd.DataFrame, pd.DataFrame]:
+    if daily is None or daily.empty:
+        return pd.DataFrame(), pd.DataFrame()
+
+    g = str(granularity or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily.columns)
+        else "date_only"
+    )
+
+    step = timedelta(hours=1) if is_hourly else timedelta(days=1)
+    baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
+    recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
+    min_periods = (
+        int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
+    )
+
+    t_all = _to_datetime_series(daily[time_col])
+    end_dt = t_all.max()
+    if pd.isna(end_dt):
+        return pd.DataFrame(), pd.DataFrame()
+
+    end_dt = pd.Timestamp(end_dt)
+    if is_hourly:
+        end_dt = end_dt.floor("h")
+
+    recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
+    baseline_end_dt = recent_start_dt - step
+    baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)
+
+    rat_rules = (
+        rules_df[rules_df["RAT"] == rat].copy()
+        if isinstance(rules_df, pd.DataFrame)
+        else pd.DataFrame()
+    )
+    if rat_rules.empty or "KPI" not in rat_rules.columns:
+        return pd.DataFrame(), pd.DataFrame()
+
+    kpi_cols = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
+    if not kpi_cols:
+        return pd.DataFrame(), pd.DataFrame()
+
+    base_cols = ["site_code", time_col]
+    if "City" in daily.columns:
+        base_cols.append("City")
+    base = daily[base_cols + kpi_cols].copy()
+    base["site_code"] = pd.to_numeric(base["site_code"], errors="coerce")
+    base = base.dropna(subset=["site_code"]).copy()
+    base["site_code"] = base["site_code"].astype(int)
+
+    base_t = _to_datetime_series(base[time_col])
+    base["_t"] = base_t
+    base = base.dropna(subset=["_t"]).copy()
+
+    baseline_mask = (base["_t"] >= pd.to_datetime(baseline_start_dt)) & (
+        base["_t"] <= pd.to_datetime(baseline_end_dt)
+    )
+    recent_mask = (base["_t"] >= pd.to_datetime(recent_start_dt)) & (
+        base["_t"] <= pd.to_datetime(recent_end_dt)
+    )
+
+    counts = base.groupby("site_code")[kpi_cols].count()
+    all_sites = counts.index
+
+    if "City" in base.columns:
+        city_map = (
+            base[["site_code", "City"]]
+            .dropna(subset=["City"])
+            .drop_duplicates("site_code")
+            .set_index("site_code")["City"]
+        )
+        city = city_map.reindex(all_sites)
+    else:
+        city = pd.Series([None] * len(all_sites), index=all_sites)
+
+    baseline_subset = base.loc[baseline_mask, ["site_code"] + kpi_cols]
+    recent_subset = base.loc[recent_mask, ["site_code", "_t"] + kpi_cols]
+
+    baseline_medians = (
+        baseline_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
+        if not baseline_subset.empty
+        else pd.DataFrame(index=all_sites)
+    )
+    recent_medians = (
+        recent_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
+        if not recent_subset.empty
+        else pd.DataFrame(index=all_sites)
+    )
+
+    recent_sorted = (
+        recent_subset.sort_values(["site_code", "_t"])
+        if not recent_subset.empty
+        else recent_subset
+    )
+    gap = recent_sorted.groupby("site_code")["_t"].diff()
+    gap_ok = (gap == step).fillna(False)
+
+    out_frames: list[pd.DataFrame] = []
+
+    for _, rr in rat_rules.iterrows():
+        kpi = str(rr.get("KPI"))
+        if not kpi or kpi not in kpi_cols:
+            continue
+
+        direction = str(rr.get("direction", "higher_is_better"))
+        policy = str(rr.get("policy", "enforce") or "enforce").strip().lower()
+        sla_raw = rr.get("sla", np.nan)
+        try:
+            sla_val = float(sla_raw) if pd.notna(sla_raw) else None
+        except Exception:
+            sla_val = None
+
+        sla_eval = None if policy == "notify" else sla_val
+
+        cnt = counts[kpi].reindex(all_sites).fillna(0).astype(int)
+        has_any = cnt > 0
+
+        baseline = (
+            baseline_medians[kpi].reindex(all_sites)
+            if kpi in baseline_medians.columns
+            else pd.Series([np.nan] * len(all_sites), index=all_sites)
+        )
+        recent = (
+            recent_medians[kpi].reindex(all_sites)
+            if kpi in recent_medians.columns
+            else pd.Series([np.nan] * len(all_sites), index=all_sites)
+        )
+
+        if not recent_sorted.empty and kpi in recent_sorted.columns:
+            v_recent = pd.to_numeric(recent_sorted[kpi], errors="coerce")
+            b_row = recent_sorted["site_code"].map(
+                pd.to_numeric(
+                    baseline_medians.get(kpi, pd.Series(dtype=float)), errors="coerce"
+                )
+            )
+            bad_row = _vector_is_bad(
+                v_recent, b_row, direction, float(rel_threshold_pct), sla_eval
+            )
+            bad_row = bad_row & recent_sorted["_t"].notna()
+
+            start = (~gap_ok) | (~bad_row) | gap_ok.isna()
+            run_id = start.groupby(recent_sorted["site_code"]).cumsum()
+
+            bad_counts = (
+                bad_row.groupby(recent_sorted["site_code"])
+                .sum()
+                .reindex(all_sites)
+                .fillna(0)
+                .astype(int)
+            )
+            streaks = (
+                bad_row.groupby([recent_sorted["site_code"], run_id])
+                .sum()
+                .groupby(level=0)
+                .max()
+                .reindex(all_sites)
+                .fillna(0)
+                .astype(int)
+            )
+
+            tmp_last = (
+                recent_sorted[["site_code", "_t", kpi]]
+                .dropna(subset=[kpi])
+                .sort_values(["site_code", "_t"])
+            )
+            if not tmp_last.empty:
+                last_vals = tmp_last.groupby("site_code")[kpi].tail(1)
+                last_map = pd.Series(
+                    last_vals.values,
+                    index=tmp_last.groupby("site_code")
+                    .tail(1)["site_code"]
+                    .astype(int)
+                    .values,
+                )
+                last = last_map.reindex(all_sites)
+            else:
+                last = pd.Series([np.nan] * len(all_sites), index=all_sites)
+        else:
+            bad_counts = pd.Series([0] * len(all_sites), index=all_sites)
+            streaks = pd.Series([0] * len(all_sites), index=all_sites)
+            last = pd.Series([np.nan] * len(all_sites), index=all_sites)
+
+        is_bad_recent = _vector_is_bad(
+            recent, baseline, direction, float(rel_threshold_pct), sla_eval
+        )
+        is_bad_current = _vector_is_bad(
+            last, baseline, direction, float(rel_threshold_pct), sla_eval
+        )
+        had_bad_recent = (bad_counts > 0) | is_bad_recent
+
+        persistent = streaks >= int(min_periods)
+
+        status = pd.Series("OK", index=all_sites)
+        status = status.where(has_any, "NO_DATA")
+
+        if policy == "notify":
+            status = status.where(has_any, "NO_DATA")
+            status = status.where(~(has_any & is_bad_current), "NOTIFY")
+            status = status.where(
+                ~(has_any & (~is_bad_current) & had_bad_recent), "NOTIFY_RESOLVED"
+            )
+        else:
+            status = status.where(
+                ~(has_any & is_bad_current & persistent), "PERSISTENT_DEGRADED"
+            )
+            status = status.where(
+                ~(has_any & is_bad_current & (~persistent)), "DEGRADED"
+            )
+            status = status.where(
+                ~(has_any & (~is_bad_current) & had_bad_recent), "RESOLVED"
+            )
+
+        frame = pd.DataFrame(
+            {
+                "RAT": rat,
+                "site_code": all_sites.astype(int),
+                "City": city.values,
+                "KPI": kpi,
+                "direction": direction,
+                "sla": sla_val,
+                "policy": policy,
+                "baseline_median": baseline.values,
+                "recent_median": recent.values,
+                "bad_days_recent": bad_counts.values,
+                "max_streak_recent": streaks.values,
+                "status": status.values,
+            }
+        )
+        out_frames.append(frame)
+
+    if not out_frames:
+        return pd.DataFrame(), pd.DataFrame()
+
+    status_df = pd.concat(out_frames, ignore_index=True)
+
+    summary = (
+        status_df.groupby("site_code", as_index=False)
+        .agg(
+            RAT=("RAT", "first"),
+            City=("City", "first"),
+            degraded_kpis=(
+                "status",
+                lambda s: int(s.isin(["DEGRADED", "PERSISTENT_DEGRADED"]).sum()),
+            ),
+            persistent_kpis=(
+                "status",
+                lambda s: int((s == "PERSISTENT_DEGRADED").sum()),
+            ),
+            resolved_kpis=("status", lambda s: int((s == "RESOLVED").sum())),
+        )
+        .sort_values(
+            by=["degraded_kpis", "persistent_kpis", "resolved_kpis"],
+            ascending=[False, False, False],
+        )
+    )
+
+    return status_df, summary
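
The rule check in _vector_is_bad combines an absolute SLA test with a relative drift test: with rel_threshold_pct=10 and a baseline median of 98.0, a higher_is_better KPI is flagged once a value drops below 98.0 − |98.0| × 0.10 = 88.2 (or below the SLA, whichever triggers first). A minimal call sketch under the column names the engine expects — the toy data and rule values are illustrative, not from the repo:

import pandas as pd

from process_kpi.kpi_health_check.engine_v2 import evaluate_health_check

daily = pd.DataFrame(
    {
        "site_code": [1001] * 10,
        "date_only": pd.date_range("2025-01-01", periods=10, freq="D"),
        "City": ["CityA"] * 10,
        "CSSR": [98.5] * 7 + [80.0, 79.0, 78.0],  # degrades over the last 3 days
    }
)
rules = pd.DataFrame(
    [{"RAT": "LTE", "KPI": "CSSR", "direction": "higher_is_better",
      "sla": 95.0, "policy": "enforce"}]
)

status_df, summary = evaluate_health_check(
    daily,
    rat="LTE",
    rules_df=rules,
    baseline_days_n=7,
    recent_days_n=3,
    rel_threshold_pct=10.0,
    min_consecutive_days=3,
)
# status_df holds one row per (site, KPI) with statuses such as DEGRADED,
# PERSISTENT_DEGRADED, RESOLVED, NOTIFY or NO_DATA; summary aggregates per site.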
utils/utils_vars.py CHANGED
@@ -111,6 +111,7 @@ class UtilsVars:
         406283: "MBSKTL01",
         406284: "MBSSEG01",
         406308: "MBSSK0S1",
+        406309: "ASBSCMSC3",
     }
     final_lte_database = ""
     final_gsm_database = ""
ziptool.py CHANGED
@@ -2,7 +2,7 @@ import os
 import zipfile
 
 # Path to your folder containing CSV files
-folder_path = r"C:\Users\David\Documents\DISK E\BI REPORTING\FLOYD REPORT\2025\CAPACITY\DECEMBRE\20151214"
+folder_path = r"C:\Users\David\Documents\DISK E\BI REPORTING\FLOYD REPORT\2025\CAPACITY\DECEMBRE\20251229"
 
 # Loop through all files in the folder
 for filename in os.listdir(folder_path):
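
Only the hard-coded folder_path changes here, pointing the script at a newer report drop. For reference, a per-file zip loop of the kind the truncated body suggests — everything past the for line below is an assumption for illustration, not the repository's actual code:

import os
import zipfile

folder_path = r"C:\reports\20251229"  # placeholder; the real script hard-codes the report folder shown above

for filename in os.listdir(folder_path):
    if not filename.lower().endswith(".csv"):
        continue  # skip non-CSV files
    csv_path = os.path.join(folder_path, filename)
    zip_path = os.path.splitext(csv_path)[0] + ".zip"
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.write(csv_path, arcname=filename)  # one zip archive per CSV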