DavMelchi committed
Commit 4ef0ce6 · Parent: 97cf335

Add KPI Health Check Panel v2 (~3640 lines of UI components, caching logic, and health-check engine integration): multi-RAT analysis, interactive drill-down plots, complaint-site tracking, snapshot comparison, preset/profile management, geographic map visualization, correlation analysis, and Excel export with alert-pack generation.

panel_app/kpi_health_check_panel_v2.py ADDED
The diff for this file is too large to render.
 
panel_app/panel_portal.py CHANGED
@@ -10,7 +10,11 @@ if ROOT_DIR not in sys.path:
 pn.extension("plotly", "tabulator")
 
 # Import pages (kept as modules, not nested templates)
-from panel_app import kpi_health_check_panel, trafic_analysis_panel
+from panel_app import (
+    kpi_health_check_panel,
+    kpi_health_check_panel_v2,
+    trafic_analysis_panel,
+)
 
 PAGES = {
     "📊 Global Traffic Analysis": {
@@ -21,6 +25,10 @@ PAGES = {
         "get_components": kpi_health_check_panel.get_page_components,
         "description": "Détection KPI dégradés/persistants/résolus + drill-down + export.",
     },
+    "⚡ KPI Health Check (V2)": {
+        "get_components": kpi_health_check_panel_v2.get_page_components,
+        "description": "Version optimisée (cache disque + moteur health-check vectorisé).",
+    },
 }
 
 HOME_PAGE = "🏠 Gallery"
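
For context, each PAGES entry points at a module-level get_page_components callable. A minimal sketch of the contract a page module would satisfy — the (sidebar, main) return shape and the widget contents here are illustrative assumptions, not the actual panel code:

# Hypothetical page module (e.g. panel_app/my_page.py) illustrating the
# get_page_components contract assumed by panel_portal.py's PAGES registry.
import panel as pn


def get_page_components():
    # Assumed return shape: components the portal can slot into its template.
    sidebar = pn.Column(pn.pane.Markdown("### Filters"))
    main = pn.Column(pn.pane.Markdown("Page body"))
    return sidebar, main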
panel_app/panel_v2_backend.py ADDED
@@ -0,0 +1,119 @@
+import hashlib
+import os
+from dataclasses import dataclass
+
+import pandas as pd
+
+
+def _project_root() -> str:
+    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def cache_root() -> str:
+    root = _project_root()
+    path = os.path.join(root, ".cache", "panel_app_v2")
+    os.makedirs(path, exist_ok=True)
+    return path
+
+
+def _safe_str(value: object) -> str:
+    try:
+        return str(value or "")
+    except Exception:
+        return ""
+
+
+def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str:
+    h = hashlib.blake2b(digest_size=16)
+    if file_bytes:
+        h.update(file_bytes)
+    name = _safe_str(filename)
+    if name:
+        h.update(name.encode("utf-8", errors="ignore"))
+    ex = _safe_str(extra)
+    if ex:
+        h.update(ex.encode("utf-8", errors="ignore"))
+    return h.hexdigest()
+
+
+def _has_pyarrow() -> bool:
+    try:
+        import pyarrow  # noqa: F401
+
+        return True
+    except Exception:
+        return False
+
+
+def _has_duckdb() -> bool:
+    try:
+        import duckdb  # noqa: F401
+
+        return True
+    except Exception:
+        return False
+
+
+def write_table(df: pd.DataFrame, path_no_ext: str) -> str:
+    if _has_pyarrow():
+        path = path_no_ext + ".parquet"
+        df.to_parquet(path, index=False)
+        return path
+    path = path_no_ext + ".pkl"
+    df.to_pickle(path)
+    return path
+
+
+def read_table(path: str) -> pd.DataFrame:
+    if not path or not os.path.exists(path):
+        return pd.DataFrame()
+    p = str(path).lower()
+    if p.endswith(".parquet"):
+        return pd.read_parquet(path)
+    return pd.read_pickle(path)
+
+
+@dataclass(frozen=True)
+class CachedDataset:
+    dataset_id: str
+    rat: str
+    granularity: str
+
+    def base_dir(self) -> str:
+        return os.path.join(
+            cache_root(),
+            self.dataset_id,
+            f"rat={self.rat}",
+            f"granularity={self.granularity}",
+        )
+
+    def daily_table_base(self) -> str:
+        return os.path.join(self.base_dir(), "daily")
+
+    def meta_path(self) -> str:
+        return os.path.join(self.base_dir(), "meta.json")
+
+
+def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None:
+    base = dataset.daily_table_base()
+    candidates = [base + ".parquet", base + ".pkl"]
+    for p in candidates:
+        if os.path.exists(p):
+            try:
+                df = read_table(p)
+                return df if isinstance(df, pd.DataFrame) else pd.DataFrame()
+            except Exception:
+                return pd.DataFrame()
+    return None
+
+
+def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str:
+    os.makedirs(dataset.base_dir(), exist_ok=True)
+    return write_table(daily, dataset.daily_table_base())
+
+
+def ensure_duckdb_available() -> None:
+    if not _has_duckdb():
+        raise RuntimeError(
+            "DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb"
+        )
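
Putting the new backend together: the panel can fingerprint an uploaded file, key a CachedDataset on that hash, and re-parse only on a cache miss. A minimal usage sketch under those assumptions (the CSV payload and dataset field values are illustrative):

import io

import pandas as pd

from panel_app.panel_v2_backend import (
    CachedDataset,
    fingerprint_bytes,
    save_cached_daily,
    try_load_cached_daily,
)

raw = b"site_code,date_only,kpi_a\n1001,2025-01-01,99.2\n"  # stand-in for uploaded bytes
dataset = CachedDataset(
    dataset_id=fingerprint_bytes(raw, filename="export.csv", extra="v1"),
    rat="LTE",
    granularity="Daily",
)

daily = try_load_cached_daily(dataset)  # None the first time this payload is seen
if daily is None:
    daily = pd.read_csv(io.BytesIO(raw))  # parse once...
    save_cached_daily(dataset, daily)  # ...then persist as Parquet (or pickle)

Note that write_table silently falls back from Parquet to pickle when pyarrow is absent, while DuckDB is a hard requirement only for the SQL path guarded by ensure_duckdb_available().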
process_kpi/kpi_health_check/engine_v2.py ADDED
@@ -0,0 +1,315 @@
+from __future__ import annotations
+
+from datetime import timedelta
+
+import numpy as np
+import pandas as pd
+
+from process_kpi.kpi_health_check.engine import window_bounds_period
+
+
+def _to_datetime_series(s: pd.Series) -> pd.Series:
+    try:
+        return pd.to_datetime(s, errors="coerce")
+    except Exception:
+        return pd.to_datetime(pd.Series([], dtype="datetime64[ns]"), errors="coerce")
+
+
+def _vector_is_bad(
+    value: pd.Series,
+    baseline: pd.Series,
+    direction: str,
+    rel_threshold_pct: float,
+    sla: float | None,
+) -> pd.Series:
+    v = pd.to_numeric(value, errors="coerce")
+    b = pd.to_numeric(baseline, errors="coerce")
+
+    bad = pd.Series(False, index=v.index)
+
+    if sla is not None and not (isinstance(sla, float) and np.isnan(sla)):
+        if str(direction) == "higher_is_better":
+            bad = bad | (v < float(sla))
+        else:
+            bad = bad | (v > float(sla))
+
+    thr = float(rel_threshold_pct) / 100.0
+    has_b = b.notna()
+    if bool(has_b.any()):
+        if str(direction) == "higher_is_better":
+            bad = bad | (v < (b - b.abs() * thr))
+        else:
+            bad = bad | (v > (b + b.abs() * thr))
+
+    bad = bad & v.notna()
+    return bad
+
+
+def evaluate_health_check(
+    daily: pd.DataFrame,
+    rat: str,
+    rules_df: pd.DataFrame,
+    baseline_days_n: int,
+    recent_days_n: int,
+    rel_threshold_pct: float,
+    min_consecutive_days: int,
+    granularity: str = "Daily",
+) -> tuple[pd.DataFrame, pd.DataFrame]:
+    if daily is None or daily.empty:
+        return pd.DataFrame(), pd.DataFrame()
+
+    g = str(granularity or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily.columns)
+        else "date_only"
+    )
+
+    step = timedelta(hours=1) if is_hourly else timedelta(days=1)
+    baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
+    recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
+    min_periods = (
+        int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
+    )
+
+    t_all = _to_datetime_series(daily[time_col])
+    end_dt = t_all.max()
+    if pd.isna(end_dt):
+        return pd.DataFrame(), pd.DataFrame()
+
+    end_dt = pd.Timestamp(end_dt)
+    if is_hourly:
+        end_dt = end_dt.floor("h")
+
+    recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
+    baseline_end_dt = recent_start_dt - step
+    baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)
+
+    rat_rules = (
+        rules_df[rules_df["RAT"] == rat].copy()
+        if isinstance(rules_df, pd.DataFrame)
+        else pd.DataFrame()
+    )
+    if rat_rules.empty or "KPI" not in rat_rules.columns:
+        return pd.DataFrame(), pd.DataFrame()
+
+    kpi_cols = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
+    if not kpi_cols:
+        return pd.DataFrame(), pd.DataFrame()
+
+    base_cols = ["site_code", time_col]
+    if "City" in daily.columns:
+        base_cols.append("City")
+    base = daily[base_cols + kpi_cols].copy()
+    base["site_code"] = pd.to_numeric(base["site_code"], errors="coerce")
+    base = base.dropna(subset=["site_code"]).copy()
+    base["site_code"] = base["site_code"].astype(int)
+
+    base_t = _to_datetime_series(base[time_col])
+    base["_t"] = base_t
+    base = base.dropna(subset=["_t"]).copy()
+
+    baseline_mask = (base["_t"] >= pd.to_datetime(baseline_start_dt)) & (
+        base["_t"] <= pd.to_datetime(baseline_end_dt)
+    )
+    recent_mask = (base["_t"] >= pd.to_datetime(recent_start_dt)) & (
+        base["_t"] <= pd.to_datetime(recent_end_dt)
+    )
+
+    counts = base.groupby("site_code")[kpi_cols].count()
+    all_sites = counts.index
+
+    if "City" in base.columns:
+        city_map = (
+            base[["site_code", "City"]]
+            .dropna(subset=["City"])
+            .drop_duplicates("site_code")
+            .set_index("site_code")["City"]
+        )
+        city = city_map.reindex(all_sites)
+    else:
+        city = pd.Series([None] * len(all_sites), index=all_sites)
+
+    baseline_subset = base.loc[baseline_mask, ["site_code"] + kpi_cols]
+    recent_subset = base.loc[recent_mask, ["site_code", "_t"] + kpi_cols]
+
+    baseline_medians = (
+        baseline_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
+        if not baseline_subset.empty
+        else pd.DataFrame(index=all_sites)
+    )
+    recent_medians = (
+        recent_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
+        if not recent_subset.empty
+        else pd.DataFrame(index=all_sites)
+    )
+
+    recent_sorted = (
+        recent_subset.sort_values(["site_code", "_t"])
+        if not recent_subset.empty
+        else recent_subset
+    )
+    gap = recent_sorted.groupby("site_code")["_t"].diff()
+    gap_ok = (gap == step).fillna(False)
+
+    out_frames: list[pd.DataFrame] = []
+
+    for _, rr in rat_rules.iterrows():
+        kpi = str(rr.get("KPI"))
+        if not kpi or kpi not in kpi_cols:
+            continue
+
+        direction = str(rr.get("direction", "higher_is_better"))
+        policy = str(rr.get("policy", "enforce") or "enforce").strip().lower()
+        sla_raw = rr.get("sla", np.nan)
+        try:
+            sla_val = float(sla_raw) if pd.notna(sla_raw) else None
+        except Exception:
+            sla_val = None
+
+        sla_eval = None if policy == "notify" else sla_val
+
+        cnt = counts[kpi].reindex(all_sites).fillna(0).astype(int)
+        has_any = cnt > 0
+
+        baseline = (
+            baseline_medians[kpi].reindex(all_sites)
+            if kpi in baseline_medians.columns
+            else pd.Series([np.nan] * len(all_sites), index=all_sites)
+        )
+        recent = (
+            recent_medians[kpi].reindex(all_sites)
+            if kpi in recent_medians.columns
+            else pd.Series([np.nan] * len(all_sites), index=all_sites)
+        )
+
+        if not recent_sorted.empty and kpi in recent_sorted.columns:
+            v_recent = pd.to_numeric(recent_sorted[kpi], errors="coerce")
+            b_row = recent_sorted["site_code"].map(
+                pd.to_numeric(
+                    baseline_medians.get(kpi, pd.Series(dtype=float)), errors="coerce"
+                )
+            )
+            bad_row = _vector_is_bad(
+                v_recent, b_row, direction, float(rel_threshold_pct), sla_eval
+            )
+            bad_row = bad_row & recent_sorted["_t"].notna()
+
+            start = (~gap_ok) | (~bad_row) | gap_ok.isna()
+            run_id = start.groupby(recent_sorted["site_code"]).cumsum()
+
+            bad_counts = (
+                bad_row.groupby(recent_sorted["site_code"])
+                .sum()
+                .reindex(all_sites)
+                .fillna(0)
+                .astype(int)
+            )
+            streaks = (
+                bad_row.groupby([recent_sorted["site_code"], run_id])
+                .sum()
+                .groupby(level=0)
+                .max()
+                .reindex(all_sites)
+                .fillna(0)
+                .astype(int)
+            )
+
+            tmp_last = (
+                recent_sorted[["site_code", "_t", kpi]]
+                .dropna(subset=[kpi])
+                .sort_values(["site_code", "_t"])
+            )
+            if not tmp_last.empty:
+                last_vals = tmp_last.groupby("site_code")[kpi].tail(1)
+                last_map = pd.Series(
+                    last_vals.values,
+                    index=tmp_last.groupby("site_code")
+                    .tail(1)["site_code"]
+                    .astype(int)
+                    .values,
+                )
+                last = last_map.reindex(all_sites)
+            else:
+                last = pd.Series([np.nan] * len(all_sites), index=all_sites)
+        else:
+            bad_counts = pd.Series([0] * len(all_sites), index=all_sites)
+            streaks = pd.Series([0] * len(all_sites), index=all_sites)
+            last = pd.Series([np.nan] * len(all_sites), index=all_sites)
+
+        is_bad_recent = _vector_is_bad(
+            recent, baseline, direction, float(rel_threshold_pct), sla_eval
+        )
+        is_bad_current = _vector_is_bad(
+            last, baseline, direction, float(rel_threshold_pct), sla_eval
+        )
+        had_bad_recent = (bad_counts > 0) | is_bad_recent
+
+        persistent = streaks >= int(min_periods)
+
+        status = pd.Series("OK", index=all_sites)
+        status = status.where(has_any, "NO_DATA")
+
+        if policy == "notify":
+            status = status.where(has_any, "NO_DATA")
+            status = status.where(~(has_any & is_bad_current), "NOTIFY")
+            status = status.where(
+                ~(has_any & (~is_bad_current) & had_bad_recent), "NOTIFY_RESOLVED"
+            )
+        else:
+            status = status.where(
+                ~(has_any & is_bad_current & persistent), "PERSISTENT_DEGRADED"
+            )
+            status = status.where(
+                ~(has_any & is_bad_current & (~persistent)), "DEGRADED"
+            )
+            status = status.where(
+                ~(has_any & (~is_bad_current) & had_bad_recent), "RESOLVED"
+            )
+
+        frame = pd.DataFrame(
+            {
+                "RAT": rat,
+                "site_code": all_sites.astype(int),
+                "City": city.values,
+                "KPI": kpi,
+                "direction": direction,
+                "sla": sla_val,
+                "policy": policy,
+                "baseline_median": baseline.values,
+                "recent_median": recent.values,
+                "bad_days_recent": bad_counts.values,
+                "max_streak_recent": streaks.values,
+                "status": status.values,
+            }
+        )
+        out_frames.append(frame)
+
+    if not out_frames:
+        return pd.DataFrame(), pd.DataFrame()
+
+    status_df = pd.concat(out_frames, ignore_index=True)
+
+    summary = (
+        status_df.groupby("site_code", as_index=False)
+        .agg(
+            RAT=("RAT", "first"),
+            City=("City", "first"),
+            degraded_kpis=(
+                "status",
+                lambda s: int(s.isin(["DEGRADED", "PERSISTENT_DEGRADED"]).sum()),
+            ),
+            persistent_kpis=(
+                "status",
+                lambda s: int((s == "PERSISTENT_DEGRADED").sum()),
+            ),
+            resolved_kpis=("status", lambda s: int((s == "RESOLVED").sum())),
+        )
+        .sort_values(
+            by=["degraded_kpis", "persistent_kpis", "resolved_kpis"],
+            ascending=[False, False, False],
+        )
+    )
+
+    return status_df, summary
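
The rule check in _vector_is_bad combines an absolute SLA test with a relative drift test: with rel_threshold_pct=10 and a baseline median of 98.0, a higher_is_better KPI is flagged once a value drops below 98.0 − |98.0| × 0.10 = 88.2 (or below the SLA, whichever triggers first). A minimal call sketch under the column names the engine expects — the toy data and rule values are illustrative, not from the repo:

import pandas as pd

from process_kpi.kpi_health_check.engine_v2 import evaluate_health_check

daily = pd.DataFrame(
    {
        "site_code": [1001] * 10,
        "date_only": pd.date_range("2025-01-01", periods=10, freq="D"),
        "City": ["CityA"] * 10,
        "CSSR": [98.5] * 7 + [80.0, 79.0, 78.0],  # degrades over the last 3 days
    }
)
rules = pd.DataFrame(
    [{"RAT": "LTE", "KPI": "CSSR", "direction": "higher_is_better",
      "sla": 95.0, "policy": "enforce"}]
)

status_df, summary = evaluate_health_check(
    daily,
    rat="LTE",
    rules_df=rules,
    baseline_days_n=7,
    recent_days_n=3,
    rel_threshold_pct=10.0,
    min_consecutive_days=3,
)
# status_df holds one row per (site, KPI) with statuses such as DEGRADED,
# PERSISTENT_DEGRADED, RESOLVED, NOTIFY or NO_DATA; summary aggregates per site.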
utils/utils_vars.py CHANGED
@@ -111,6 +111,7 @@ class UtilsVars:
         406283: "MBSKTL01",
         406284: "MBSSEG01",
         406308: "MBSSK0S1",
+        406309: "ASBSCMSC3",
     }
     final_lte_database = ""
     final_gsm_database = ""
ziptool.py CHANGED
@@ -2,7 +2,7 @@ import os
 import zipfile
 
 # Path to your folder containing CSV files
-folder_path = r"C:\Users\David\Documents\DISK E\BI REPORTING\FLOYD REPORT\2025\CAPACITY\DECEMBRE\20151214"
+folder_path = r"C:\Users\David\Documents\DISK E\BI REPORTING\FLOYD REPORT\2025\CAPACITY\DECEMBRE\20251229"
 
 # Loop through all files in the folder
 for filename in os.listdir(folder_path):
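
Only the hard-coded folder_path changes here, pointing the script at a newer report drop. For reference, a per-file zip loop of the kind the truncated body suggests — everything past the for line below is an assumption for illustration, not the repository's actual code:

import os
import zipfile

folder_path = r"C:\reports\20251229"  # placeholder; the real script hard-codes the report folder shown above

for filename in os.listdir(folder_path):
    if not filename.lower().endswith(".csv"):
        continue  # skip non-CSV files
    csv_path = os.path.join(folder_path, filename)
    zip_path = os.path.splitext(csv_path)[0] + ".zip"
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.write(csv_path, arcname=filename)  # one zip archive per CSV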