github-actions committed
Commit a0e03be · 0 parent(s)

Deploy Panel Space
Files changed (43)
  1. Dockerfile +14 -0
  2. README.md +11 -0
  3. data/kpi_health_check_presets/presets_1.json +301 -0
  4. data/kpi_health_check_profiles/Profil_1.json +32 -0
  5. panel_app/convert_to_excel_panel.py +55 -0
  6. panel_app/kpi_health_check_drilldown_plots.py +360 -0
  7. panel_app/kpi_health_check_panel.py +0 -0
  8. panel_app/kpi_health_check_panel_v2.py +0 -0
  9. panel_app/panel_portal.py +121 -0
  10. panel_app/panel_v2_backend.py +128 -0
  11. panel_app/trafic_analysis_panel.py +2459 -0
  12. physical_db/physical_database.csv +0 -0
  13. process_kpi/__init__.py +0 -0
  14. process_kpi/gsm_kpi_requirements.md +47 -0
  15. process_kpi/kpi_health_check/__init__.py +0 -0
  16. process_kpi/kpi_health_check/benchmarks.py +42 -0
  17. process_kpi/kpi_health_check/engine.py +293 -0
  18. process_kpi/kpi_health_check/engine_v2.py +320 -0
  19. process_kpi/kpi_health_check/export.py +264 -0
  20. process_kpi/kpi_health_check/io.py +45 -0
  21. process_kpi/kpi_health_check/kpi_groups.py +96 -0
  22. process_kpi/kpi_health_check/multi_rat.py +253 -0
  23. process_kpi/kpi_health_check/normalization.py +292 -0
  24. process_kpi/kpi_health_check/presets.py +79 -0
  25. process_kpi/kpi_health_check/profiles.py +71 -0
  26. process_kpi/kpi_health_check/rules.py +132 -0
  27. process_kpi/lte_kpi_requirements.md +46 -0
  28. process_kpi/process_gsm_capacity.py +719 -0
  29. process_kpi/process_lcg_capacity.py +286 -0
  30. process_kpi/process_lte_capacity.py +528 -0
  31. process_kpi/process_wbts_capacity.py +312 -0
  32. process_kpi/process_wcel_capacity.py +348 -0
  33. requirements.txt +10 -0
  34. utils/azimuth_validation.py +35 -0
  35. utils/check_sheet_exist.py +90 -0
  36. utils/config_band.py +156 -0
  37. utils/convert_to_excel.py +365 -0
  38. utils/extract_code.py +34 -0
  39. utils/kml_creator.py +79 -0
  40. utils/kpi_analysis_utils.py +666 -0
  41. utils/rnc_bsc_lac_count_chart.py +89 -0
  42. utils/utils_functions.py +126 -0
  43. utils/utils_vars.py +243 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ FROM python:3.11-slim
+
+ RUN useradd -m -u 1000 user
+ WORKDIR /app
+
+ COPY --chown=user:user requirements.txt /app/requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
+
+ COPY --chown=user:user . /app
+
+ USER user
+ EXPOSE 7860
+
+ CMD ["panel", "serve", "panel_app/panel_portal.py", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin=*", "--num-procs", "1", "--log-level", "info"]
README.md ADDED
@@ -0,0 +1,11 @@
+ ---
+ title: KPI Analysis (Panel)
+ emoji: "📊"
+ colorFrom: blue
+ colorTo: red
+ sdk: docker
+ app_port: 7860
+ pinned: false
+ ---
+
+ This Space runs the Panel portal located at `panel_app/panel_portal.py`.
data/kpi_health_check_presets/presets_1.json ADDED
@@ -0,0 +1,301 @@
+ {
+   "name": "Test1",
+   "saved_at": "2025-12-13T13:16:30.212771Z",
+   "rules": [
+     {
+       "RAT": "2G",
+       "KPI": "2G_Carried Traffic",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "2G",
+       "KPI": "Data CSSR",
+       "direction": "higher_is_better",
+       "sla": 90.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "FT_2G_SDCCH_Drop_rate_1",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "Handover success rate",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "PS_UL_Load",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "2G",
+       "KPI": "SDCCH real blocking",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "TCH availability ratio",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "TCH call blocking",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "TCH_ABIS_FAIL_CALL (c001084)",
+       "direction": "lower_is_better",
+       "sla": 10.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "TRAFFIC_PS DL",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "2G",
+       "KPI": "Voice CSSR%_",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "Voice_DCR_OML",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "3G Call Setup Success Rate PS",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "3G Drop Call Rate CS",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "3G Drop Call Rate - All Data services",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "3G HSUPA_USER_THROUGHPUT_BOTH_MT",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "3G",
+       "KPI": "Average RTWP",
+       "direction": "lower_is_better",
+       "sla": -89.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "CS_CALL_RADIO_CONN_LOST (M1006C311)",
+       "direction": "lower_is_better",
+       "sla": 100.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "Cell Availability, excluding blocked by user state (BLU)",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "FT_Soft_HO_Success_Rate_1",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "HSDPA congestion rate in Iub",
+       "direction": "lower_is_better",
+       "sla": 10.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "HSDPA_USER_THROUGHPUT",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "3G",
+       "KPI": "IUB_LOSS_CC_FRAME_LOSS_IND (M1022C71)",
+       "direction": "lower_is_better",
+       "sla": 100.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "Total CS traffic - Erl",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "3G",
+       "KPI": "Total_Data_Traffic",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "3G",
+       "KPI": "ft_cs_call_setup_success_rate",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "ft_hsdpa_call_setup_succ_rate",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "ft_hsupa_call_setup_succ_rate",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "% MIMO RI 2",
+       "direction": "higher_is_better",
+       "sla": 50.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "4G/LTE CALL SETUP SUCCESS RATE (WITHOUT VOLTE)",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "4G/LTE DL Traffic Volume (GBytes)",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "4G/LTE UL Traffic Volume (GBytes)",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "AVE 4G/LTE DL USER THRPUT (ALL)KBnew",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "AVE 4G/LTE UL USER THRPUT (ALL) (Knew",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "AVG_RTWP_RX_ANT_1 (M8005C306)",
+       "direction": "lower_is_better",
+       "sla": -800.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "AVG_RTWP_RX_ANT_2 (M8005C307)",
+       "direction": "lower_is_better",
+       "sla": -800.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Avg RRC conn UE",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Avg UE distance",
+       "direction": "higher_is_better",
+       "sla": NaN,
+       "policy": "notify"
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "CSFB_V6",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Call Drop Rate_ 4G New",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Cell Avail excl BLU",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "E-UTRAN Avg PRB usage per TTI DL",
+       "direction": "lower_is_better",
+       "sla": 50.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "E-UTRAN E-RAB stp SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "E-UTRAN Intra-Freq HO SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "E-UTRAN RRC Paging Discard Ratio",
+       "direction": "lower_is_better",
+       "sla": 1.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Intra eNB HO SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Max PDCP Thr DL (classic eNB)",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Total E-UTRAN RRC conn stp SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "UE-spec log S1 sig conn SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     }
+   ]
+ }
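
Note that the bare `NaN` tokens above (rules without an SLA) are not strict JSON; the file is plainly the output of Python's `json` module, which emits and accepts them by default. A minimal reading sketch (standard library only; the NaN/SLA split shown is illustrative, not necessarily the engine's actual logic):

```python
import json
import math

# CPython's json parser accepts bare NaN/Infinity by default,
# mapping them to float("nan") and float("inf").
with open("data/kpi_health_check_presets/presets_1.json", encoding="utf-8") as f:
    preset = json.load(f)

# A NaN SLA presumably means "track this KPI but enforce no threshold".
with_sla = [r for r in preset["rules"] if not math.isnan(r["sla"])]
print(preset["name"], len(preset["rules"]), "rules,", len(with_sla), "with an SLA")
```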
data/kpi_health_check_profiles/Profil_1.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "name": "Profil_1",
+   "saved_at": "2025-12-13T13:16:45.937845Z",
+   "config": {
+     "analysis_range": [
+       null,
+       null
+     ],
+     "baseline_days": 30,
+     "recent_days": 7,
+     "rel_threshold_pct": 10.0,
+     "min_consecutive_days": 3,
+     "min_criticality": 0,
+     "min_anomaly_score": 0,
+     "city_filter": "",
+     "top_rat_filter": [
+       "2G",
+       "3G",
+       "LTE"
+     ],
+     "top_status_filter": [
+       "DEGRADED",
+       "PERSISTENT_DEGRADED"
+     ],
+     "preset_selected": "presets_1.json",
+     "drilldown": {
+       "site_code": 2130,
+       "rat": "LTE",
+       "kpi": ""
+     }
+   }
+ }
panel_app/convert_to_excel_panel.py ADDED
@@ -0,0 +1,55 @@
+ import io
+ import time
+ from typing import Iterable, Sequence
+
+ import pandas as pd
+
+
+ def write_dfs_to_excel(
+     dfs: Sequence[pd.DataFrame],
+     sheet_names: Sequence[str],
+     index: bool = True,
+     profile: dict | None = None,
+ ) -> bytes:
+     """Simple Excel export for Panel.
+
+     Writes the given DataFrames to an in-memory XLSX file and returns the bytes.
+     No Streamlit dependency and no heavy formatting, to keep Panel exports fast
+     and avoid Streamlit runtime warnings.
+     """
+     bytes_io = io.BytesIO()
+     t0 = time.perf_counter() if profile is not None else 0.0
+     with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
+         for df, name in zip(dfs, sheet_names):
+             # Ensure we always write a valid DataFrame, even if None was passed
+             safe_df = df if isinstance(df, pd.DataFrame) else pd.DataFrame()
+             t_sheet0 = time.perf_counter() if profile is not None else 0.0
+             safe_df.to_excel(writer, sheet_name=str(name), index=index)
+             t_sheet1 = time.perf_counter() if profile is not None else 0.0
+
+             if profile is not None:
+                 sheets = profile.get("excel_sheets")
+                 if not isinstance(sheets, list):
+                     sheets = []
+                     profile["excel_sheets"] = sheets
+                 try:
+                     rows = int(len(safe_df))
+                 except Exception:  # noqa: BLE001
+                     rows = 0
+                 try:
+                     cols = int(safe_df.shape[1])
+                 except Exception:  # noqa: BLE001
+                     cols = 0
+                 sheets.append(
+                     {
+                         "name": str(name),
+                         "rows": rows,
+                         "cols": cols,
+                         "seconds": float(t_sheet1 - t_sheet0),
+                     }
+                 )
+
+     if profile is not None:
+         profile["excel_total_seconds"] = float(time.perf_counter() - t0)
+
+     return bytes_io.getvalue()
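
A minimal usage sketch for `write_dfs_to_excel`; the frames and output path are invented for illustration. Passing a dict as `profile` collects per-sheet timings, which is how a caller can report slow exports:

```python
import pandas as pd

from panel_app.convert_to_excel_panel import write_dfs_to_excel

summary = pd.DataFrame({"code": [2130], "total_voice_trafic": [12.5]})
detail = pd.DataFrame({"code": [2130, 2130], "day": ["2025-12-01", "2025-12-02"]})

profile: dict = {}  # optional timing sink; pass None to skip profiling
xlsx = write_dfs_to_excel(
    [summary, detail], ["Summary", "Detail"], index=False, profile=profile
)

with open("export.xlsx", "wb") as f:
    f.write(xlsx)
print(profile["excel_total_seconds"], [s["name"] for s in profile["excel_sheets"]])
```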
panel_app/kpi_health_check_drilldown_plots.py ADDED
@@ -0,0 +1,360 @@
+ from datetime import timedelta
+
+ import pandas as pd
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+
+ from process_kpi.kpi_health_check.engine import is_bad
+
+
+ def build_drilldown_plot(
+     df: pd.DataFrame,
+     kpis: list[str],
+     rules_df: pd.DataFrame | None = None,
+     highlight_bad_days: bool = True,
+     show_sla: bool = True,
+     site_code: str | int = "",
+     rat: str = "",
+     main_kpi: str | None = None,
+     baseline_days_n: int = 30,
+     recent_days_n: int = 7,
+     rel_threshold_pct: float = 10.0,
+     normalization: str = "None",
+     granularity: str = "Daily",
+ ) -> go.Figure | None:
+     """
+     Builds the drill-down trend plot with native Plotly annotations.
+     """
+     if df is None or df.empty or not kpis:
+         return None
+
+     # Filter columns that exist
+     valid_kpis = [k for k in kpis if k in df.columns]
+     if not valid_kpis:
+         return None
+
+     g = str(granularity or "Daily").strip().lower()
+     is_hourly = g.startswith("hour") or g.startswith("h")
+     time_col = (
+         "period_start" if (is_hourly and "period_start" in df.columns) else "date_only"
+     )
+
+     plot_df = df.sort_values(time_col).copy()
+
+     try:
+         plot_df[time_col] = pd.to_datetime(plot_df[time_col])
+     except Exception:
+         pass
+
+     if main_kpi is None and valid_kpis:
+         main_kpi = valid_kpis[0]
+
+     title_text = f"{rat} - Site {site_code}"
+     # If single KPI, be explicit in title
+     if len(valid_kpis) == 1:
+         title_text = f"{rat} - Site {site_code} - {valid_kpis[0]}"
+
+     # Subplot for Timeline (Streak) - Row 2
+     # Row 1: Main Trend
+     fig = make_subplots(
+         rows=2,
+         cols=1,
+         shared_xaxes=True,
+         vertical_spacing=0.05,
+         row_heights=[0.85, 0.15],
+         subplot_titles=(title_text, "Status Check"),
+     )
+
+     norm_mode = str(normalization or "None").strip()
+     do_norm = (norm_mode != "None") and (len(valid_kpis) > 1)
+
+     n_kpis = len(valid_kpis)
+     trace_mode = "lines+markers" if n_kpis <= 3 else "lines"
+     marker_size = 6 if n_kpis <= 3 else 0
+
+     for kpi in valid_kpis:
+         # Data preparation
+         x_data = plot_df[time_col]
+         y_data = pd.to_numeric(plot_df[kpi], errors="coerce")
+         if do_norm:
+             if norm_mode == "Min-Max":
+                 vmin = y_data.min(skipna=True)
+                 vmax = y_data.max(skipna=True)
+                 if pd.notna(vmin) and pd.notna(vmax) and float(vmax) != float(vmin):
+                     y_data = (y_data - vmin) / (vmax - vmin)
+                 else:
+                     y_data = y_data * 0.0
+             elif norm_mode == "Z-score":
+                 mu = y_data.mean(skipna=True)
+                 sd = y_data.std(skipna=True)
+                 if pd.notna(sd) and float(sd) != 0.0:
+                     y_data = (y_data - mu) / sd
+                 else:
+                     y_data = y_data * 0.0
+
+         # Add Trace
+         fig.add_trace(
+             go.Scatter(
+                 x=x_data,
+                 y=y_data,
+                 mode=trace_mode,
+                 name=kpi,
+                 legendgroup=kpi,  # Allows grouping logic if needed
+                 marker=dict(size=marker_size) if marker_size else None,
+             ),
+             row=1,
+             col=1,
+         )
+
+         # Add SLA line if available
+         if show_sla and rules_df is not None:
+             try:
+                 if do_norm:
+                     continue
+                 # Find rule for this KPI
+                 # Note: This implies rules_df is filtered for the correct RAT
+                 rule = rules_df[rules_df["KPI"] == kpi]
+                 if not rule.empty:
+                     pol = (
+                         str(rule.iloc[0].get("policy", "enforce") or "enforce")
+                         .strip()
+                         .lower()
+                     )
+                     if pol == "notify":
+                         continue
+                     if len(valid_kpis) > 1 and str(kpi) != str(main_kpi):
+                         continue
+                     sla_val = pd.to_numeric(rule.iloc[0].get("sla"), errors="coerce")
+                     if pd.notna(sla_val):
+                         fig.add_hline(
+                             y=sla_val,
+                             line_dash="dot",
+                             line_color="red",
+                             annotation_text=f"SLA {kpi}",
+                             annotation_position="bottom right",
+                             row=1,
+                             col=1,
+                         )
+             except Exception:
+                 pass
+
+     try:
+         if highlight_bad_days and main_kpi and main_kpi in plot_df.columns:
+             direction = "higher_is_better"
+             policy = "enforce"
+             sla_eval = None
+             if (
+                 rules_df is not None
+                 and isinstance(rules_df, pd.DataFrame)
+                 and not rules_df.empty
+             ):
+                 rule = rules_df[rules_df["KPI"] == str(main_kpi)]
+                 if not rule.empty:
+                     direction = str(
+                         rule.iloc[0].get("direction", direction) or direction
+                     )
+                     policy = (
+                         str(rule.iloc[0].get("policy", policy) or policy)
+                         .strip()
+                         .lower()
+                     )
+                     if policy != "notify":
+                         sla_val = pd.to_numeric(
+                             rule.iloc[0].get("sla"), errors="coerce"
+                         )
+                         if pd.notna(sla_val):
+                             try:
+                                 sla_eval = float(sla_val)
+                             except Exception:
+                                 sla_eval = None
+
+             end_dt = pd.to_datetime(plot_df[time_col]).max()
+             if is_hourly:
+                 rs = end_dt - timedelta(hours=max(int(recent_days_n), 1) * 24 - 1)
+                 be = rs - timedelta(hours=1)
+                 bs = be - timedelta(hours=max(int(baseline_days_n), 1) * 24 - 1)
+             else:
+                 rs = end_dt - timedelta(days=max(int(recent_days_n), 1) - 1)
+                 be = rs - timedelta(days=1)
+                 bs = be - timedelta(days=max(int(baseline_days_n), 1) - 1)
+
+             dts = pd.to_datetime(plot_df[time_col])
+             baseline_mask = (dts >= bs) & (dts <= be)
+             recent_mask = (dts >= rs) & (dts <= end_dt)
+             baseline_val = pd.to_numeric(
+                 plot_df.loc[baseline_mask, str(main_kpi)], errors="coerce"
+             ).median()
+             baseline_val = float(baseline_val) if pd.notna(baseline_val) else None
+
+             vals = pd.to_numeric(plot_df[str(main_kpi)], errors="coerce")
+             bad_flags = [
+                 is_bad(
+                     float(v) if pd.notna(v) else None,
+                     baseline_val,
+                     direction,
+                     float(rel_threshold_pct),
+                     sla_eval,
+                 )
+                 for v in vals.tolist()
+             ]
+
+             bad_recent = [bool(b) and bool(r) for b, r in zip(bad_flags, recent_mask)]
+
+             baseline_color = "#bdbdbd"
+             ok_color = "#1565c0"
+             bad_color = "#f9a825" if policy == "notify" else "#e53935"
+
+             colors = []
+             hover_txt = []
+             for is_b, is_base, is_recent in zip(bad_flags, baseline_mask, recent_mask):
+                 if bool(is_base):
+                     colors.append(baseline_color)
+                     hover_txt.append(f"BASELINE ({main_kpi})")
+                 elif bool(is_recent) and bool(is_b):
+                     colors.append(bad_color)
+                     hover_txt.append(f"RECENT BAD ({main_kpi})")
+                 elif bool(is_recent):
+                     colors.append(ok_color)
+                     hover_txt.append(f"RECENT OK ({main_kpi})")
+                 else:
+                     colors.append("#e0e0e0")
+                     hover_txt.append(f"OUTSIDE WINDOW ({main_kpi})")
+             fig.add_trace(
+                 go.Scatter(
+                     x=plot_df[time_col],
+                     y=[0] * len(plot_df),
+                     mode="markers",
+                     marker=dict(symbol="square", size=10, color=colors),
+                     showlegend=False,
+                     hovertext=hover_txt,
+                     hoverinfo="text",
+                 ),
+                 row=2,
+                 col=1,
+             )
+
+             try:
+                 y_main = pd.to_numeric(plot_df[str(main_kpi)], errors="coerce")
+                 if do_norm:
+                     if norm_mode == "Min-Max":
+                         vmin = y_main.min(skipna=True)
+                         vmax = y_main.max(skipna=True)
+                         if (
+                             pd.notna(vmin)
+                             and pd.notna(vmax)
+                             and float(vmax) != float(vmin)
+                         ):
+                             y_main = (y_main - vmin) / (vmax - vmin)
+                         else:
+                             y_main = y_main * 0.0
+                     elif norm_mode == "Z-score":
+                         mu = y_main.mean(skipna=True)
+                         sd = y_main.std(skipna=True)
+                         if pd.notna(sd) and float(sd) != 0.0:
+                             y_main = (y_main - mu) / sd
+                         else:
+                             y_main = y_main * 0.0
+
+                 idx_bad = [i for i, b in enumerate(bad_recent) if bool(b)]
+                 if idx_bad:
+                     fig.add_trace(
+                         go.Scatter(
+                             x=[plot_df[time_col].iloc[i] for i in idx_bad],
+                             y=[y_main.iloc[i] for i in idx_bad],
+                             mode="markers",
+                             marker=dict(size=10, color=bad_color, symbol="circle"),
+                             name="Bad days",
+                             showlegend=(n_kpis <= 3),
+                         ),
+                         row=1,
+                         col=1,
+                     )
+             except Exception:
+                 pass
+     except Exception:
+         pass
+
+     if not plot_df.empty and not highlight_bad_days:
+         fig.add_trace(
+             go.Scatter(
+                 x=plot_df[time_col],
+                 y=[0] * len(plot_df),
+                 mode="markers",
+                 opacity=0,
+                 showlegend=False,
+                 hoverinfo="skip",
+             ),
+             row=2,
+             col=1,
+         )
+
+     fig.update_layout(
+         template="plotly_white",
+         height=500,
+         margin=dict(l=50, r=50, t=50, b=50),
+         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+         hovermode="x unified",
+     )
+
+     try:
+         force_all_dates = False
+         try:
+             x_min = pd.to_datetime(plot_df[time_col]).min()
+             x_max = pd.to_datetime(plot_df[time_col]).max()
+             span_days = int((x_max - x_min).days) + 1
+             n_dates = int(pd.to_datetime(plot_df[time_col]).nunique())
+             force_all_dates = (span_days <= 200) and (n_dates <= 200)
+         except Exception:
+             force_all_dates = False
+
+         if do_norm:
+             fig.update_yaxes(title_text=f"Normalized ({norm_mode})", row=1, col=1)
+         else:
+             fig.update_yaxes(title_text="Value", row=1, col=1)
+         fig.update_yaxes(
+             showticklabels=False,
+             showgrid=False,
+             zeroline=False,
+             range=[-1, 1],
+             row=2,
+             col=1,
+         )
+
+         if force_all_dates and not is_hourly:
+             fig.update_xaxes(
+                 tickmode="linear",
+                 dtick=86400000,
+                 tickformat="%d-%b",
+                 tickangle=-90,
+                 tickfont=dict(size=10),
+                 automargin=True,
+                 ticks="outside",
+                 ticklen=6,
+                 showgrid=True,
+                 row=2,
+                 col=1,
+             )
+         else:
+             fig.update_xaxes(
+                 tickangle=-45,
+                 automargin=True,
+                 ticks="outside",
+                 ticklen=6,
+                 showgrid=True,
+                 tickformatstops=[
+                     {
+                         "dtickrange": [None, 86400000],
+                         "value": "%d-%b\n%H:%M" if is_hourly else "%d-%b\n%Y",
+                     },
+                     {"dtickrange": [86400000, 7 * 86400000], "value": "%d-%b"},
+                     {"dtickrange": [7 * 86400000, "M1"], "value": "%d-%b"},
+                     {"dtickrange": ["M1", "M12"], "value": "%b\n%Y"},
+                     {"dtickrange": ["M12", None], "value": "%Y"},
+                 ],
+                 row=2,
+                 col=1,
+             )
+     except Exception:
+         pass
+
+     return fig
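
A toy invocation of `build_drilldown_plot`, assuming the repository root is on `sys.path`. The column names follow the function's expectations (a `date_only` time column plus one column per KPI), and the series is shaped so the last week breaches the SLA:

```python
import pandas as pd

from panel_app.kpi_health_check_drilldown_plots import build_drilldown_plot

df = pd.DataFrame(
    {
        "date_only": pd.date_range("2025-11-01", periods=40, freq="D"),
        # 33 healthy days, then a 7-day dip below the 98.0 SLA
        "Cell Avail excl BLU": [99.0] * 33 + [91.0] * 7,
    }
)
rules = pd.DataFrame(
    [{"KPI": "Cell Avail excl BLU", "direction": "higher_is_better", "sla": 98.0}]
)

fig = build_drilldown_plot(
    df,
    kpis=["Cell Avail excl BLU"],
    rules_df=rules,
    site_code=2130,
    rat="LTE",
)
if fig is not None:
    fig.show()
```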
panel_app/kpi_health_check_panel.py ADDED
The diff for this file is too large to render. See raw diff
 
panel_app/kpi_health_check_panel_v2.py ADDED
The diff for this file is too large to render. See raw diff
 
panel_app/panel_portal.py ADDED
@@ -0,0 +1,121 @@
+ import os
+ import sys
+
+ import panel as pn
+
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ if ROOT_DIR not in sys.path:
+     sys.path.insert(0, ROOT_DIR)
+
+ pn.extension("plotly", "tabulator")
+
+ # Import pages (kept as modules, not nested templates)
+ from panel_app import (
+     kpi_health_check_panel,
+     kpi_health_check_panel_v2,
+     trafic_analysis_panel,
+ )
+
+ PAGES = {
+     "📊 Global Traffic Analysis": {
+         "get_components": trafic_analysis_panel.get_page_components,
+         "description": "Multi-RAT traffic analysis + maps + exports.",
+     },
+     "📈 KPI Health Check": {
+         "get_components": kpi_health_check_panel.get_page_components,
+         "description": "Detection of degraded/persistent/resolved KPIs + drill-down + export.",
+     },
+     "⚡ KPI Health Check (V2)": {
+         "get_components": kpi_health_check_panel_v2.get_page_components,
+         "description": "Optimized version (disk cache + vectorized health-check engine).",
+     },
+ }
+
+ HOME_PAGE = "🏠 Gallery"
+
+ page_sidebar_container = pn.Column(sizing_mode="stretch_width")
+ page_main_container = pn.Column(sizing_mode="stretch_both")
+
+ page_title = pn.pane.Markdown("", sizing_mode="stretch_width")
+ back_button = pn.widgets.Button(
+     name="← Back to gallery",
+     button_type="primary",
+     width=180,
+ )
+
+ home_button = pn.widgets.Button(
+     name=HOME_PAGE,
+     button_type="default",
+     width_policy="max",
+ )
+
+
+ def _load_page(page_name: str) -> None:
+     if page_name == HOME_PAGE:
+         page_title.object = "## Applications"
+
+         tiles = []
+         for title, meta in PAGES.items():
+             btn = pn.widgets.Button(name="Open", button_type="primary", width=120)
+             btn.on_click(lambda e, t=title: _load_page(t))
+
+             tile = pn.Column(
+                 pn.pane.Markdown(f"### {title}\n\n{meta.get('description', '')}"),
+                 btn,
+                 sizing_mode="stretch_width",
+                 margin=(10, 10, 10, 10),
+             )
+             tiles.append(tile)
+
+         gallery = pn.GridBox(*tiles, ncols=2, sizing_mode="stretch_width")
+         page_sidebar_container.objects = [
+             pn.pane.Markdown(
+                 """### Welcome\n\nPick an application from the gallery."""
+             )
+         ]
+         page_main_container.objects = [page_title, gallery]
+         return
+
+     meta = PAGES.get(page_name)
+     if meta is None:
+         page_sidebar_container.objects = [
+             pn.pane.Alert("Unknown page", alert_type="danger")
+         ]
+         page_main_container.objects = []
+         return
+
+     sidebar, main = meta["get_components"]()
+     page_title.object = f"## {page_name}"
+     page_sidebar_container.objects = [sidebar]
+     page_main_container.objects = [
+         pn.Row(back_button, pn.Spacer(), sizing_mode="stretch_width"),
+         page_title,
+         main,
+     ]
+
+
+ template = pn.template.MaterialTemplate(title="OML DB - Portal")
+
+
+ def _go_home(event=None) -> None:
+     _load_page(HOME_PAGE)
+
+
+ back_button.on_click(_go_home)
+ home_button.on_click(_go_home)
+
+ _load_page(HOME_PAGE)
+
+ template.sidebar.append(
+     pn.Column(
+         pn.pane.Markdown("## Navigation"),
+         home_button,
+         pn.layout.Divider(),
+         page_sidebar_container,
+         sizing_mode="stretch_width",
+     )
+ )
+
+ template.main.append(page_main_container)
+
+ template.servable()
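
The portal's only contract with a page is `get_page_components()` returning a `(sidebar, main)` pair, which `_load_page` unpacks. A hypothetical page module that would slot into `PAGES` (all names invented for illustration):

```python
import panel as pn


def get_page_components():
    # Sidebar widget drives the main view through pn.bind.
    days = pn.widgets.IntSlider(name="Days", start=1, end=30, value=7)
    sidebar = pn.Column("### Options", days)
    main = pn.Column(pn.bind(lambda d: f"Showing the last {d} days", days))
    return sidebar, main
```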
panel_app/panel_v2_backend.py ADDED
@@ -0,0 +1,128 @@
+ import hashlib
+ import os
+ from dataclasses import dataclass
+
+ import pandas as pd
+
+
+ def _project_root() -> str:
+     return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+ def cache_root() -> str:
+     # Priority: env var > /tmp (for HF Spaces/containers) > project root (local dev)
+     env_cache = os.environ.get("CACHE_DIR")
+     if env_cache:
+         path = os.path.join(env_cache, "panel_app_v2")
+     elif os.path.exists("/tmp") and os.access("/tmp", os.W_OK):
+         # On Hugging Face Spaces and Linux containers, /tmp is always writable
+         path = os.path.join("/tmp", "panel_app_v2_cache")
+     else:
+         # Fallback to project root for local development
+         root = _project_root()
+         path = os.path.join(root, ".cache", "panel_app_v2")
+     os.makedirs(path, exist_ok=True)
+     return path
+
+
+ def _safe_str(value: object) -> str:
+     try:
+         return str(value or "")
+     except Exception:
+         return ""
+
+
+ def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str:
+     h = hashlib.blake2b(digest_size=16)
+     if file_bytes:
+         h.update(file_bytes)
+     name = _safe_str(filename)
+     if name:
+         h.update(name.encode("utf-8", errors="ignore"))
+     ex = _safe_str(extra)
+     if ex:
+         h.update(ex.encode("utf-8", errors="ignore"))
+     return h.hexdigest()
+
+
+ def _has_pyarrow() -> bool:
+     try:
+         import pyarrow  # noqa: F401
+
+         return True
+     except Exception:
+         return False
+
+
+ def _has_duckdb() -> bool:
+     try:
+         import duckdb  # noqa: F401
+
+         return True
+     except Exception:
+         return False
+
+
+ def write_table(df: pd.DataFrame, path_no_ext: str) -> str:
+     if _has_pyarrow():
+         path = path_no_ext + ".parquet"
+         df.to_parquet(path, index=False)
+         return path
+     path = path_no_ext + ".pkl"
+     df.to_pickle(path)
+     return path
+
+
+ def read_table(path: str) -> pd.DataFrame:
+     if not path or not os.path.exists(path):
+         return pd.DataFrame()
+     p = str(path).lower()
+     if p.endswith(".parquet"):
+         return pd.read_parquet(path)
+     return pd.read_pickle(path)
+
+
+ @dataclass(frozen=True)
+ class CachedDataset:
+     dataset_id: str
+     rat: str
+     granularity: str
+
+     def base_dir(self) -> str:
+         return os.path.join(
+             cache_root(),
+             self.dataset_id,
+             f"rat={self.rat}",
+             f"granularity={self.granularity}",
+         )
+
+     def daily_table_base(self) -> str:
+         return os.path.join(self.base_dir(), "daily")
+
+     def meta_path(self) -> str:
+         return os.path.join(self.base_dir(), "meta.json")
+
+
+ def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None:
+     base = dataset.daily_table_base()
+     candidates = [base + ".parquet", base + ".pkl"]
+     for p in candidates:
+         if os.path.exists(p):
+             try:
+                 df = read_table(p)
+                 return df if isinstance(df, pd.DataFrame) else pd.DataFrame()
+             except Exception:
+                 return pd.DataFrame()
+     return None
+
+
+ def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str:
+     os.makedirs(dataset.base_dir(), exist_ok=True)
+     return write_table(daily, dataset.daily_table_base())
+
+
+ def ensure_duckdb_available() -> None:
+     if not _has_duckdb():
+         raise RuntimeError(
+             "DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb"
+         )
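
A round-trip sketch for the V2 disk cache, assuming the fingerprint of the uploaded bytes serves as the dataset id so the cache invalidates whenever the input file changes (the bytes and frame below are placeholders):

```python
import pandas as pd

from panel_app.panel_v2_backend import (
    CachedDataset,
    fingerprint_bytes,
    save_cached_daily,
    try_load_cached_daily,
)

raw = b"...uploaded CSV bytes..."  # placeholder for the real upload
dataset = CachedDataset(
    dataset_id=fingerprint_bytes(raw, filename="lte_daily.csv"),
    rat="LTE",
    granularity="Daily",
)

daily = try_load_cached_daily(dataset)
if daily is None:  # cache miss: compute the daily table, then persist it
    daily = pd.DataFrame({"code": [2130], "lte_data_trafic": [1.2]})
    save_cached_daily(dataset, daily)
```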
panel_app/trafic_analysis_panel.py ADDED
@@ -0,0 +1,2459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ import sys
4
+ import zipfile
5
+ from datetime import date, datetime, timedelta
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import panel as pn
10
+ import plotly.express as px
11
+
12
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
13
+ if ROOT_DIR not in sys.path:
14
+ sys.path.insert(0, ROOT_DIR)
15
+
16
+ from panel_app.convert_to_excel_panel import write_dfs_to_excel
17
+ from utils.utils_vars import get_physical_db
18
+
19
+ pn.extension(
20
+ "plotly",
21
+ "tabulator",
22
+ raw_css=[
23
+ ":fullscreen { background-color: white; overflow: auto; }",
24
+ "::backdrop { background-color: white; }",
25
+ ".plot-fullscreen-wrapper:fullscreen { padding: 20px; display: flex; flex-direction: column; }",
26
+ ".plot-fullscreen-wrapper:fullscreen > * { height: 100% !important; width: 100% !important; }",
27
+ ],
28
+ )
29
+
30
+
31
+ def read_fileinput_to_df(file_input: pn.widgets.FileInput) -> pd.DataFrame | None:
32
+ """Read a Panel FileInput (ZIP or CSV) into a DataFrame.
33
+
34
+ Returns None if no file is provided.
35
+ """
36
+ if file_input is None or not file_input.value:
37
+ return None
38
+
39
+ filename = (file_input.filename or "").lower()
40
+ data = io.BytesIO(file_input.value)
41
+
42
+ if filename.endswith(".zip"):
43
+ with zipfile.ZipFile(data) as z:
44
+ csv_files = [f for f in z.namelist() if f.lower().endswith(".csv")]
45
+ if not csv_files:
46
+ raise ValueError("No CSV file found in the ZIP archive")
47
+ with z.open(csv_files[0]) as f:
48
+ return pd.read_csv(f, encoding="latin1", sep=";", low_memory=False)
49
+ elif filename.endswith(".csv"):
50
+ return pd.read_csv(data, encoding="latin1", sep=";", low_memory=False)
51
+ else:
52
+ raise ValueError("Unsupported file format. Please upload a ZIP or CSV file.")
53
+
54
+
55
+ def extract_code(name):
56
+ name = name.replace(" ", "_") if isinstance(name, str) else None
57
+ if name and len(name) >= 10:
58
+ try:
59
+ return int(name.split("_")[0])
60
+ except ValueError:
61
+ return None
62
+ return None
63
+
64
+
65
+ def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
66
+ df = df[df["BCF name"].str.len() >= 10].copy()
67
+ df["2g_data_trafic"] = ((df["TRAFFIC_PS DL"] + df["PS_UL_Load"]) / 1000).round(1)
68
+ df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
69
+ df["code"] = df["BCF name"].apply(extract_code)
70
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
71
+ df = df[df["code"].notna()]
72
+ df["code"] = df["code"].astype(int)
73
+ date_format = (
74
+ "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
75
+ )
76
+ df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
77
+ df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
78
+
79
+ if "TCH availability ratio" in df.columns:
80
+ df["2g_tch_avail"] = pd.to_numeric(
81
+ df["TCH availability ratio"], errors="coerce"
82
+ )
83
+
84
+ agg_dict = {
85
+ "2g_data_trafic": "sum",
86
+ "2g_voice_trafic": "sum",
87
+ }
88
+ if "2g_tch_avail" in df.columns:
89
+ agg_dict["2g_tch_avail"] = "mean"
90
+
91
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
92
+ return df
93
+
94
+
95
+ def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
96
+ df = df[df["WBTS name"].str.len() >= 10].copy()
97
+ df["code"] = df["WBTS name"].apply(extract_code)
98
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
99
+ df = df[df["code"].notna()]
100
+ df["code"] = df["code"].astype(int)
101
+ date_format = (
102
+ "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
103
+ )
104
+ df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
105
+ df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
106
+ df.rename(
107
+ columns={
108
+ "Total CS traffic - Erl": "3g_voice_trafic",
109
+ "Total_Data_Traffic": "3g_data_trafic",
110
+ },
111
+ inplace=True,
112
+ )
113
+
114
+ kpi_col = None
115
+ for col in df.columns:
116
+ if "cell availability" in str(col).lower():
117
+ kpi_col = col
118
+ break
119
+
120
+ if kpi_col is not None:
121
+ df["3g_cell_avail"] = pd.to_numeric(df[kpi_col], errors="coerce")
122
+
123
+ agg_dict = {
124
+ "3g_voice_trafic": "sum",
125
+ "3g_data_trafic": "sum",
126
+ }
127
+ if "3g_cell_avail" in df.columns:
128
+ agg_dict["3g_cell_avail"] = "mean"
129
+
130
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
131
+ return df
132
+
133
+
134
+ def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
135
+ df = df[df["LNBTS name"].str.len() >= 10].copy()
136
+ df["lte_data_trafic"] = (
137
+ df["4G/LTE DL Traffic Volume (GBytes)"]
138
+ + df["4G/LTE UL Traffic Volume (GBytes)"]
139
+ )
140
+ df["code"] = df["LNBTS name"].apply(extract_code)
141
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
142
+ df = df[df["code"].notna()]
143
+ df["code"] = df["code"].astype(int)
144
+ date_format = (
145
+ "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
146
+ )
147
+ df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
148
+ df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
149
+ if "Cell Avail excl BLU" in df.columns:
150
+ df["lte_cell_avail"] = pd.to_numeric(df["Cell Avail excl BLU"], errors="coerce")
151
+
152
+ agg_dict = {"lte_data_trafic": "sum"}
153
+ if "lte_cell_avail" in df.columns:
154
+ agg_dict["lte_cell_avail"] = "mean"
155
+
156
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
157
+ return df
158
+
159
+
160
+ def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_range):
161
+ physical_db = get_physical_db()
162
+ physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
163
+ physical_db["code"] = (
164
+ pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
165
+ )
166
+ physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
167
+ physical_db = physical_db.drop_duplicates(subset="code")
168
+
169
+ df = pd.merge(df_2g, df_3g, on=["date", "ID", "code"], how="outer")
170
+ df = pd.merge(df, df_lte, on=["date", "ID", "code"], how="outer")
171
+
172
+ for col in [
173
+ "2g_data_trafic",
174
+ "2g_voice_trafic",
175
+ "3g_voice_trafic",
176
+ "3g_data_trafic",
177
+ "lte_data_trafic",
178
+ ]:
179
+ if col not in df:
180
+ df[col] = 0
181
+
182
+ kpi_masks = {}
183
+ for kpi_col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]:
184
+ if kpi_col in df.columns:
185
+ kpi_masks[kpi_col] = df[kpi_col].notna()
186
+
187
+ df.fillna(0, inplace=True)
188
+
189
+ for kpi_col, mask in kpi_masks.items():
190
+ df.loc[~mask, kpi_col] = np.nan
191
+
192
+ df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
193
+ df["total_data_trafic"] = (
194
+ df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
195
+ )
196
+ df = pd.merge(df, physical_db, on=["code"], how="left")
197
+
198
+ pre_start, pre_end = pd.to_datetime(pre_range[0]), pd.to_datetime(pre_range[1])
199
+ post_start, post_end = pd.to_datetime(post_range[0]), pd.to_datetime(post_range[1])
200
+ last_period_start, last_period_end = (
201
+ pd.to_datetime(last_period_range[0]),
202
+ pd.to_datetime(last_period_range[1]),
203
+ )
204
+
205
+ last_period = df[
206
+ (df["date"] >= last_period_start) & (df["date"] <= last_period_end)
207
+ ]
208
+
209
+ def assign_period(x):
210
+ if pre_start <= x <= pre_end:
211
+ return "pre"
212
+ if post_start <= x <= post_end:
213
+ return "post"
214
+ return "other"
215
+
216
+ df["period"] = df["date"].apply(assign_period)
217
+
218
+ comparison = df[df["period"].isin(["pre", "post"])]
219
+
220
+ sum_pivot = (
221
+ comparison.groupby(["code", "period"])[
222
+ ["total_voice_trafic", "total_data_trafic"]
223
+ ]
224
+ .sum()
225
+ .unstack()
226
+ )
227
+ sum_pivot.columns = [f"{metric}_{period}" for metric, period in sum_pivot.columns]
228
+ sum_pivot = sum_pivot.reset_index()
229
+
230
+ sum_pivot["total_voice_trafic_diff"] = (
231
+ sum_pivot["total_voice_trafic_post"] - sum_pivot["total_voice_trafic_pre"]
232
+ )
233
+ sum_pivot["total_data_trafic_diff"] = (
234
+ sum_pivot["total_data_trafic_post"] - sum_pivot["total_data_trafic_pre"]
235
+ )
236
+
237
+ for metric in ["total_voice_trafic", "total_data_trafic"]:
238
+ sum_pivot[f"{metric}_diff_pct"] = (
239
+ (sum_pivot.get(f"{metric}_post", 0) - sum_pivot.get(f"{metric}_pre", 0))
240
+ / sum_pivot.get(f"{metric}_pre", 1)
241
+ ) * 100
242
+
243
+ sum_order = [
244
+ "code",
245
+ "total_voice_trafic_pre",
246
+ "total_voice_trafic_post",
247
+ "total_voice_trafic_diff",
248
+ "total_voice_trafic_diff_pct",
249
+ "total_data_trafic_pre",
250
+ "total_data_trafic_post",
251
+ "total_data_trafic_diff",
252
+ "total_data_trafic_diff_pct",
253
+ ]
254
+ sum_existing_cols = [col for col in sum_order if col in sum_pivot.columns]
255
+ sum_remaining_cols = [
256
+ col for col in sum_pivot.columns if col not in sum_existing_cols
257
+ ]
258
+ sum_pivot = sum_pivot[sum_existing_cols + sum_remaining_cols]
259
+
260
+ avg_pivot = (
261
+ comparison.groupby(["code", "period"])[
262
+ ["total_voice_trafic", "total_data_trafic"]
263
+ ]
264
+ .mean()
265
+ .unstack()
266
+ )
267
+ avg_pivot.columns = [f"{metric}_{period}" for metric, period in avg_pivot.columns]
268
+ avg_pivot = avg_pivot.reset_index()
269
+
270
+ avg_pivot["total_voice_trafic_diff"] = (
271
+ avg_pivot["total_voice_trafic_post"] - avg_pivot["total_voice_trafic_pre"]
272
+ )
273
+ avg_pivot["total_data_trafic_diff"] = (
274
+ avg_pivot["total_data_trafic_post"] - avg_pivot["total_data_trafic_pre"]
275
+ )
276
+
277
+ for metric in ["total_voice_trafic", "total_data_trafic"]:
278
+ avg_pivot[f"{metric}_diff_pct"] = (
279
+ (avg_pivot.get(f"{metric}_post", 0) - avg_pivot.get(f"{metric}_pre", 0))
280
+ / avg_pivot.get(f"{metric}_pre", 1)
281
+ ) * 100
282
+
283
+ avg_pivot = avg_pivot.rename(
284
+ columns={
285
+ "total_voice_trafic_pre": "avg_voice_trafic_pre",
286
+ "total_voice_trafic_post": "avg_voice_trafic_post",
287
+ "total_voice_trafic_diff": "avg_voice_trafic_diff",
288
+ "total_voice_trafic_diff_pct": "avg_voice_trafic_diff_pct",
289
+ "total_data_trafic_pre": "avg_data_trafic_pre",
290
+ "total_data_trafic_post": "avg_data_trafic_post",
291
+ "total_data_trafic_diff": "avg_data_trafic_diff",
292
+ "total_data_trafic_diff_pct": "avg_data_trafic_diff_pct",
293
+ }
294
+ )
295
+
296
+ avg_order = [
297
+ "code",
298
+ "avg_voice_trafic_pre",
299
+ "avg_voice_trafic_post",
300
+ "avg_voice_trafic_diff",
301
+ "avg_voice_trafic_diff_pct",
302
+ "avg_data_trafic_pre",
303
+ "avg_data_trafic_post",
304
+ "avg_data_trafic_diff",
305
+ "avg_data_trafic_diff_pct",
306
+ ]
307
+ avg_existing_cols = [col for col in avg_order if col in avg_pivot.columns]
308
+ avg_remaining_cols = [
309
+ col for col in avg_pivot.columns if col not in avg_existing_cols
310
+ ]
311
+ avg_pivot = avg_pivot[avg_existing_cols + avg_remaining_cols]
312
+
313
+ return df, last_period, sum_pivot.round(2), avg_pivot.round(2)
314
+
315
+
316
+ def analyze_2g_availability(df: pd.DataFrame, sla_2g: float):
317
+ avail_col = "2g_tch_avail"
318
+
319
+ if avail_col not in df.columns or "period" not in df.columns:
320
+ return None, None
321
+
322
+ df_2g = df[df[avail_col].notna()].copy()
323
+ df_2g = df_2g[df_2g["period"].isin(["pre", "post"])]
324
+
325
+ if df_2g.empty:
326
+ return None, None
327
+
328
+ site_pivot = df_2g.groupby(["code", "period"])[avail_col].mean().unstack()
329
+
330
+ site_pivot = site_pivot.rename(
331
+ columns={"pre": "tch_avail_pre", "post": "tch_avail_post"}
332
+ )
333
+
334
+ if "tch_avail_pre" not in site_pivot.columns:
335
+ site_pivot["tch_avail_pre"] = pd.NA
336
+ if "tch_avail_post" not in site_pivot.columns:
337
+ site_pivot["tch_avail_post"] = pd.NA
338
+
339
+ site_pivot["tch_avail_diff"] = (
340
+ site_pivot["tch_avail_post"] - site_pivot["tch_avail_pre"]
341
+ )
342
+ site_pivot["pre_ok_vs_sla"] = site_pivot["tch_avail_pre"] >= sla_2g
343
+ site_pivot["post_ok_vs_sla"] = site_pivot["tch_avail_post"] >= sla_2g
344
+
345
+ site_pivot = site_pivot.reset_index()
346
+
347
+ summary_rows = []
348
+ for period_label, col_name in [
349
+ ("pre", "tch_avail_pre"),
350
+ ("post", "tch_avail_post"),
351
+ ]:
352
+ series = site_pivot[col_name].dropna()
353
+ total_cells = series.shape[0]
354
+ if total_cells == 0:
355
+ summary_rows.append(
356
+ {
357
+ "period": period_label,
358
+ "cells": 0,
359
+ "avg_availability": pd.NA,
360
+ "median_availability": pd.NA,
361
+ "p05_availability": pd.NA,
362
+ "p95_availability": pd.NA,
363
+ "min_availability": pd.NA,
364
+ "max_availability": pd.NA,
365
+ "cells_ge_sla": 0,
366
+ "cells_lt_sla": 0,
367
+ "pct_cells_ge_sla": pd.NA,
368
+ }
369
+ )
370
+ continue
371
+ cells_ge_sla = (series >= sla_2g).sum()
372
+ cells_lt_sla = (series < sla_2g).sum()
373
+ summary_rows.append(
374
+ {
375
+ "period": period_label,
376
+ "cells": int(total_cells),
377
+ "avg_availability": series.mean(),
378
+ "median_availability": series.median(),
379
+ "p05_availability": series.quantile(0.05),
380
+ "p95_availability": series.quantile(0.95),
381
+ "min_availability": series.min(),
382
+ "max_availability": series.max(),
383
+ "cells_ge_sla": int(cells_ge_sla),
384
+ "cells_lt_sla": int(cells_lt_sla),
385
+ "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
386
+ }
387
+ )
388
+
389
+ summary_df = pd.DataFrame(summary_rows)
390
+
391
+ return summary_df, site_pivot
392
+
393
+
394
+ def analyze_3g_availability(df: pd.DataFrame, sla_3g: float):
395
+ avail_col = "3g_cell_avail"
396
+
397
+ if avail_col not in df.columns or "period" not in df.columns:
398
+ return None, None
399
+
400
+ df_3g = df[df[avail_col].notna()].copy()
401
+ df_3g = df_3g[df_3g["period"].isin(["pre", "post"])]
402
+
403
+ if df_3g.empty:
404
+ return None, None
405
+
406
+ site_pivot = df_3g.groupby(["code", "period"])[avail_col].mean().unstack()
407
+
408
+ site_pivot = site_pivot.rename(
409
+ columns={"pre": "cell_avail_pre", "post": "cell_avail_post"}
410
+ )
411
+
412
+ if "cell_avail_pre" not in site_pivot.columns:
413
+ site_pivot["cell_avail_pre"] = pd.NA
414
+ if "cell_avail_post" not in site_pivot.columns:
415
+ site_pivot["cell_avail_post"] = pd.NA
416
+
417
+ site_pivot["cell_avail_diff"] = (
418
+ site_pivot["cell_avail_post"] - site_pivot["cell_avail_pre"]
419
+ )
420
+ site_pivot["pre_ok_vs_sla"] = site_pivot["cell_avail_pre"] >= sla_3g
421
+ site_pivot["post_ok_vs_sla"] = site_pivot["cell_avail_post"] >= sla_3g
422
+
423
+ site_pivot = site_pivot.reset_index()
424
+
425
+ summary_rows = []
426
+ for period_label, col_name in [
427
+ ("pre", "cell_avail_pre"),
428
+ ("post", "cell_avail_post"),
429
+ ]:
430
+ series = site_pivot[col_name].dropna()
431
+ total_cells = series.shape[0]
432
+ if total_cells == 0:
433
+ summary_rows.append(
434
+ {
435
+ "period": period_label,
436
+ "cells": 0,
437
+ "avg_availability": pd.NA,
438
+ "median_availability": pd.NA,
439
+ "p05_availability": pd.NA,
440
+ "p95_availability": pd.NA,
441
+ "min_availability": pd.NA,
442
+ "max_availability": pd.NA,
443
+ "cells_ge_sla": 0,
444
+ "cells_lt_sla": 0,
445
+ "pct_cells_ge_sla": pd.NA,
446
+ }
447
+ )
448
+ continue
449
+ cells_ge_sla = (series >= sla_3g).sum()
450
+ cells_lt_sla = (series < sla_3g).sum()
451
+ summary_rows.append(
452
+ {
453
+ "period": period_label,
454
+ "cells": int(total_cells),
455
+ "avg_availability": series.mean(),
456
+ "median_availability": series.median(),
457
+ "p05_availability": series.quantile(0.05),
458
+ "p95_availability": series.quantile(0.95),
459
+ "min_availability": series.min(),
460
+ "max_availability": series.max(),
461
+ "cells_ge_sla": int(cells_ge_sla),
462
+ "cells_lt_sla": int(cells_lt_sla),
463
+ "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
464
+ }
465
+ )
466
+
467
+ summary_df = pd.DataFrame(summary_rows)
468
+
469
+ return summary_df, site_pivot
470
+
471
+
472
+ def analyze_lte_availability(df: pd.DataFrame, sla_lte: float):
473
+ avail_col = "lte_cell_avail"
474
+
475
+ if avail_col not in df.columns or "period" not in df.columns:
476
+ return None, None
477
+
478
+ df_lte = df[df[avail_col].notna()].copy()
479
+ df_lte = df_lte[df_lte["period"].isin(["pre", "post"])]
480
+
481
+ if df_lte.empty:
482
+ return None, None
483
+
484
+ site_pivot = df_lte.groupby(["code", "period"])[avail_col].mean().unstack()
485
+
486
+ site_pivot = site_pivot.rename(
487
+ columns={"pre": "lte_avail_pre", "post": "lte_avail_post"}
488
+ )
489
+
490
+ if "lte_avail_pre" not in site_pivot.columns:
491
+ site_pivot["lte_avail_pre"] = pd.NA
492
+ if "lte_avail_post" not in site_pivot.columns:
493
+ site_pivot["lte_avail_post"] = pd.NA
494
+
495
+ site_pivot["lte_avail_diff"] = (
496
+ site_pivot["lte_avail_post"] - site_pivot["lte_avail_pre"]
497
+ )
498
+ site_pivot["pre_ok_vs_sla"] = site_pivot["lte_avail_pre"] >= sla_lte
499
+ site_pivot["post_ok_vs_sla"] = site_pivot["lte_avail_post"] >= sla_lte
500
+
501
+ site_pivot = site_pivot.reset_index()
502
+
503
+ summary_rows = []
504
+ for period_label, col_name in [
505
+ ("pre", "lte_avail_pre"),
506
+ ("post", "lte_avail_post"),
507
+ ]:
508
+ series = site_pivot[col_name].dropna()
509
+ total_cells = series.shape[0]
510
+ if total_cells == 0:
511
+ summary_rows.append(
512
+ {
513
+ "period": period_label,
514
+ "cells": 0,
515
+ "avg_availability": pd.NA,
516
+ "median_availability": pd.NA,
517
+ "p05_availability": pd.NA,
518
+ "p95_availability": pd.NA,
519
+ "min_availability": pd.NA,
520
+ "max_availability": pd.NA,
521
+ "cells_ge_sla": 0,
522
+ "cells_lt_sla": 0,
523
+ "pct_cells_ge_sla": pd.NA,
524
+ }
525
+ )
526
+ continue
527
+ cells_ge_sla = (series >= sla_lte).sum()
528
+ cells_lt_sla = (series < sla_lte).sum()
529
+ summary_rows.append(
530
+ {
531
+ "period": period_label,
532
+ "cells": int(total_cells),
533
+ "avg_availability": series.mean(),
534
+ "median_availability": series.median(),
535
+ "p05_availability": series.quantile(0.05),
536
+ "p95_availability": series.quantile(0.95),
537
+ "min_availability": series.min(),
538
+ "max_availability": series.max(),
539
+ "cells_ge_sla": int(cells_ge_sla),
540
+ "cells_lt_sla": int(cells_lt_sla),
541
+ "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
542
+ }
543
+ )
544
+
545
+ summary_df = pd.DataFrame(summary_rows)
546
+
547
+ return summary_df, site_pivot
548
+
549
+
550
+ def analyze_multirat_availability(
551
+ df: pd.DataFrame, sla_2g: float, sla_3g: float, sla_lte: float
552
+ ):
553
+ if "period" not in df.columns:
554
+ return None
555
+
556
+ rat_cols = []
557
+ if "2g_tch_avail" in df.columns:
558
+ rat_cols.append("2g_tch_avail")
559
+ if "3g_cell_avail" in df.columns:
560
+ rat_cols.append("3g_cell_avail")
561
+ if "lte_cell_avail" in df.columns:
562
+ rat_cols.append("lte_cell_avail")
563
+
564
+ if not rat_cols:
565
+ return None
566
+
567
+ agg_dict = {col: "mean" for col in rat_cols}
568
+
569
+ df_pre = df[df["period"] == "pre"]
570
+ df_post = df[df["period"] == "post"]
571
+
572
+ pre = df_pre.groupby("code", as_index=False).agg(agg_dict)
573
+ post = df_post.groupby("code", as_index=False).agg(agg_dict)
574
+
575
+ rename_map_pre = {
576
+ "2g_tch_avail": "2g_avail_pre",
577
+ "3g_cell_avail": "3g_avail_pre",
578
+ "lte_cell_avail": "lte_avail_pre",
579
+ }
580
+ rename_map_post = {
581
+ "2g_tch_avail": "2g_avail_post",
582
+ "3g_cell_avail": "3g_avail_post",
583
+ "lte_cell_avail": "lte_avail_post",
584
+ }
585
+
586
+ pre = pre.rename(columns=rename_map_pre)
587
+ post = post.rename(columns=rename_map_post)
588
+
589
+ multi = pd.merge(pre, post, on="code", how="outer")
590
+
591
+ if not df_post.empty and {
592
+ "total_voice_trafic",
593
+ "total_data_trafic",
594
+ }.issubset(df_post.columns):
595
+ post_traffic = (
596
+ df_post.groupby("code", as_index=False)[
597
+ ["total_voice_trafic", "total_data_trafic"]
598
+ ]
599
+ .sum()
600
+ .rename(
601
+ columns={
602
+ "total_voice_trafic": "post_total_voice_trafic",
603
+ "total_data_trafic": "post_total_data_trafic",
604
+ }
605
+ )
606
+ )
607
+ multi = pd.merge(multi, post_traffic, on="code", how="left")
608
+
609
+ if "City" in df.columns:
610
+ city_df = df[["code", "City"]].drop_duplicates("code")
611
+ multi = pd.merge(multi, city_df, on="code", how="left")
612
+
613
+ def _ok_flag(series: pd.Series, sla: float) -> pd.Series:
614
+ if series.name not in multi.columns:
615
+ return pd.Series([pd.NA] * len(multi), index=multi.index)
616
+ ok = multi[series.name] >= sla
617
+ ok = ok.where(multi[series.name].notna(), pd.NA)
618
+ return ok
619
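+ # Note: _ok_flag only uses the passed Series for its .name; the comparison is
+ # re-read from `multi` so rows with missing availability stay <NA> instead of False.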
+
620
+ if "2g_avail_post" in multi.columns:
621
+ multi["ok_2g_post"] = _ok_flag(multi["2g_avail_post"], sla_2g)
622
+ if "3g_avail_post" in multi.columns:
623
+ multi["ok_3g_post"] = _ok_flag(multi["3g_avail_post"], sla_3g)
624
+ if "lte_avail_post" in multi.columns:
625
+ multi["ok_lte_post"] = _ok_flag(multi["lte_avail_post"], sla_lte)
626
+
627
+ def classify_row(row):
628
+ rats_status = []
629
+ for rat, col in [
630
+ ("2G", "ok_2g_post"),
631
+ ("3G", "ok_3g_post"),
632
+ ("LTE", "ok_lte_post"),
633
+ ]:
634
+ if col in row and not pd.isna(row[col]):
635
+ rats_status.append((rat, bool(row[col])))
636
+
637
+ if not rats_status:
638
+ return "No RAT data"
639
+
640
+ bad_rats = [rat for rat, ok in rats_status if not ok]
641
+ if not bad_rats:
642
+ return "OK all RAT"
643
+ if len(bad_rats) == 1:
644
+ return f"Degraded {bad_rats[0]} only"
645
+ return "Degraded multi-RAT (" + ",".join(bad_rats) + ")"
646
+
647
+ multi["post_multirat_status"] = multi.apply(classify_row, axis=1)
648
+
649
+ ordered_cols = ["code"]
650
+ if "City" in multi.columns:
651
+ ordered_cols.append("City")
652
+ for col in [
653
+ "2g_avail_pre",
654
+ "2g_avail_post",
655
+ "3g_avail_pre",
656
+ "3g_avail_post",
657
+ "lte_avail_pre",
658
+ "lte_avail_post",
659
+ "post_total_voice_trafic",
660
+ "post_total_data_trafic",
661
+ "ok_2g_post",
662
+ "ok_3g_post",
663
+ "ok_lte_post",
664
+ "post_multirat_status",
665
+ ]:
666
+ if col in multi.columns:
667
+ ordered_cols.append(col)
668
+
669
+ remaining_cols = [c for c in multi.columns if c not in ordered_cols]
670
+ multi = multi[ordered_cols + remaining_cols]
671
+
672
+ return multi
673
+
674
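+ # Illustration (hypothetical values): with all SLAs at 98.0, a site with
+ # 2g_avail_post = 99.1, 3g_avail_post = 97.2 and lte_avail_post = 96.5 gets
+ # ok_2g_post=True, ok_3g_post=False, ok_lte_post=False and therefore the status
+ # "Degraded multi-RAT (3G,LTE)".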
+
675
+ def analyze_persistent_availability(
676
+ df: pd.DataFrame,
677
+ multi_rat_df: pd.DataFrame,
678
+ sla_2g: float,
679
+ sla_3g: float,
680
+ sla_lte: float,
681
+ min_consecutive_days: int = 3,
682
+ ) -> pd.DataFrame:
683
+ if df is None or df.empty:
684
+ return pd.DataFrame()
685
+ if "date" not in df.columns or "code" not in df.columns:
686
+ return pd.DataFrame()
687
+
688
+ work_df = df.copy()
689
+ work_df["date_only"] = work_df["date"].dt.date
690
+
691
+ site_stats = {}
692
+
693
+ def _update_stats(rat_key_prefix: str, grouped: pd.DataFrame, sla: float) -> None:
694
+ if grouped.empty:
695
+ return
696
+ for code, group in grouped.groupby("code"):
697
+ group = group.sort_values("date_only")
698
+ dates = pd.to_datetime(group["date_only"]).tolist()
699
+ below_flags = (group["value"] < sla).tolist()
700
+ max_streak = 0
701
+ current_streak = 0
702
+ total_below = 0
703
+ last_date = None
704
+ for flag, current_date in zip(below_flags, dates):
705
+ if flag:
706
+ total_below += 1
707
+ if (
708
+ last_date is not None
709
+ and current_date == last_date + timedelta(days=1)
710
+ and current_streak > 0
711
+ ):
712
+ current_streak += 1
713
+ else:
714
+ current_streak = 1
715
+ if current_streak > max_streak:
716
+ max_streak = current_streak
717
+ else:
718
+ current_streak = 0
719
+ last_date = current_date
720
+ stats = site_stats.setdefault(
721
+ code,
722
+ {
723
+ "code": code,
724
+ "max_streak_2g": 0,
725
+ "max_streak_3g": 0,
726
+ "max_streak_lte": 0,
727
+ "below_days_2g": 0,
728
+ "below_days_3g": 0,
729
+ "below_days_lte": 0,
730
+ },
731
+ )
732
+ stats[f"max_streak_{rat_key_prefix}"] = max_streak
733
+ stats[f"below_days_{rat_key_prefix}"] = total_below
734
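+ # Worked example (editor's sketch): daily means for 2024-01-01..05 with below-SLA
+ # flags [True, True, False, True, True] yield total_below = 4 and max_streak = 2;
+ # a below-SLA day after a date gap restarts the streak at 1 instead of extending it.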
+
735
+ for rat_col, rat_key, sla in [
736
+ ("2g_tch_avail", "2g", sla_2g),
737
+ ("3g_cell_avail", "3g", sla_3g),
738
+ ("lte_cell_avail", "lte", sla_lte),
739
+ ]:
740
+ if rat_col in work_df.columns:
741
+ g = (
742
+ work_df.dropna(subset=[rat_col])
743
+ .groupby(["code", "date_only"])[rat_col]
744
+ .mean()
745
+ .reset_index()
746
+ )
747
+ g = g.rename(columns={rat_col: "value"})
748
+ _update_stats(rat_key, g, sla)
749
+
750
+ if not site_stats:
751
+ return pd.DataFrame()
752
+
753
+ rows = []
754
+ for code, s in site_stats.items():
755
+ max_2g = s.get("max_streak_2g", 0)
756
+ max_3g = s.get("max_streak_3g", 0)
757
+ max_lte = s.get("max_streak_lte", 0)
758
+ below_2g = s.get("below_days_2g", 0)
759
+ below_3g = s.get("below_days_3g", 0)
760
+ below_lte = s.get("below_days_lte", 0)
761
+ persistent_2g = max_2g >= min_consecutive_days if max_2g else False
762
+ persistent_3g = max_3g >= min_consecutive_days if max_3g else False
763
+ persistent_lte = max_lte >= min_consecutive_days if max_lte else False
764
+ total_below_any = below_2g + below_3g + below_lte
765
+ persistent_any = persistent_2g or persistent_3g or persistent_lte
766
+ rats_persistent_count = sum(
767
+ [persistent_2g is True, persistent_3g is True, persistent_lte is True]
768
+ )
769
+ rows.append(
770
+ {
771
+ "code": code,
772
+ "persistent_issue_2g": persistent_2g,
773
+ "persistent_issue_3g": persistent_3g,
774
+ "persistent_issue_lte": persistent_lte,
775
+ "max_consecutive_days_2g": max_2g,
776
+ "max_consecutive_days_3g": max_3g,
777
+ "max_consecutive_days_lte": max_lte,
778
+ "total_below_days_2g": below_2g,
779
+ "total_below_days_3g": below_3g,
780
+ "total_below_days_lte": below_lte,
781
+ "total_below_days_any": total_below_any,
782
+ "persistent_issue_any": persistent_any,
783
+ "persistent_rats_count": rats_persistent_count,
784
+ }
785
+ )
786
+
787
+ result = pd.DataFrame(rows)
788
+ result = result[result["persistent_issue_any"] == True]
789
+ if result.empty:
790
+ return result
791
+
792
+ if multi_rat_df is not None and not multi_rat_df.empty:
793
+ cols_to_merge = [
794
+ c
795
+ for c in [
796
+ "code",
797
+ "City",
798
+ "post_total_voice_trafic",
799
+ "post_total_data_trafic",
800
+ "post_multirat_status",
801
+ ]
802
+ if c in multi_rat_df.columns
803
+ ]
804
+ if cols_to_merge:
805
+ result = pd.merge(
806
+ result,
807
+ multi_rat_df[cols_to_merge].drop_duplicates("code"),
808
+ on="code",
809
+ how="left",
810
+ )
811
+
812
+ if "post_total_data_trafic" not in result.columns:
813
+ result["post_total_data_trafic"] = 0.0
814
+
815
+ result["criticity_score"] = (
816
+ result["post_total_data_trafic"].fillna(0) * 1.0
817
+ + result["total_below_days_any"].fillna(0) * 100.0
818
+ + result["persistent_rats_count"].fillna(0) * 1000.0
819
+ )
820
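+ # The weights tier the score: each persistently degraded RAT dominates (x1000),
+ # then total below-SLA days (x100), with post data traffic (x1) as a tie-breaker,
+ # e.g. 2 RATs, 5 bad days and 300 MB give 2000 + 500 + 300 = 2800.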
+
821
+ result = result.sort_values(
822
+ by=["criticity_score", "total_below_days_any"], ascending=[False, False]
823
+ )
824
+
825
+ return result
826
+
827
+
828
+ def monthly_data_analysis(df: pd.DataFrame):
829
+ df["date"] = pd.to_datetime(df["date"])
830
+ df["month_year"] = df["date"].dt.to_period("M").astype(str)
831
+
832
+ voice_trafic = df.pivot_table(
833
+ index="code",
834
+ columns="month_year",
835
+ values="total_voice_trafic",
836
+ aggfunc="sum",
837
+ fill_value=0,
838
+ )
839
+ voice_trafic = voice_trafic.reindex(sorted(voice_trafic.columns), axis=1)
840
+
841
+ data_trafic = df.pivot_table(
842
+ index="code",
843
+ columns="month_year",
844
+ values="total_data_trafic",
845
+ aggfunc="sum",
846
+ fill_value=0,
847
+ )
848
+ data_trafic = data_trafic.reindex(sorted(data_trafic.columns), axis=1)
849
+
850
+ return voice_trafic, data_trafic
851
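+ # Caution: monthly_data_analysis mutates the caller's frame in place ("date" is
+ # converted and a "month_year" column is added, which later surfaces in the Excel
+ # export); pass df.copy() if that side effect is unwanted.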
+
852
+
853
+ # --------------------------------------------------------------------------------------
854
+ # Global state for drill-down views & export
855
+ # --------------------------------------------------------------------------------------
856
+
857
+ current_full_df: pd.DataFrame | None = None
858
+ current_last_period_df: pd.DataFrame | None = None
859
+ current_analysis_df: pd.DataFrame | None = None
860
+ current_analysis_last_period_df: pd.DataFrame | None = None
861
+
862
+ current_multi_rat_df: pd.DataFrame | None = None
863
+ current_persistent_df: pd.DataFrame | None = None
864
+
865
+ current_site_2g_avail: pd.DataFrame | None = None
866
+ current_site_3g_avail: pd.DataFrame | None = None
867
+ current_site_lte_avail: pd.DataFrame | None = None
868
+
869
+ current_summary_2g_avail: pd.DataFrame | None = None
870
+ current_summary_3g_avail: pd.DataFrame | None = None
871
+ current_summary_lte_avail: pd.DataFrame | None = None
872
+
873
+ current_monthly_voice_df: pd.DataFrame | None = None
874
+ current_monthly_data_df: pd.DataFrame | None = None
875
+ current_sum_pre_post_df: pd.DataFrame | None = None
876
+ current_avg_pre_post_df: pd.DataFrame | None = None
877
+ current_availability_summary_all_df: pd.DataFrame | None = None
878
+
879
+ current_export_multi_rat_df: pd.DataFrame | None = None
880
+ current_export_persistent_df: pd.DataFrame | None = None
881
+ current_export_bytes: bytes | None = None
882
+
883
+
884
+ # --------------------------------------------------------------------------------------
885
+ # Widgets
886
+ # --------------------------------------------------------------------------------------
887
+
888
+ PLOTLY_CONFIG = {"displaylogo": False, "scrollZoom": True, "displayModeBar": True}
889
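+ # Shared Plotly config: hide the Plotly logo, enable mouse-wheel zoom and keep the
+ # mode bar visible on every pane below.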
+
890
+ file_2g = pn.widgets.FileInput(name="2G Traffic Report", accept=".csv,.zip")
891
+ file_3g = pn.widgets.FileInput(name="3G Traffic Report", accept=".csv,.zip")
892
+ file_lte = pn.widgets.FileInput(name="LTE Traffic Report", accept=".csv,.zip")
893
+
894
+ pre_range = pn.widgets.DateRangePicker(name="Pre-period (from - to)")
895
+ post_range = pn.widgets.DateRangePicker(name="Post-period (from - to)")
896
+ last_range = pn.widgets.DateRangePicker(name="Last period (from - to)")
897
+
898
+ sla_2g = pn.widgets.FloatInput(name="2G TCH availability SLA (%)", value=98.0, step=0.1)
899
+ sla_3g = pn.widgets.FloatInput(
900
+ name="3G Cell availability SLA (%)", value=98.0, step=0.1
901
+ )
902
+ sla_lte = pn.widgets.FloatInput(
903
+ name="LTE Cell availability SLA (%)", value=98.0, step=0.1
904
+ )
905
+
906
+ number_of_top_trafic_sites = pn.widgets.IntInput(
907
+ name="Number of top traffic sites", value=25
908
+ )
909
+
910
+ min_persistent_days_widget = pn.widgets.IntInput(
911
+ name="Minimum consecutive days below SLA to flag persistent issue",
912
+ value=3,
913
+ )
914
+
915
+ top_critical_n_widget = pn.widgets.IntInput(
916
+ name="Number of top critical sites to display", value=25
917
+ )
918
+
919
+ run_button = pn.widgets.Button(name="Run analysis", button_type="primary")
920
+
921
+ status_pane = pn.pane.Alert(
922
+ "Upload the 3 reports, select the 3 periods and click 'Run analysis'",
923
+ alert_type="primary",
924
+ )
925
+
926
+ summary_table = pn.widgets.Tabulator(
927
+ height=250,
928
+ sizing_mode="stretch_width",
929
+ layout="fit_data_table",
930
+ )
931
+
932
+ sum_pre_post_table = pn.widgets.Tabulator(
933
+ height=250,
934
+ sizing_mode="stretch_width",
935
+ layout="fit_data_table",
936
+ )
937
+ summary_2g_table = pn.widgets.Tabulator(
938
+ height=250,
939
+ sizing_mode="stretch_width",
940
+ layout="fit_data_table",
941
+ )
942
+ worst_2g_table = pn.widgets.Tabulator(
943
+ height=250,
944
+ sizing_mode="stretch_width",
945
+ layout="fit_data_table",
946
+ )
947
+ summary_3g_table = pn.widgets.Tabulator(
948
+ height=250,
949
+ sizing_mode="stretch_width",
950
+ layout="fit_data_table",
951
+ )
952
+ worst_3g_table = pn.widgets.Tabulator(
953
+ height=250,
954
+ sizing_mode="stretch_width",
955
+ layout="fit_data_table",
956
+ )
957
+ summary_lte_table = pn.widgets.Tabulator(
958
+ height=250,
959
+ sizing_mode="stretch_width",
960
+ layout="fit_data_table",
961
+ )
962
+ worst_lte_table = pn.widgets.Tabulator(
963
+ height=250,
964
+ sizing_mode="stretch_width",
965
+ layout="fit_data_table",
966
+ )
967
+ multi_rat_table = pn.widgets.Tabulator(
968
+ height=250,
969
+ sizing_mode="stretch_width",
970
+ layout="fit_data_table",
971
+ )
972
+ persistent_table = pn.widgets.Tabulator(
973
+ height=250,
974
+ sizing_mode="stretch_width",
975
+ layout="fit_data_table",
976
+ )
977
+
978
+ site_select = pn.widgets.AutocompleteInput(
979
+ name="Select a site for detailed view (Type to search)",
980
+ options={},
981
+ case_sensitive=False,
982
+ search_strategy="includes",
983
+ restrict=True,
984
+ placeholder="Type site code or city...",
985
+ )
986
+ site_traffic_plot_pane = pn.pane.Plotly(
987
+ sizing_mode="stretch_both",
988
+ config=PLOTLY_CONFIG,
989
+ css_classes=["fullscreen-target-site-traffic"],
990
+ )
991
+ site_traffic_plot = pn.Column(
992
+ site_traffic_plot_pane,
993
+ height=400,
994
+ sizing_mode="stretch_width",
995
+ css_classes=["plot-fullscreen-wrapper", "site-traffic-wrapper"],
996
+ )
997
+ site_avail_plot_pane = pn.pane.Plotly(
998
+ sizing_mode="stretch_both",
999
+ config=PLOTLY_CONFIG,
1000
+ css_classes=["fullscreen-target-site-avail"],
1001
+ )
1002
+ site_avail_plot = pn.Column(
1003
+ site_avail_plot_pane,
1004
+ height=400,
1005
+ sizing_mode="stretch_width",
1006
+ css_classes=["plot-fullscreen-wrapper", "site-avail-wrapper"],
1007
+ )
1008
+ site_degraded_table = pn.widgets.Tabulator(
1009
+ height=200,
1010
+ sizing_mode="stretch_width",
1011
+ layout="fit_data_table",
1012
+ )
1013
+
1014
+ city_select = pn.widgets.AutocompleteInput(
1015
+ name="Select a City for aggregated view (Type to search)",
1016
+ options=[],
1017
+ case_sensitive=False,
1018
+ search_strategy="includes",
1019
+ restrict=True,
1020
+ placeholder="Type city name...",
1021
+ )
1022
+ city_traffic_plot_pane = pn.pane.Plotly(
1023
+ sizing_mode="stretch_both",
1024
+ config=PLOTLY_CONFIG,
1025
+ css_classes=["fullscreen-target-city-traffic"],
1026
+ )
1027
+ city_traffic_plot = pn.Column(
1028
+ city_traffic_plot_pane,
1029
+ height=400,
1030
+ sizing_mode="stretch_width",
1031
+ css_classes=["plot-fullscreen-wrapper", "city-traffic-wrapper"],
1032
+ )
1033
+ city_avail_plot_pane = pn.pane.Plotly(
1034
+ sizing_mode="stretch_both",
1035
+ config=PLOTLY_CONFIG,
1036
+ css_classes=["fullscreen-target-city-avail"],
1037
+ )
1038
+ city_avail_plot = pn.Column(
1039
+ city_avail_plot_pane,
1040
+ height=400,
1041
+ sizing_mode="stretch_width",
1042
+ css_classes=["plot-fullscreen-wrapper", "city-avail-wrapper"],
1043
+ )
1044
+ city_degraded_table = pn.widgets.Tabulator(
1045
+ height=200,
1046
+ sizing_mode="stretch_width",
1047
+ layout="fit_data_table",
1048
+ )
1049
+
1050
+ daily_avail_plot_pane = pn.pane.Plotly(
1051
+ sizing_mode="stretch_both",
1052
+ config=PLOTLY_CONFIG,
1053
+ css_classes=["fullscreen-target-daily-avail"],
1054
+ )
1055
+ daily_avail_plot = pn.Column(
1056
+ daily_avail_plot_pane,
1057
+ height=400,
1058
+ sizing_mode="stretch_width",
1059
+ css_classes=["plot-fullscreen-wrapper", "daily-avail-wrapper"],
1060
+ )
1061
+ daily_degraded_table = pn.widgets.Tabulator(
1062
+ height=200,
1063
+ sizing_mode="stretch_width",
1064
+ layout="fit_data_table",
1065
+ )
1066
+
1067
+ top_data_sites_table = pn.widgets.Tabulator(
1068
+ height=250,
1069
+ sizing_mode="stretch_width",
1070
+ layout="fit_data_table",
1071
+ )
1072
+ top_voice_sites_table = pn.widgets.Tabulator(
1073
+ height=250,
1074
+ sizing_mode="stretch_width",
1075
+ layout="fit_data_table",
1076
+ )
1077
+ top_data_bar_plot_pane = pn.pane.Plotly(
1078
+ sizing_mode="stretch_both",
1079
+ config=PLOTLY_CONFIG,
1080
+ css_classes=["fullscreen-target-top-data"],
1081
+ )
1082
+ top_data_bar_plot = pn.Column(
1083
+ top_data_bar_plot_pane,
1084
+ height=400,
1085
+ sizing_mode="stretch_width",
1086
+ css_classes=["plot-fullscreen-wrapper", "top-data-bar-wrapper"],
1087
+ )
1088
+ top_voice_bar_plot_pane = pn.pane.Plotly(
1089
+ sizing_mode="stretch_both",
1090
+ config=PLOTLY_CONFIG,
1091
+ css_classes=["fullscreen-target-top-voice"],
1092
+ )
1093
+ top_voice_bar_plot = pn.Column(
1094
+ top_voice_bar_plot_pane,
1095
+ height=400,
1096
+ sizing_mode="stretch_width",
1097
+ css_classes=["plot-fullscreen-wrapper", "top-voice-bar-wrapper"],
1098
+ )
1099
+ data_map_plot_pane = pn.pane.Plotly(
1100
+ sizing_mode="stretch_both",
1101
+ config=PLOTLY_CONFIG,
1102
+ css_classes=["fullscreen-target-data-map"],
1103
+ )
1104
+ data_map_plot = pn.Column(
1105
+ data_map_plot_pane,
1106
+ height=500,
1107
+ sizing_mode="stretch_width",
1108
+ css_classes=["plot-fullscreen-wrapper", "data-map-wrapper"],
1109
+ )
1110
+ voice_map_plot_pane = pn.pane.Plotly(
1111
+ sizing_mode="stretch_both",
1112
+ config=PLOTLY_CONFIG,
1113
+ css_classes=["fullscreen-target-voice-map"],
1114
+ )
1115
+ voice_map_plot = pn.Column(
1116
+ voice_map_plot_pane,
1117
+ height=500,
1118
+ sizing_mode="stretch_width",
1119
+ css_classes=["plot-fullscreen-wrapper", "voice-map-wrapper"],
1120
+ )
1121
+
1122
+ # Fullscreen helper logic has been replaced by client-side JS.
1123
+
1124
+ # Fullscreen buttons for each Plotly plot
1125
+ site_traffic_fullscreen_btn = pn.widgets.Button(
1126
+ name="Full screen site traffic", button_type="default"
1127
+ )
1128
+ site_avail_fullscreen_btn = pn.widgets.Button(
1129
+ name="Full screen site availability", button_type="default"
1130
+ )
1131
+ city_traffic_fullscreen_btn = pn.widgets.Button(
1132
+ name="Full screen city traffic", button_type="default"
1133
+ )
1134
+ city_avail_fullscreen_btn = pn.widgets.Button(
1135
+ name="Full screen city availability", button_type="default"
1136
+ )
1137
+ daily_avail_fullscreen_btn = pn.widgets.Button(
1138
+ name="Full screen daily availability", button_type="default"
1139
+ )
1140
+ top_data_fullscreen_btn = pn.widgets.Button(
1141
+ name="Full screen top data bar", button_type="default"
1142
+ )
1143
+ top_voice_fullscreen_btn = pn.widgets.Button(
1144
+ name="Full screen top voice bar", button_type="default"
1145
+ )
1146
+ data_map_fullscreen_btn = pn.widgets.Button(
1147
+ name="Full screen data map", button_type="default"
1148
+ )
1149
+ voice_map_fullscreen_btn = pn.widgets.Button(
1150
+ name="Full screen voice map", button_type="default"
1151
+ )
1152
+
1153
+ multi_rat_download = pn.widgets.FileDownload(
1154
+ label="Download Multi-RAT table (CSV)",
1155
+ filename="multi_rat_availability.csv",
1156
+ button_type="default",
1157
+ )
1158
+
1159
+ persistent_download = pn.widgets.FileDownload(
1160
+ label="Download persistent issues (CSV)",
1161
+ filename="persistent_issues.csv",
1162
+ button_type="default",
1163
+ )
1164
+
1165
+ top_data_download = pn.widgets.FileDownload(
1166
+ label="Download top data sites (CSV)",
1167
+ filename="top_data_sites.csv",
1168
+ button_type="default",
1169
+ )
1170
+
1171
+ top_voice_download = pn.widgets.FileDownload(
1172
+ label="Download top voice sites (CSV)",
1173
+ filename="top_voice_sites.csv",
1174
+ button_type="default",
1175
+ )
1176
+
1177
+ export_button = pn.widgets.FileDownload(
1178
+ label="Download the Analysis Report",
1179
+ filename="Global_Trafic_Analysis_Report.xlsx",
1180
+ button_type="primary",
1181
+ )
1182
+
1183
+
1184
+ # --------------------------------------------------------------------------------------
1185
+ # Callback
1186
+ # --------------------------------------------------------------------------------------
1187
+
1188
+
1189
+ def _validate_date_range(rng: tuple[date, date] | list[date], label: str) -> None:
1190
+ if not rng or len(rng) != 2:
1191
+ raise ValueError(f"Please select 2 dates for {label}.")
1192
+ if rng[0] is None or rng[1] is None:
1193
+ raise ValueError(f"Please select valid dates for {label}.")
1194
+
1195
+
1196
+ def run_analysis(event=None): # event param required by on_click
1197
+ try:
1198
+ status_pane.object = "Running analysis..."
1199
+ status_pane.alert_type = "primary"
1200
+
1201
+ global current_full_df, current_last_period_df
1202
+ global current_analysis_df, current_analysis_last_period_df
1203
+ global current_multi_rat_df, current_persistent_df
1204
+ global current_site_2g_avail, current_site_3g_avail, current_site_lte_avail
1205
+ global \
1206
+ current_summary_2g_avail, \
1207
+ current_summary_3g_avail, \
1208
+ current_summary_lte_avail
1209
+ global current_monthly_voice_df, current_monthly_data_df
1210
+ global current_sum_pre_post_df, current_avg_pre_post_df
1211
+ global current_availability_summary_all_df
1212
+ global current_export_multi_rat_df, current_export_persistent_df
1213
+ global current_export_bytes
1214
+
1215
+ # Basic validations
1216
+ if not (file_2g.value and file_3g.value and file_lte.value):
1217
+ raise ValueError("Please upload all 3 traffic reports (2G, 3G, LTE).")
1218
+
1219
+ _validate_date_range(pre_range.value, "pre-period")
1220
+ _validate_date_range(post_range.value, "post-period")
1221
+ _validate_date_range(last_range.value, "last period")
1222
+
1223
+ # Light overlap check (same spirit as the Streamlit version): only identical or fully nested pre/post periods are rejected; a stricter variant is sketched below.
1224
+ pre_start, pre_end = pre_range.value
1225
+ post_start, post_end = post_range.value
1226
+ if pre_start == post_start and pre_end == post_end:
1227
+ raise ValueError("Pre and post periods are the same.")
1228
+ if pre_start < post_start and pre_end > post_end:
1229
+ raise ValueError("Pre and post periods are overlapping.")
1230
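+ # A stricter guard (sketch, not the original Streamlit behaviour) would reject
+ # any overlap at all:
+ # if pre_start <= post_end and post_start <= pre_end:
+ #     raise ValueError("Pre and post periods are overlapping.")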
+
1231
+ df_2g = read_fileinput_to_df(file_2g)
1232
+ df_3g = read_fileinput_to_df(file_3g)
1233
+ df_lte = read_fileinput_to_df(file_lte)
1234
+
1235
+ if df_2g is None or df_3g is None or df_lte is None:
1236
+ raise ValueError("Failed to read one or more input files.")
1237
+
1238
+ summary = pd.DataFrame(
1239
+ {
1240
+ "Dataset": ["2G", "3G", "LTE"],
1241
+ "Rows": [len(df_2g), len(df_3g), len(df_lte)],
1242
+ "Columns": [df_2g.shape[1], df_3g.shape[1], df_lte.shape[1]],
1243
+ }
1244
+ )
1245
+ summary_table.value = summary
1246
+
1247
+ df_2g_clean = preprocess_2g(df_2g)
1248
+ df_3g_clean = preprocess_3g(df_3g)
1249
+ df_lte_clean = preprocess_lte(df_lte)
1250
+
1251
+ full_df, last_period, sum_pre_post_analysis, avg_pre_post_analysis = (
1252
+ merge_and_compare(
1253
+ df_2g_clean,
1254
+ df_3g_clean,
1255
+ df_lte_clean,
1256
+ pre_range.value,
1257
+ post_range.value,
1258
+ last_range.value,
1259
+ )
1260
+ )
1261
+
1262
+ monthly_voice_df, monthly_data_df = monthly_data_analysis(full_df)
1263
+
1264
+ analysis_df = full_df
1265
+
1266
+ # Persist global state for later drill-down / export
1267
+ current_full_df = full_df
1268
+ current_last_period_df = last_period
1269
+ current_analysis_df = analysis_df
1270
+ current_analysis_last_period_df = last_period
1271
+ current_monthly_voice_df = monthly_voice_df
1272
+ current_monthly_data_df = monthly_data_df
1273
+ current_sum_pre_post_df = sum_pre_post_analysis
1274
+ current_avg_pre_post_df = avg_pre_post_analysis
1275
+
1276
+ sum_pre_post_table.value = sum_pre_post_analysis
1277
+
1278
+ summary_2g_avail, site_2g_avail = analyze_2g_availability(
1279
+ analysis_df, float(sla_2g.value)
1280
+ )
1281
+ if summary_2g_avail is not None:
1282
+ summary_2g_table.value = summary_2g_avail.round(2)
1283
+ worst_sites_2g = site_2g_avail.sort_values("tch_avail_post").head(25)
1284
+ worst_2g_table.value = worst_sites_2g.round(2)
1285
+ else:
1286
+ summary_2g_table.value = pd.DataFrame()
1287
+ worst_2g_table.value = pd.DataFrame()
1288
+
1289
+ current_summary_2g_avail = summary_2g_avail
1290
+ current_site_2g_avail = site_2g_avail if summary_2g_avail is not None else None
1291
+
1292
+ summary_3g_avail, site_3g_avail = analyze_3g_availability(
1293
+ analysis_df, float(sla_3g.value)
1294
+ )
1295
+ if summary_3g_avail is not None:
1296
+ summary_3g_table.value = summary_3g_avail.round(2)
1297
+ worst_sites_3g = site_3g_avail.sort_values("cell_avail_post").head(25)
1298
+ worst_3g_table.value = worst_sites_3g.round(2)
1299
+ else:
1300
+ summary_3g_table.value = pd.DataFrame()
1301
+ worst_3g_table.value = pd.DataFrame()
1302
+
1303
+ current_summary_3g_avail = summary_3g_avail
1304
+ current_site_3g_avail = site_3g_avail if summary_3g_avail is not None else None
1305
+
1306
+ summary_lte_avail, site_lte_avail = analyze_lte_availability(
1307
+ analysis_df, float(sla_lte.value)
1308
+ )
1309
+ if summary_lte_avail is not None:
1310
+ summary_lte_table.value = summary_lte_avail.round(2)
1311
+ worst_sites_lte = site_lte_avail.sort_values("lte_avail_post").head(25)
1312
+ worst_lte_table.value = worst_sites_lte.round(2)
1313
+ else:
1314
+ summary_lte_table.value = pd.DataFrame()
1315
+ worst_lte_table.value = pd.DataFrame()
1316
+
1317
+ current_summary_lte_avail = summary_lte_avail
1318
+ current_site_lte_avail = (
1319
+ site_lte_avail if summary_lte_avail is not None else None
1320
+ )
1321
+
1322
+ # Build availability summary across RATs for export
1323
+ availability_frames = []
1324
+ if summary_2g_avail is not None:
1325
+ tmp = summary_2g_avail.copy()
1326
+ tmp["RAT"] = "2G"
1327
+ availability_frames.append(tmp)
1328
+ if summary_3g_avail is not None:
1329
+ tmp = summary_3g_avail.copy()
1330
+ tmp["RAT"] = "3G"
1331
+ availability_frames.append(tmp)
1332
+ if summary_lte_avail is not None:
1333
+ tmp = summary_lte_avail.copy()
1334
+ tmp["RAT"] = "LTE"
1335
+ availability_frames.append(tmp)
1336
+
1337
+ current_availability_summary_all_df = (
1338
+ pd.concat(availability_frames, ignore_index=True)
1339
+ if availability_frames
1340
+ else pd.DataFrame()
1341
+ )
1342
+
1343
+ multi_rat_df = analyze_multirat_availability(
1344
+ analysis_df,
1345
+ float(sla_2g.value),
1346
+ float(sla_3g.value),
1347
+ float(sla_lte.value),
1348
+ )
1349
+ if multi_rat_df is not None:
1350
+ multi_rat_table.value = multi_rat_df.round(2)
1351
+ else:
1352
+ multi_rat_table.value = pd.DataFrame()
1353
+
1354
+ current_multi_rat_df = multi_rat_df if multi_rat_df is not None else None
1355
+
1356
+ # Persistent availability (UI uses configurable threshold, export keeps 3 days)
1357
+ persistent_df = pd.DataFrame()
1358
+ if multi_rat_df is not None:
1359
+ persistent_df = analyze_persistent_availability(
1360
+ analysis_df,
1361
+ multi_rat_df,
1362
+ float(sla_2g.value),
1363
+ float(sla_3g.value),
1364
+ float(sla_lte.value),
1365
+ int(min_persistent_days_widget.value),
1366
+ )
1367
+
1368
+ current_persistent_df = (
1369
+ persistent_df
1370
+ if persistent_df is not None and not persistent_df.empty
1371
+ else None
1372
+ )
1373
+
1374
+ # Export-specific multi-RAT & persistent (based on full_df as in Streamlit app)
1375
+ export_multi_rat_base = analyze_multirat_availability(
1376
+ full_df,
1377
+ float(sla_2g.value),
1378
+ float(sla_3g.value),
1379
+ float(sla_lte.value),
1380
+ )
1381
+ current_export_multi_rat_df = (
1382
+ export_multi_rat_base
1383
+ if export_multi_rat_base is not None
1384
+ else pd.DataFrame()
1385
+ )
1386
+
1387
+ export_persistent_tmp = pd.DataFrame()
1388
+ if export_multi_rat_base is not None:
1389
+ export_persistent_tmp = analyze_persistent_availability(
1390
+ full_df,
1391
+ export_multi_rat_base,
1392
+ float(sla_2g.value),
1393
+ float(sla_3g.value),
1394
+ float(sla_lte.value),
1395
+ 3,
1396
+ )
1397
+ current_export_persistent_df = (
1398
+ export_persistent_tmp
1399
+ if export_persistent_tmp is not None and not export_persistent_tmp.empty
1400
+ else pd.DataFrame()
1401
+ )
1402
+
1403
+ # Precompute export bytes so the download button is instant
1404
+ current_export_bytes = _build_export_bytes()
1405
+
1406
+ # Update export filename with timestamp for clarity
1407
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1408
+ export_button.filename = f"Global_Trafic_Analysis_Report_{timestamp}.xlsx"
1409
+
1410
+ # Update all drill-down & map views
1411
+ _update_site_controls()
1412
+ _update_city_controls()
1413
+ _update_daily_availability_view()
1414
+ _update_top_sites_and_maps()
1415
+ _update_persistent_table_view()
1416
+
1417
+ status_pane.alert_type = "success"
1418
+ status_pane.object = "Analysis completed."
1419
+
1420
+ except Exception as exc: # noqa: BLE001
1421
+ status_pane.alert_type = "danger"
1422
+ status_pane.object = f"Error: {exc}"
1423
+
1424
+
1425
+ run_button.on_click(run_analysis)
1426
+
1427
+
1428
+ def _update_site_controls() -> None:
1429
+ """Populate site selection widget based on current_analysis_df and refresh view."""
1430
+ if current_analysis_df is None or current_analysis_df.empty:
1431
+ site_select.options = {}
1432
+ site_select.value = None
1433
+ site_traffic_plot_pane.object = None
1434
+ site_avail_plot_pane.object = None
1435
+ site_degraded_table.value = pd.DataFrame()
1436
+ return
1437
+
1438
+ sites_df = (
1439
+ current_analysis_df[["code", "City"]]
1440
+ .drop_duplicates()
1441
+ .sort_values(by=["City", "code"])
1442
+ )
1443
+
1444
+ options: dict[str, int] = {}
1445
+ for _, row in sites_df.iterrows():
1446
+ label = (
1447
+ f"{row['City']}_{row['code']}"
1448
+ if pd.notna(row["City"])
1449
+ else str(row["code"])
1450
+ )
1451
+ options[label] = int(row["code"])
1452
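+ # Example (hypothetical row): City "Rabat" with code 1021 yields the entry
+ # {"Rabat_1021": 1021}; the widget displays the label and returns the int code.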
+
1453
+ site_select.options = options
1454
+ if options and site_select.value not in options.values():
1455
+ # When options is a dict, the AutocompleteInput value is the mapped value (code)
1456
+ site_select.value = next(iter(options.values()))
1457
+
1458
+ _update_site_view()
1459
+
1460
+
1461
+ def _update_site_view(event=None) -> None: # noqa: D401, ARG001
1462
+ """Update site drill-down plots and table from current_analysis_df and site_select."""
1463
+ if current_analysis_df is None or current_analysis_df.empty:
1464
+ site_traffic_plot_pane.object = None
1465
+ site_avail_plot_pane.object = None
1466
+ site_degraded_table.value = pd.DataFrame()
1467
+ return
1468
+
1469
+ selected_code = site_select.value
1470
+ if selected_code is None:
1471
+ site_traffic_plot_pane.object = None
1472
+ site_avail_plot_pane.object = None
1473
+ site_degraded_table.value = pd.DataFrame()
1474
+ return
1475
+
1476
+ site_detail_df = current_analysis_df[
1477
+ current_analysis_df["code"] == int(selected_code)
1478
+ ].copy()
1479
+ if site_detail_df.empty:
1480
+ site_traffic_plot_pane.object = None
1481
+ site_avail_plot_pane.object = None
1482
+ site_degraded_table.value = pd.DataFrame()
1483
+ return
1484
+
1485
+ site_detail_df = site_detail_df.sort_values("date")
1486
+
1487
+ # Traffic over time
1488
+ traffic_cols = [
1489
+ col
1490
+ for col in ["total_voice_trafic", "total_data_trafic"]
1491
+ if col in site_detail_df.columns
1492
+ ]
1493
+ first_row = site_detail_df.iloc[0]
1494
+ site_label = f"{first_row['code']}"
1495
+ if pd.notna(first_row.get("City")):
1496
+ site_label += f" ({first_row['City']})"
1497
+
1498
+ if traffic_cols:
1499
+ traffic_long = site_detail_df[["date"] + traffic_cols].melt(
1500
+ id_vars="date",
1501
+ value_vars=traffic_cols,
1502
+ var_name="metric",
1503
+ value_name="value",
1504
+ )
1505
+ fig_traffic = px.line(
1506
+ traffic_long,
1507
+ x="date",
1508
+ y="value",
1509
+ color="metric",
1510
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1511
+ )
1512
+ fig_traffic.update_layout(
1513
+ title=f"Traffic Evolution - Site: {site_label}",
1514
+ template="plotly_white",
1515
+ plot_bgcolor="white",
1516
+ paper_bgcolor="white",
1517
+ )
1518
+ site_traffic_plot_pane.object = fig_traffic
1519
+ else:
1520
+ site_traffic_plot_pane.object = None
1521
+
1522
+ # Availability over time per RAT
1523
+ avail_cols: list[str] = []
1524
+ rename_map: dict[str, str] = {}
1525
+ if "2g_tch_avail" in site_detail_df.columns:
1526
+ avail_cols.append("2g_tch_avail")
1527
+ rename_map["2g_tch_avail"] = "2G"
1528
+ if "3g_cell_avail" in site_detail_df.columns:
1529
+ avail_cols.append("3g_cell_avail")
1530
+ rename_map["3g_cell_avail"] = "3G"
1531
+ if "lte_cell_avail" in site_detail_df.columns:
1532
+ avail_cols.append("lte_cell_avail")
1533
+ rename_map["lte_cell_avail"] = "LTE"
1534
+
1535
+ if avail_cols:
1536
+ avail_df = site_detail_df[["date"] + avail_cols].copy()
1537
+ avail_df = avail_df.rename(columns=rename_map)
1538
+ value_cols = [c for c in avail_df.columns if c != "date"]
1539
+ avail_long = avail_df.melt(
1540
+ id_vars="date",
1541
+ value_vars=value_cols,
1542
+ var_name="RAT",
1543
+ value_name="availability",
1544
+ )
1545
+ fig_avail = px.line(
1546
+ avail_long,
1547
+ x="date",
1548
+ y="availability",
1549
+ color="RAT",
1550
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1551
+ )
1552
+ fig_avail.update_layout(
1553
+ title=f"Availability vs SLA - Site: {site_label}",
1554
+ template="plotly_white",
1555
+ plot_bgcolor="white",
1556
+ paper_bgcolor="white",
1557
+ )
1558
+ site_avail_plot_pane.object = fig_avail
1559
+
1560
+ # Days with availability below SLA per RAT
1561
+ site_detail_df["date_only"] = site_detail_df["date"].dt.date
1562
+ degraded_rows_site: list[dict] = []
1563
+ for rat_col, rat_name, sla_value in [
1564
+ ("2g_tch_avail", "2G", float(sla_2g.value)),
1565
+ ("3g_cell_avail", "3G", float(sla_3g.value)),
1566
+ ("lte_cell_avail", "LTE", float(sla_lte.value)),
1567
+ ]:
1568
+ if rat_col in site_detail_df.columns:
1569
+ daily_site = (
1570
+ site_detail_df.groupby("date_only")[rat_col].mean().dropna()
1571
+ )
1572
+ mask = daily_site < sla_value
1573
+ for d, val in daily_site[mask].items():
1574
+ degraded_rows_site.append(
1575
+ {
1576
+ "RAT": rat_name,
1577
+ "date": d,
1578
+ "avg_availability": val,
1579
+ "SLA": sla_value,
1580
+ }
1581
+ )
1582
+ if degraded_rows_site:
1583
+ degraded_site_df = pd.DataFrame(degraded_rows_site)
1584
+ site_degraded_table.value = degraded_site_df.round(2)
1585
+ else:
1586
+ site_degraded_table.value = pd.DataFrame()
1587
+ else:
1588
+ site_avail_plot_pane.object = None
1589
+ site_degraded_table.value = pd.DataFrame()
1590
+
1591
+
1592
+ def _update_city_controls() -> None:
1593
+ """Populate city selection widget based on current_analysis_df and refresh view."""
1594
+ if current_analysis_df is None or current_analysis_df.empty:
1595
+ city_select.options = []
1596
+ city_select.value = None
1597
+ city_traffic_plot_pane.object = None
1598
+ city_avail_plot_pane.object = None
1599
+ city_degraded_table.value = pd.DataFrame()
1600
+ return
1601
+
1602
+ if (
1603
+ "City" not in current_analysis_df.columns
1604
+ or not current_analysis_df["City"].notna().any()
1605
+ ):
1606
+ city_select.options = []
1607
+ city_select.value = None
1608
+ city_traffic_plot_pane.object = None
1609
+ city_avail_plot_pane.object = None  # Plotly pane expects a figure or None, not a DataFrame
1610
+ city_degraded_table.value = pd.DataFrame()
1611
+ return
1612
+
1613
+ cities_df = (
1614
+ current_analysis_df[["City"]].dropna().drop_duplicates().sort_values(by="City")
1615
+ )
1616
+ options = cities_df["City"].tolist()
1617
+ city_select.options = options
1618
+ if options and city_select.value not in options:
1619
+ city_select.value = options[0]
1620
+
1621
+ _update_city_view()
1622
+
1623
+
1624
+ def _update_city_view(event=None) -> None: # noqa: D401, ARG001
1625
+ """Update city drill-down plots and degraded days table based on city_select."""
1626
+ if current_analysis_df is None or current_analysis_df.empty:
1627
+ city_traffic_plot_pane.object = None
1628
+ city_avail_plot_pane.object = None
1629
+ city_degraded_table.value = pd.DataFrame()
1630
+ return
1631
+
1632
+ selected_city = city_select.value
1633
+ if not selected_city:
1634
+ city_traffic_plot_pane.object = None
1635
+ city_avail_plot_pane.object = None
1636
+ city_degraded_table.value = pd.DataFrame()
1637
+ return
1638
+
1639
+ city_detail_df = current_analysis_df[
1640
+ current_analysis_df["City"] == selected_city
1641
+ ].copy()
1642
+ if city_detail_df.empty:
1643
+ city_traffic_plot_pane.object = None
1644
+ city_avail_plot_pane.object = None
1645
+ city_degraded_table.value = pd.DataFrame()
1646
+ return
1647
+
1648
+ city_detail_df = city_detail_df.sort_values("date")
1649
+
1650
+ # Traffic aggregated at city level
1651
+ traffic_cols_city = [
1652
+ col
1653
+ for col in ["total_voice_trafic", "total_data_trafic"]
1654
+ if col in city_detail_df.columns
1655
+ ]
1656
+ if traffic_cols_city:
1657
+ city_traffic = (
1658
+ city_detail_df.groupby("date")[traffic_cols_city].sum().reset_index()
1659
+ )
1660
+ traffic_long_city = city_traffic.melt(
1661
+ id_vars="date",
1662
+ value_vars=traffic_cols_city,
1663
+ var_name="metric",
1664
+ value_name="value",
1665
+ )
1666
+ fig_traffic_city = px.line(
1667
+ traffic_long_city,
1668
+ x="date",
1669
+ y="value",
1670
+ color="metric",
1671
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1672
+ )
1673
+ fig_traffic_city.update_layout(
1674
+ title=f"Total Traffic Evolution - City: {selected_city}",
1675
+ template="plotly_white",
1676
+ plot_bgcolor="white",
1677
+ paper_bgcolor="white",
1678
+ )
1679
+ city_traffic_plot_pane.object = fig_traffic_city
1680
+ else:
1681
+ city_traffic_plot_pane.object = None
1682
+
1683
+ # Availability aggregated at city level
1684
+ avail_cols_city: list[str] = []
1685
+ rename_map_city: dict[str, str] = {}
1686
+ if "2g_tch_avail" in city_detail_df.columns:
1687
+ avail_cols_city.append("2g_tch_avail")
1688
+ rename_map_city["2g_tch_avail"] = "2G"
1689
+ if "3g_cell_avail" in city_detail_df.columns:
1690
+ avail_cols_city.append("3g_cell_avail")
1691
+ rename_map_city["3g_cell_avail"] = "3G"
1692
+ if "lte_cell_avail" in city_detail_df.columns:
1693
+ avail_cols_city.append("lte_cell_avail")
1694
+ rename_map_city["lte_cell_avail"] = "LTE"
1695
+
1696
+ if avail_cols_city:
1697
+ avail_city_df = city_detail_df[["date"] + avail_cols_city].copy()
1698
+ avail_city_df = avail_city_df.rename(columns=rename_map_city)
1699
+ value_cols_city = [c for c in avail_city_df.columns if c != "date"]
1700
+ avail_long_city = avail_city_df.melt(
1701
+ id_vars="date",
1702
+ value_vars=value_cols_city,
1703
+ var_name="RAT",
1704
+ value_name="availability",
1705
+ )
1706
+ fig_avail_city = px.line(
1707
+ avail_long_city,
1708
+ x="date",
1709
+ y="availability",
1710
+ color="RAT",
1711
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1712
+ )
1713
+ fig_avail_city.update_layout(
1714
+ title=f"Availability vs SLA - City: {selected_city}",
1715
+ template="plotly_white",
1716
+ plot_bgcolor="white",
1717
+ paper_bgcolor="white",
1718
+ )
1719
+ city_avail_plot_pane.object = fig_avail_city
1720
+
1721
+ city_detail_df["date_only"] = city_detail_df["date"].dt.date
1722
+ degraded_rows_city: list[dict] = []
1723
+ for rat_col, rat_name, sla_value in [
1724
+ ("2g_tch_avail", "2G", float(sla_2g.value)),
1725
+ ("3g_cell_avail", "3G", float(sla_3g.value)),
1726
+ ("lte_cell_avail", "LTE", float(sla_lte.value)),
1727
+ ]:
1728
+ if rat_col in city_detail_df.columns:
1729
+ daily_city = (
1730
+ city_detail_df.groupby("date_only")[rat_col].mean().dropna()
1731
+ )
1732
+ mask_city = daily_city < sla_value
1733
+ for d, val in daily_city[mask_city].items():
1734
+ degraded_rows_city.append(
1735
+ {
1736
+ "RAT": rat_name,
1737
+ "date": d,
1738
+ "avg_availability": val,
1739
+ "SLA": sla_value,
1740
+ }
1741
+ )
1742
+ if degraded_rows_city:
1743
+ degraded_city_df = pd.DataFrame(degraded_rows_city)
1744
+ city_degraded_table.value = degraded_city_df.round(2)
1745
+ else:
1746
+ city_degraded_table.value = pd.DataFrame()
1747
+ else:
1748
+ city_avail_plot_pane.object = None
1749
+ city_degraded_table.value = pd.DataFrame()
1750
+
1751
+
1752
+ def _update_daily_availability_view() -> None:
1753
+ """Daily average availability per RAT over the full analysis_df."""
1754
+ if current_analysis_df is None or current_analysis_df.empty:
1755
+ daily_avail_plot_pane.object = None
1756
+ daily_degraded_table.value = pd.DataFrame()
1757
+ return
1758
+
1759
+ temp_df = current_analysis_df.copy()
1760
+ if not any(
1761
+ col in temp_df.columns
1762
+ for col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]
1763
+ ):
1764
+ daily_avail_plot_pane.object = None
1765
+ daily_degraded_table.value = pd.DataFrame()
1766
+ return
1767
+
1768
+ temp_df["date_only"] = temp_df["date"].dt.date
1769
+
1770
+ agg_dict: dict[str, str] = {}
1771
+ if "2g_tch_avail" in temp_df.columns:
1772
+ agg_dict["2g_tch_avail"] = "mean"
1773
+ if "3g_cell_avail" in temp_df.columns:
1774
+ agg_dict["3g_cell_avail"] = "mean"
1775
+ if "lte_cell_avail" in temp_df.columns:
1776
+ agg_dict["lte_cell_avail"] = "mean"
1777
+
1778
+ daily_avail = (
1779
+ temp_df.groupby("date_only", as_index=False).agg(agg_dict)
1780
+ if agg_dict
1781
+ else pd.DataFrame()
1782
+ )
1783
+
1784
+ if daily_avail.empty:
1785
+ daily_avail_plot_pane.object = None
1786
+ daily_degraded_table.value = pd.DataFrame()
1787
+ return
1788
+
1789
+ rename_map: dict[str, str] = {}
1790
+ if "2g_tch_avail" in daily_avail.columns:
1791
+ rename_map["2g_tch_avail"] = "2G"
1792
+ if "3g_cell_avail" in daily_avail.columns:
1793
+ rename_map["3g_cell_avail"] = "3G"
1794
+ if "lte_cell_avail" in daily_avail.columns:
1795
+ rename_map["lte_cell_avail"] = "LTE"
1796
+
1797
+ daily_avail = daily_avail.rename(columns=rename_map)
1798
+
1799
+ value_cols = [c for c in daily_avail.columns if c != "date_only"]
1800
+ if not value_cols:
1801
+ daily_avail_plot_pane.object = None
1802
+ daily_degraded_table.value = pd.DataFrame()
1803
+ return
1804
+
1805
+ daily_melt = daily_avail.melt(
1806
+ id_vars="date_only",
1807
+ value_vars=value_cols,
1808
+ var_name="RAT",
1809
+ value_name="availability",
1810
+ )
1811
+
1812
+ fig = px.line(
1813
+ daily_melt,
1814
+ x="date_only",
1815
+ y="availability",
1816
+ color="RAT",
1817
+ markers=True,
1818
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1819
+ )
1820
+ fig.update_layout(
1821
+ template="plotly_white",
1822
+ plot_bgcolor="white",
1823
+ paper_bgcolor="white",
1824
+ )
1825
+ daily_avail_plot_pane.object = fig
1826
+
1827
+ degraded_rows: list[dict] = []
1828
+ for rat_name, sla_value in [
1829
+ ("2G", float(sla_2g.value)),
1830
+ ("3G", float(sla_3g.value)),
1831
+ ("LTE", float(sla_lte.value)),
1832
+ ]:
1833
+ if rat_name in daily_avail.columns:
1834
+ series = daily_avail[rat_name]
1835
+ mask = series < sla_value
1836
+ for d, val in zip(daily_avail.loc[mask, "date_only"], series[mask]):
1837
+ degraded_rows.append(
1838
+ {
1839
+ "RAT": rat_name,
1840
+ "date": d,
1841
+ "avg_availability": val,
1842
+ "SLA": sla_value,
1843
+ }
1844
+ )
1845
+
1846
+ if degraded_rows:
1847
+ degraded_df = pd.DataFrame(degraded_rows)
1848
+ daily_degraded_table.value = degraded_df.round(2)
1849
+ else:
1850
+ daily_degraded_table.value = pd.DataFrame()
1851
+
1852
+
1853
+ def _update_top_sites_and_maps() -> None:
1854
+ """Top traffic sites and geographic maps based on last analysis period."""
1855
+ if current_analysis_last_period_df is None or current_analysis_last_period_df.empty:
1856
+ top_data_sites_table.value = pd.DataFrame()
1857
+ top_voice_sites_table.value = pd.DataFrame()
1858
+ top_data_bar_plot_pane.object = None
1859
+ top_voice_bar_plot_pane.object = None
1860
+ data_map_plot_pane.object = None
1861
+ voice_map_plot_pane.object = None
1862
+ return
1863
+
1864
+ df = current_analysis_last_period_df
1865
+ n = int(number_of_top_trafic_sites.value or 25)
1866
+
1867
+ # Top sites by data traffic
1868
+ top_sites = (
1869
+ df.groupby(["code", "City"])["total_data_trafic"]
1870
+ .sum()
1871
+ .sort_values(ascending=False)
1872
+ .head(n)
1873
+ )
1874
+ top_data_sites_table.value = top_sites.sort_values(ascending=True).reset_index()
1875
+
1876
+ fig_data = px.bar(
1877
+ top_sites.reset_index(),
1878
+ y=top_sites.reset_index()[["City", "code"]].agg(
1879
+ lambda x: "_".join(map(str, x)), axis=1
1880
+ ),
1881
+ x="total_data_trafic",
1882
+ title=f"Top {n} sites by data traffic",
1883
+ orientation="h",
1884
+ text="total_data_trafic",
1885
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1886
+ )
1887
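+ # The y labels are built as "City_code" (e.g. "Rabat_1021") so bars stay unique
+ # when several sites share a city; the voice chart below reuses the same scheme.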
+ fig_data.update_layout(
1888
+ template="plotly_white",
1889
+ plot_bgcolor="white",
1890
+ paper_bgcolor="white",
1891
+ )
1892
+ top_data_bar_plot_pane.object = fig_data
1893
+
1894
+ # Top sites by voice traffic
1895
+ top_sites_voice = (
1896
+ df.groupby(["code", "City"])["total_voice_trafic"]
1897
+ .sum()
1898
+ .sort_values(ascending=False)
1899
+ .head(n)
1900
+ )
1901
+ top_voice_sites_table.value = top_sites_voice.sort_values(
1902
+ ascending=True
1903
+ ).reset_index()
1904
+
1905
+ fig_voice = px.bar(
1906
+ top_sites_voice.reset_index(),
1907
+ y=top_sites_voice.reset_index()[["City", "code"]].agg(
1908
+ lambda x: "_".join(map(str, x)), axis=1
1909
+ ),
1910
+ x="total_voice_trafic",
1911
+ title=f"Top {n} sites by voice traffic",
1912
+ orientation="h",
1913
+ text="total_voice_trafic",
1914
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1915
+ )
1916
+ fig_voice.update_layout(
1917
+ template="plotly_white",
1918
+ plot_bgcolor="white",
1919
+ paper_bgcolor="white",
1920
+ )
1921
+ top_voice_bar_plot_pane.object = fig_voice
1922
+
1923
+ # Maps
1924
+ if {"Latitude", "Longitude"}.issubset(df.columns):
1925
+ min_size = 5
1926
+ max_size = 40
1927
+
1928
+ # Data traffic map
1929
+ df_data = (
1930
+ df.groupby(["code", "City", "Latitude", "Longitude"])["total_data_trafic"]
1931
+ .sum()
1932
+ .reset_index()
1933
+ )
1934
+ if not df_data.empty:
1935
+ traffic_data_min = df_data["total_data_trafic"].min()
1936
+ traffic_data_max = df_data["total_data_trafic"].max()
1937
+ if traffic_data_max > traffic_data_min:
1938
+ df_data["bubble_size"] = df_data["total_data_trafic"].apply(
1939
+ lambda x: min_size
1940
+ + (max_size - min_size)
1941
+ * (x - traffic_data_min)
1942
+ / (traffic_data_max - traffic_data_min)
1943
+ )
1944
+ else:
1945
+ df_data["bubble_size"] = min_size
1946
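+ # Linear min-max mapping into [min_size, max_size]: the busiest site gets a 40 px
+ # bubble, the quietest 5 px; if all sites carry equal traffic everything falls
+ # back to min_size.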
+
1947
+ custom_blue_red = [
1948
+ [0.0, "#4292c6"],
1949
+ [0.2, "#2171b5"],
1950
+ [0.4, "#084594"],
1951
+ [0.6, "#cb181d"],
1952
+ [0.8, "#a50f15"],
1953
+ [1.0, "#67000d"],
1954
+ ]
1955
+
1956
+ fig_map_data = px.scatter_map(
1957
+ df_data,
1958
+ lat="Latitude",
1959
+ lon="Longitude",
1960
+ color="total_data_trafic",
1961
+ size="bubble_size",
1962
+ color_continuous_scale=custom_blue_red,
1963
+ size_max=max_size,
1964
+ zoom=10,
1965
+ height=600,
1966
+ title="Data traffic distribution",
1967
+ hover_data={"code": True, "total_data_trafic": True},
1968
+ hover_name="code",
1969
+ text=[str(x) for x in df_data["code"]],
1970
+ )
1971
+ fig_map_data.update_layout(
1972
+ mapbox_style="open-street-map",
1973
+ coloraxis_colorbar=dict(title="Total Data Traffic (MB)"),
1974
+ coloraxis=dict(cmin=traffic_data_min, cmax=traffic_data_max),
1975
+ font=dict(size=10, color="black"),
1976
+ )
1977
+ data_map_plot_pane.object = fig_map_data
1978
+ else:
1979
+ data_map_plot_pane.object = None
1980
+
1981
+ # Voice traffic map
1982
+ df_voice = (
1983
+ df.groupby(["code", "City", "Latitude", "Longitude"])["total_voice_trafic"]
1984
+ .sum()
1985
+ .reset_index()
1986
+ )
1987
+ if not df_voice.empty:
1988
+ traffic_voice_min = df_voice["total_voice_trafic"].min()
1989
+ traffic_voice_max = df_voice["total_voice_trafic"].max()
1990
+ if traffic_voice_max > traffic_voice_min:
1991
+ df_voice["bubble_size"] = df_voice["total_voice_trafic"].apply(
1992
+ lambda x: min_size
1993
+ + (max_size - min_size)
1994
+ * (x - traffic_voice_min)
1995
+ / (traffic_voice_max - traffic_voice_min)
1996
+ )
1997
+ else:
1998
+ df_voice["bubble_size"] = min_size
1999
+
2000
+ custom_blue_red = [
2001
+ [0.0, "#4292c6"],
2002
+ [0.2, "#2171b5"],
2003
+ [0.4, "#084594"],
2004
+ [0.6, "#cb181d"],
2005
+ [0.8, "#a50f15"],
2006
+ [1.0, "#67000d"],
2007
+ ]
2008
+
2009
+ fig_map_voice = px.scatter_map(
2010
+ df_voice,
2011
+ lat="Latitude",
2012
+ lon="Longitude",
2013
+ color="total_voice_trafic",
2014
+ size="bubble_size",
2015
+ color_continuous_scale=custom_blue_red,
2016
+ size_max=max_size,
2017
+ zoom=10,
2018
+ height=600,
2019
+ title="Voice traffic distribution",
2020
+ hover_data={"code": True, "total_voice_trafic": True},
2021
+ hover_name="code",
2022
+ text=[str(x) for x in df_voice["code"]],
2023
+ )
2024
+ fig_map_voice.update_layout(
2025
+ mapbox_style="open-street-map",
2026
+ coloraxis_colorbar=dict(title="Total Voice Traffic (MB)"),
2027
+ coloraxis=dict(cmin=traffic_voice_min, cmax=traffic_voice_max),
2028
+ font=dict(size=10, color="black"),
2029
+ )
2030
+ voice_map_plot_pane.object = fig_map_voice
2031
+ else:
2032
+ voice_map_plot_pane.object = None
2033
+ else:
2034
+ data_map_plot_pane.object = None
2035
+ voice_map_plot_pane.object = None
2036
+
2037
+
2038
+ def _update_persistent_table_view(event=None) -> None: # noqa: D401, ARG001
2039
+ """Update persistent issues table based on current_persistent_df and top_critical_n."""
2040
+ if current_persistent_df is None or current_persistent_df.empty:
2041
+ persistent_table.value = pd.DataFrame()
2042
+ return
2043
+
2044
+ n = int(top_critical_n_widget.value or 25)
2045
+ persistent_table.value = current_persistent_df.head(n).round(2)
2046
+
2047
+
2048
+ def _recompute_persistent_from_widget(event=None) -> None: # noqa: ARG001
2049
+ """Recompute persistent issues when the minimum consecutive days widget changes."""
2050
+ global current_persistent_df
2051
+
2052
+ if (
2053
+ current_analysis_df is None
2054
+ or current_analysis_df.empty
2055
+ or current_multi_rat_df is None
2056
+ or current_multi_rat_df.empty
2057
+ ):
2058
+ current_persistent_df = None
2059
+ persistent_table.value = pd.DataFrame()
2060
+ return
2061
+
2062
+ persistent_df = analyze_persistent_availability(
2063
+ current_analysis_df,
2064
+ current_multi_rat_df,
2065
+ float(sla_2g.value),
2066
+ float(sla_3g.value),
2067
+ float(sla_lte.value),
2068
+ int(min_persistent_days_widget.value),
2069
+ )
2070
+
2071
+ current_persistent_df = (
2072
+ persistent_df if persistent_df is not None and not persistent_df.empty else None
2073
+ )
2074
+ _update_persistent_table_view()
2075
+
2076
+
2077
+ def _build_input_parameters_df() -> pd.DataFrame:
2078
+ """Build DataFrame with input parameters used for the report."""
2079
+ params = []
2080
+ if file_2g.filename:
2081
+ params.append({"Parameter": "2G Report File", "Value": file_2g.filename})
2082
+ if file_3g.filename:
2083
+ params.append({"Parameter": "3G Report File", "Value": file_3g.filename})
2084
+ if file_lte.filename:
2085
+ params.append({"Parameter": "LTE Report File", "Value": file_lte.filename})
2086
+ if pre_range.value and len(pre_range.value) == 2:
2087
+ params.append({"Parameter": "Pre-Period Start", "Value": pre_range.value[0]})
2088
+ params.append({"Parameter": "Pre-Period End", "Value": pre_range.value[1]})
2089
+ if post_range.value and len(post_range.value) == 2:
2090
+ params.append({"Parameter": "Post-Period Start", "Value": post_range.value[0]})
2091
+ params.append({"Parameter": "Post-Period End", "Value": post_range.value[1]})
2092
+ if last_range.value and len(last_range.value) == 2:
2093
+ params.append({"Parameter": "Last Period Start", "Value": last_range.value[0]})
2094
+ params.append({"Parameter": "Last Period End", "Value": last_range.value[1]})
2095
+ params.append({"Parameter": "2G TCH Availability SLA (%)", "Value": sla_2g.value})
2096
+ params.append({"Parameter": "3G Cell Availability SLA (%)", "Value": sla_3g.value})
2097
+ params.append(
2098
+ {"Parameter": "LTE Cell Availability SLA (%)", "Value": sla_lte.value}
2099
+ )
2100
+ params.append(
2101
+ {
2102
+ "Parameter": "Number of Top Traffic Sites",
2103
+ "Value": number_of_top_trafic_sites.value,
2104
+ }
2105
+ )
2106
+ params.append(
2107
+ {
2108
+ "Parameter": "Number of Top Critical Sites",
2109
+ "Value": top_critical_n_widget.value,
2110
+ }
2111
+ )
2112
+ params.append(
2113
+ {
2114
+ "Parameter": "Minimum Consecutive Days Below SLA",
2115
+ "Value": min_persistent_days_widget.value,
2116
+ }
2117
+ )
2118
+ params.append({"Parameter": "Export Timestamp", "Value": datetime.now()})
2119
+ return pd.DataFrame(params)
2120
+
2121
+
2122
+ def _build_export_bytes() -> bytes:
2123
+ """Build Excel report bytes mirroring Streamlit export structure."""
2124
+ if current_full_df is None:
2125
+ return b""
2126
+
2127
+ dfs: list[pd.DataFrame] = [
2128
+ _build_input_parameters_df(),
2129
+ current_full_df,
2130
+ (
2131
+ current_sum_pre_post_df
2132
+ if current_sum_pre_post_df is not None
2133
+ else pd.DataFrame()
2134
+ ),
2135
+ (
2136
+ current_avg_pre_post_df
2137
+ if current_avg_pre_post_df is not None
2138
+ else pd.DataFrame()
2139
+ ),
2140
+ (
2141
+ current_monthly_voice_df
2142
+ if current_monthly_voice_df is not None
2143
+ else pd.DataFrame()
2144
+ ),
2145
+ (
2146
+ current_monthly_data_df
2147
+ if current_monthly_data_df is not None
2148
+ else pd.DataFrame()
2149
+ ),
2150
+ (
2151
+ current_availability_summary_all_df
2152
+ if current_availability_summary_all_df is not None
2153
+ else pd.DataFrame()
2154
+ ),
2155
+ current_site_2g_avail if current_site_2g_avail is not None else pd.DataFrame(),
2156
+ current_site_3g_avail if current_site_3g_avail is not None else pd.DataFrame(),
2157
+ (
2158
+ current_site_lte_avail
2159
+ if current_site_lte_avail is not None
2160
+ else pd.DataFrame()
2161
+ ),
2162
+ (
2163
+ current_export_multi_rat_df
2164
+ if current_export_multi_rat_df is not None
2165
+ else pd.DataFrame()
2166
+ ),
2167
+ (
2168
+ current_export_persistent_df
2169
+ if current_export_persistent_df is not None
2170
+ else pd.DataFrame()
2171
+ ),
2172
+ ]
2173
+
2174
+ sheet_names = [
2175
+ "Input_Parameters",
2176
+ "Global_Trafic_Analysis",
2177
+ "Sum_pre_post_analysis",
2178
+ "Avg_pre_post_analysis",
2179
+ "Monthly_voice_analysis",
2180
+ "Monthly_data_analysis",
2181
+ "Availability_Summary_All_RAT",
2182
+ "TwoG_Availability_By_Site",
2183
+ "ThreeG_Availability_By_Site",
2184
+ "LTE_Availability_By_Site",
2185
+ "MultiRAT_Availability_By_Site",
2186
+ "Top_Critical_Sites",
2187
+ ]
2188
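+ # dfs and sheet_names are positionally paired (12 entries each); keep both lists
+ # in sync when adding or removing a sheet.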
+
2189
+ return write_dfs_to_excel(dfs, sheet_names, index=True)
2190
+
2191
+
2192
+ def _export_callback() -> io.BytesIO:
2193
+ # Use cached bytes from the last completed analysis to make download instant
2194
+ data = current_export_bytes or b""
2195
+ if not data:
2196
+ return io.BytesIO()
2197
+ # FileDownload expects a file path or file-like object, not raw bytes
2198
+ return io.BytesIO(data)
2199
+
2200
+
2201
+ def _df_to_csv_bytes(df: pd.DataFrame | None) -> io.BytesIO:
2202
+ if df is None or getattr(df, "empty", True): # handles None and empty DataFrame
2203
+ return io.BytesIO()
2204
+ return io.BytesIO(df.to_csv(index=False).encode("utf-8"))
2205
+
2206
+
2207
+ def _download_multi_rat_table() -> io.BytesIO:
2208
+ value = getattr(multi_rat_table, "value", None)
2209
+ return _df_to_csv_bytes(value if isinstance(value, pd.DataFrame) else None)
2210
+
2211
+
2212
+ def _download_persistent_table() -> io.BytesIO:
2213
+ value = getattr(persistent_table, "value", None)
2214
+ return _df_to_csv_bytes(value if isinstance(value, pd.DataFrame) else None)
2215
+
2216
+
2217
+ def _download_top_data_sites() -> io.BytesIO:
2218
+ value = getattr(top_data_sites_table, "value", None)
2219
+ return _df_to_csv_bytes(value if isinstance(value, pd.DataFrame) else None)
2220
+
2221
+
2222
+ def _download_top_voice_sites() -> io.BytesIO:
2223
+ value = getattr(top_voice_sites_table, "value", None)
2224
+ return _df_to_csv_bytes(value if isinstance(value, pd.DataFrame) else None)
2225
+
2226
+
2227
+ # Client-side fullscreen JS logic with Shadow DOM support.
2227
+ # We target the specific CSS class assigned to each plot pane.
2228
2230
+ _JS_FULLSCREEN = """
2231
+ function findDeep(root, cls) {
2232
+ if (!root) return null;
2233
+ if (root.classList && root.classList.contains(cls)) return root;
2234
+
2235
+ if (root.shadowRoot) {
2236
+ var found = findDeep(root.shadowRoot, cls);
2237
+ if (found) return found;
2238
+ }
2239
+
2240
+ var children = root.children;
2241
+ if (children) {
2242
+ for (var i = 0; i < children.length; i++) {
2243
+ var found = findDeep(children[i], cls);
2244
+ if (found) return found;
2245
+ }
2246
+ }
2247
+ return null;
2248
+ }
2249
+
2250
+ var el = findDeep(document.body, target_class);
2251
+
2252
+ if (el) {
2253
+ if (el.requestFullscreen) {
2254
+ el.requestFullscreen();
2255
+ } else if (el.webkitRequestFullscreen) {
2256
+ el.webkitRequestFullscreen();
2257
+ } else if (el.msRequestFullscreen) {
2258
+ el.msRequestFullscreen();
2259
+ }
2260
+ } else {
2261
+ // Debug info
2262
+ alert("Impossible de passer en plein écran : élément '" + target_class + "' introuvable même après recherche approfondie (Shadow DOM).");
2263
+ }
2264
+ """
2265
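+ # Each fullscreen button passes its wrapper class through js_on_click(
+ # args={"target_class": ...}), so one snippet serves every plot; findDeep also
+ # descends into shadowRoots because Panel/Bokeh render components in shadow DOM.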
+
2266
+
2267
+ # Reactive bindings for drill-down controls & export
2268
+ site_select.param.watch(_update_site_view, "value")
2269
+ city_select.param.watch(_update_city_view, "value")
2270
+ top_critical_n_widget.param.watch(_update_persistent_table_view, "value")
2271
+ number_of_top_trafic_sites.param.watch(_update_top_sites_and_maps, "value")
2272
+ min_persistent_days_widget.param.watch(_recompute_persistent_from_widget, "value")
2273
+
2274
+ export_button.callback = _export_callback
2275
+ multi_rat_download.callback = _download_multi_rat_table
2276
+ persistent_download.callback = _download_persistent_table
2277
+ top_data_download.callback = _download_top_data_sites
2278
+ top_voice_download.callback = _download_top_voice_sites
2279
+
2280
+ site_traffic_fullscreen_btn.js_on_click(
2281
+ args={"target_class": "site-traffic-wrapper"},
2282
+ code=_JS_FULLSCREEN,
2283
+ )
2284
+ site_avail_fullscreen_btn.js_on_click(
2285
+ args={"target_class": "site-avail-wrapper"},
2286
+ code=_JS_FULLSCREEN,
2287
+ )
2288
+ city_traffic_fullscreen_btn.js_on_click(
2289
+ args={"target_class": "city-traffic-wrapper"},
2290
+ code=_JS_FULLSCREEN,
2291
+ )
2292
+ city_avail_fullscreen_btn.js_on_click(
2293
+ args={"target_class": "city-avail-wrapper"},
2294
+ code=_JS_FULLSCREEN,
2295
+ )
2296
+ daily_avail_fullscreen_btn.js_on_click(
2297
+ args={"target_class": "daily-avail-wrapper"},
2298
+ code=_JS_FULLSCREEN,
2299
+ )
2300
+ top_data_fullscreen_btn.js_on_click(
2301
+ args={"target_class": "top-data-bar-wrapper"},
2302
+ code=_JS_FULLSCREEN,
2303
+ )
2304
+ top_voice_fullscreen_btn.js_on_click(
2305
+ args={
2306
+ "target_class": "top-voice-bar-wrapper",
2307
+ },
2308
+ code=_JS_FULLSCREEN,
2309
+ )
2310
+ data_map_fullscreen_btn.js_on_click(
2311
+ args={"target_class": "data-map-wrapper"},
2312
+ code=_JS_FULLSCREEN,
2313
+ )
2314
+ voice_map_fullscreen_btn.js_on_click(
2315
+ args={"target_class": "voice-map-wrapper"},
2316
+ code=_JS_FULLSCREEN,
2317
+ )
2318
+
2319
+
2320
+ # --------------------------------------------------------------------------------------
2321
+ # Material Template layout
2322
+ # --------------------------------------------------------------------------------------
2323
+
2324
+
2325
+ template = pn.template.MaterialTemplate(
2326
+ title="📊 Global Trafic Analysis - Panel (2G / 3G / LTE)",
2327
+ )
2328
+
2329
+ # Modal CSS override removed: charts now use the browser's native fullscreen API.
2331
+
2332
+ sidebar_content = pn.Column(
2333
+ """This Panel app is a migration of the existing Streamlit-based global traffic analysis.
2334
+
2335
+ Upload the 3 traffic reports (2G / 3G / LTE), configure the analysis periods and SLAs, then run the analysis.
2336
+
2337
+ In this first step, the app only validates the pipeline and shows a lightweight summary of the inputs. Full KPIs and visualizations will be added progressively.""",
2338
+ "---",
2339
+ file_2g,
2340
+ file_3g,
2341
+ file_lte,
2342
+ "---",
2343
+ pre_range,
2344
+ post_range,
2345
+ last_range,
2346
+ "---",
2347
+ sla_2g,
2348
+ sla_3g,
2349
+ sla_lte,
2350
+ "---",
2351
+ number_of_top_trafic_sites,
2352
+ min_persistent_days_widget,
2353
+ top_critical_n_widget,
2354
+ "---",
2355
+ run_button,
2356
+ )
2357
+
2358
+ main_content = pn.Column(
2359
+ status_pane,
2360
+ pn.pane.Markdown("## Input datasets summary"),
2361
+ summary_table,
2362
+ pn.layout.Divider(),
2363
+ pn.pane.Markdown("## Summary Analysis Pre / Post"),
2364
+ sum_pre_post_table,
2365
+ pn.layout.Divider(),
2366
+ pn.pane.Markdown("## Availability vs SLA (per RAT)"),
2367
+ pn.Tabs(
2368
+ (
2369
+ "2G",
2370
+ pn.Column(
2371
+ summary_2g_table, pn.pane.Markdown("Worst 25 sites"), worst_2g_table
2372
+ ),
2373
+ ),
2374
+ (
2375
+ "3G",
2376
+ pn.Column(
2377
+ summary_3g_table, pn.pane.Markdown("Worst 25 sites"), worst_3g_table
2378
+ ),
2379
+ ),
2380
+ (
2381
+ "LTE",
2382
+ pn.Column(
2383
+ summary_lte_table, pn.pane.Markdown("Worst 25 sites"), worst_lte_table
2384
+ ),
2385
+ ),
2386
+ ),
2387
+ pn.layout.Divider(),
2388
+ pn.pane.Markdown("## Multi-RAT Availability (post-period)"),
2389
+ multi_rat_table,
2390
+ multi_rat_download,
2391
+ pn.layout.Divider(),
2392
+ pn.pane.Markdown("## Persistent availability issues (critical sites)"),
2393
+ persistent_table,
2394
+ persistent_download,
2395
+ pn.layout.Divider(),
2396
+ pn.pane.Markdown("## Site drill-down: traffic and availability over time"),
2397
+ site_select,
2398
+ site_traffic_plot,
2399
+ site_traffic_fullscreen_btn,
2400
+ site_avail_plot,
2401
+ site_avail_fullscreen_btn,
2402
+ site_degraded_table,
2403
+ pn.layout.Divider(),
2404
+ pn.pane.Markdown("## City drill-down: traffic and availability over time"),
2405
+ city_select,
2406
+ city_traffic_plot,
2407
+ city_traffic_fullscreen_btn,
2408
+ city_avail_plot,
2409
+ city_avail_fullscreen_btn,
2410
+ city_degraded_table,
2411
+ pn.layout.Divider(),
2412
+ pn.pane.Markdown("## Daily average availability per RAT"),
2413
+ daily_avail_plot,
2414
+ daily_avail_fullscreen_btn,
2415
+ daily_degraded_table,
2416
+ pn.layout.Divider(),
2417
+ pn.pane.Markdown("## Top traffic sites and geographic maps (last period)"),
2418
+ pn.Row(
2419
+ pn.Column(
2420
+ pn.pane.Markdown("### Top sites by data traffic"),
2421
+ top_data_sites_table,
2422
+ top_data_download,
2423
+ top_data_bar_plot,
2424
+ top_data_fullscreen_btn,
2425
+ ),
2426
+ pn.Column(
2427
+ pn.pane.Markdown("### Top sites by voice traffic"),
2428
+ top_voice_sites_table,
2429
+ top_voice_download,
2430
+ top_voice_bar_plot,
2431
+ top_voice_fullscreen_btn,
2432
+ ),
2433
+ ),
2434
+ pn.Row(
2435
+ pn.Column(
2436
+ pn.pane.Markdown("### Data traffic map"),
2437
+ data_map_plot,
2438
+ data_map_fullscreen_btn,
2439
+ ),
2440
+ pn.Column(
2441
+ pn.pane.Markdown("### Voice traffic map"),
2442
+ voice_map_plot,
2443
+ voice_map_fullscreen_btn,
2444
+ ),
2445
+ ),
2446
+ pn.layout.Divider(),
2447
+ pn.pane.Markdown("## Export"),
2448
+ export_button,
2449
+ )
2450
+
2451
+
2452
+ def get_page_components():
2453
+ return sidebar_content, main_content
2454
+
2455
+
2456
+ if __name__ == "__main__":
2457
+ template.sidebar.append(sidebar_content)
2458
+ template.main.append(main_content)
2459
+ template.servable()
physical_db/physical_database.csv ADDED
The diff for this file is too large to render.
process_kpi/__init__.py ADDED
File without changes
process_kpi/gsm_kpi_requirements.md ADDED
@@ -0,0 +1,47 @@
1
+ # Required Input
2
+
3
+ - BH report
4
+ - Daily Report
5
+ - Dump file (2G dump)
6
+ - Number of last days for the analysis
7
+ - Number of days for blocking
8
+ - SDCCH blocking threshold
9
+ - TCH blocking threshold
10
+ - Availability threshold
11
+ - TCH abis fails threshold
12
+
13
+ Analysis
14
+
15
+ DUMP
16
+
17
+ - Check that mandatory sheet exists in the dump
18
+ - Parse 2G databases
19
+ - Get number of TRX, TCH, SDCCH, amrSegLoadDepTchRateLower, amrSegLoadDepTchRateUpper from the databases
20
+ - Add "GPRS" column equal to (dedicatedGPRScapacity * number_tch_per_cell) / 100
21
+ - Get "Coef HF rate" by mapping "amrSegLoadDepTchRateLower" to the 2G analysis_utility "hf_rate_coef" dict
22
+ - "TCH Actual HR%" equal to "number of TCH" multiplied by "Coef HF rate"
23
+ - Get "Offered Traffic" by mapping the approximate "TCH Actual HR%" to the 2G analysis_utility "erlangB" dict
24
+
25
+ BH DATA
26
+
27
+ - Pivot KPI in BH report
28
+ - Calculate Average and Max of Traffic
29
+ - Average of TCH blocking
30
+ - Average of SDCCH blocking
31
+ - Count number of Days with TCH blocking exceeding the TCH blocking threshold
32
+ - Count number of Days with SDCCH blocking exceeding the SDCCH blocking threshold
33
+ - Count number of Days with Availability below Availability threshold
34
+ - "TCH UTILIZATION (@Max Traffic)" equal to "Max_Trafic" divided by "offered Traffic"
35
+ - Add "ErlabngB_value" =MAX TRAFFIC/(1-(MAX TCH call blocking/200))
36
+ - Get "Target FR CHs" by mapping "ERLANG value" to 2G analysis_utility "erlangB" dict
37
+ - "Target HR CHs" equal to "Target FR CHs" * 2
38
+ - Get "Signal" and "GPRS" value from databases
39
+ - Target TCHs equal to Target HR CHs + Signal + GPRS + SDCCH
40
+ - "Target TRXs" equal to roundup(Target TCHs/8)
41
+ - "# of required TRXs" equal to difference between "Target TRXs" and "number of TRX"
42
+
43
+ Daily DATA
44
+
45
+ - Pivot KPI in Daily Report
46
+ - Count number of Days with Availability below Availability threshold
47
+ - Count number of Days with Abis fails exceeding the TCH Abis fails threshold
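A minimal, runnable sketch of the BH DATA dimensioning chain described in this file. The `erlangB` table here is an illustrative stand-in; the real lookup dicts live in the 2G analysis utilities.

```python
import math

# Hypothetical stand-in for the 2G analysis_utility "erlangB" dict.
erlangB = {5: 1.36, 10: 4.46, 15: 8.11, 20: 12.03}  # TCH count -> offered Erlangs

def required_trxs(max_traffic, max_tch_blocking, signal, gprs, sdcch, current_trx):
    # ErlangB_value = Max Traffic / (1 - (Max TCH call blocking / 200))
    erlang_value = max_traffic / (1 - max_tch_blocking / 200)
    # Target FR CHs: smallest channel count whose offered traffic covers the value
    target_fr = min((ch for ch, erl in erlangB.items() if erl >= erlang_value),
                    default=max(erlangB))
    target_hr = target_fr * 2                        # "Target HR CHs"
    target_tch = target_hr + signal + gprs + sdcch   # "Target TCHs"
    target_trx = math.ceil(target_tch / 8)           # "Target TRXs" = roundup(TCHs / 8)
    return target_trx - current_trx                  # "# of required TRXs"

print(required_trxs(3.0, 2.0, signal=1, gprs=1.5, sdcch=2, current_trx=2))  # 2
```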
process_kpi/kpi_health_check/__init__.py ADDED
File without changes
process_kpi/kpi_health_check/benchmarks.py ADDED
@@ -0,0 +1,42 @@
1
+ import pandas as pd
3
+
4
+ def calculate_sla_metrics(
5
+ df: pd.DataFrame,
6
+ kpi: str,
7
+ rules_df: pd.DataFrame | None = None
8
+ ) -> dict:
9
+ """
10
+ Calculates simple metrics for the given KPI trace:
11
+ - SLA value (if exists)
12
+ - Median (recent window)
13
+
14
+ Returns a dict with: 'sla': float|None, 'median': float|None
15
+ """
16
+ res = {"sla": None, "median": None}
17
+
18
+ if df is None or df.empty or kpi not in df.columns:
19
+ return res
20
+
21
+ # 1. Get SLA from rules
22
+ if rules_df is not None and not rules_df.empty:
23
+ # Assuming rules_df has 'KPI' and 'sla' columns
24
+ # RAT matching is assumed to be handled by the caller before this lookup.
25
+ # Here we do a simplistic lookup.
26
+ try:
27
+ row = rules_df[rules_df["KPI"] == kpi]
28
+ if not row.empty:
29
+ val = row.iloc[0].get("sla")
30
+ res["sla"] = float(val) if pd.notna(val) else None
31
+ except Exception:
32
+ pass
33
+
34
+ # 2. Calculate Median (entire passed df, usually it's the recent window)
35
+ try:
36
+ vals = pd.to_numeric(df[kpi], errors="coerce").dropna()
37
+ if not vals.empty:
38
+ res["median"] = float(vals.median())
39
+ except Exception:
40
+ pass
41
+
42
+ return res
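Usage sketch for `calculate_sla_metrics`, assuming the repo root is on PYTHONPATH (KPI name and values are illustrative):

```python
import pandas as pd
from process_kpi.kpi_health_check.benchmarks import calculate_sla_metrics

df = pd.DataFrame({"CSSR": [97.9, 98.1, 98.4, None]})   # None is dropped before the median
rules = pd.DataFrame([{"KPI": "CSSR", "sla": 98.0}])

print(calculate_sla_metrics(df, "CSSR", rules))
# {'sla': 98.0, 'median': 98.1}
```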
process_kpi/kpi_health_check/engine.py ADDED
@@ -0,0 +1,293 @@
1
+ from datetime import date, datetime, timedelta
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+
7
+ def _to_timestamp(value) -> pd.Timestamp | None:
8
+ if value is None:
9
+ return None
10
+ if isinstance(value, pd.Timestamp):
11
+ return value
12
+ if isinstance(value, datetime):
13
+ return pd.Timestamp(value)
14
+ if isinstance(value, date):
15
+ return pd.Timestamp(value)
16
+ try:
17
+ v = pd.to_datetime(value, errors="coerce")
18
+ return v if pd.notna(v) else None
19
+ except Exception: # noqa: BLE001
20
+ return None
21
+
22
+
23
+ def window_bounds_period(
24
+ end_dt: pd.Timestamp,
25
+ periods: int,
26
+ step: timedelta,
27
+ ) -> tuple[pd.Timestamp, pd.Timestamp]:
28
+ start = end_dt - step * (int(periods) - 1)
29
+ return start, end_dt
30
+
31
+
32
+ def window_bounds(end_date: date, days: int) -> tuple[date, date]:
33
+ start = end_date - timedelta(days=days - 1)
34
+ return start, end_date
35
+
36
+
37
+ def is_bad(
38
+ value: float | None,
39
+ baseline: float | None,
40
+ direction: str,
41
+ rel_threshold_pct: float,
42
+ sla: float | None,
43
+ ) -> bool:
44
+ if value is None or (isinstance(value, float) and np.isnan(value)):
45
+ return False
46
+ bad = False
47
+ if sla is not None and not (isinstance(sla, float) and np.isnan(sla)):
48
+ if direction == "higher_is_better":
49
+ bad = bad or (value < float(sla))
50
+ else:
51
+ bad = bad or (value > float(sla))
52
+
53
+ if baseline is None or (isinstance(baseline, float) and np.isnan(baseline)):
54
+ return bad
55
+
56
+ thr = float(rel_threshold_pct) / 100.0
57
+ if direction == "higher_is_better":
58
+ return bad or (value < baseline - abs(baseline) * thr)
59
+ return bad or (value > baseline + abs(baseline) * thr)
60
+
61
+
62
+ def max_consecutive_periods(values: list, step: timedelta) -> int:
63
+ if not values:
64
+ return 0
65
+ ts = [_to_timestamp(v) for v in values]
66
+ ts2 = [t for t in ts if t is not None]
67
+ if not ts2:
68
+ return 0
69
+ ts_sorted = sorted(set(ts2))
70
+ streak = 1
71
+ best = 1
72
+ for prev, cur in zip(ts_sorted, ts_sorted[1:]):
73
+ if cur == prev + step:
74
+ streak += 1
75
+ else:
76
+ streak = 1
77
+ if streak > best:
78
+ best = streak
79
+ return best
80
+
81
+
82
+ def max_consecutive_days(dates: list[date]) -> int:
83
+ return max_consecutive_periods(dates, step=timedelta(days=1))
84
+
85
+
86
+ def evaluate_health_check(
87
+ daily: pd.DataFrame,
88
+ rat: str,
89
+ rules_df: pd.DataFrame,
90
+ baseline_days_n: int,
91
+ recent_days_n: int,
92
+ rel_threshold_pct: float,
93
+ min_consecutive_days: int,
94
+ granularity: str = "Daily",
95
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
96
+ if daily.empty:
97
+ return pd.DataFrame(), pd.DataFrame()
98
+
99
+ g = str(granularity or "Daily").strip().lower()
100
+ is_hourly = g.startswith("hour") or g.startswith("h")
101
+ time_col = (
102
+ "period_start"
103
+ if (is_hourly and "period_start" in daily.columns)
104
+ else "date_only"
105
+ )
106
+
107
+ step = timedelta(hours=1) if is_hourly else timedelta(days=1)
108
+ baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
109
+ recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
110
+ min_periods = (
111
+ int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
112
+ )
113
+
114
+ end_dt = _to_timestamp(pd.to_datetime(daily[time_col], errors="coerce").max())
115
+ if end_dt is None:
116
+ return pd.DataFrame(), pd.DataFrame()
117
+
118
+ recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
119
+ baseline_end_dt = recent_start_dt - step
120
+ baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)
121
+
122
+ rat_rules = rules_df[rules_df["RAT"] == rat].copy()
123
+ kpis = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
124
+ rules_by_kpi = {
125
+ str(r["KPI"]): r
126
+ for r in rat_rules.to_dict(orient="records")
127
+ if str(r.get("KPI", ""))
128
+ }
129
+
130
+ rows = []
131
+
132
+ for site_code, g_site in daily.groupby("site_code"):
133
+ city = (
134
+ g_site["City"].dropna().iloc[0]
135
+ if ("City" in g_site.columns and g_site["City"].notna().any())
136
+ else None
137
+ )
138
+ g_site = g_site.sort_values(time_col)
139
+ t_all = pd.to_datetime(g_site[time_col], errors="coerce")
140
+ baseline_mask_all = (t_all >= baseline_start_dt) & (t_all <= baseline_end_dt)
141
+ recent_mask_all = (t_all >= recent_start_dt) & (t_all <= recent_end_dt)
142
+
143
+ for kpi in kpis:
144
+ rule = rules_by_kpi.get(str(kpi), {})
145
+ direction = str(rule.get("direction", "higher_is_better"))
146
+ policy = str(rule.get("policy", "enforce") or "enforce").strip().lower()
147
+ sla = rule.get("sla", np.nan)
148
+ try:
149
+ sla_val = float(sla) if pd.notna(sla) else None
150
+ except Exception:
151
+ sla_val = None
152
+
153
+ sla_eval = None if policy == "notify" else sla_val
154
+
155
+ vals = pd.to_numeric(g_site[kpi], errors="coerce")
156
+ has_any = bool(vals.notna().any())
157
+ if not has_any:
158
+ rows.append(
159
+ {
160
+ "RAT": rat,
161
+ "site_code": int(site_code),
162
+ "City": city,
163
+ "KPI": kpi,
164
+ "status": "NO_DATA",
165
+ }
166
+ )
167
+ continue
168
+
169
+ baseline_vals = vals.loc[baseline_mask_all]
170
+ recent_vals = vals.loc[recent_mask_all]
171
+ t_recent = t_all.loc[recent_vals.index]
172
+
173
+ baseline = (
174
+ baseline_vals.median(skipna=True) if baseline_mask_all.any() else np.nan
175
+ )
176
+ recent = (
177
+ recent_vals.median(skipna=True) if recent_mask_all.any() else np.nan
178
+ )
179
+
180
+ bad_dates: list = []
181
+ if recent_mask_all.any() and recent_vals.notna().any():
182
+ thr = float(rel_threshold_pct) / 100.0
183
+ b = float(baseline) if pd.notna(baseline) else None
184
+ bad_series = pd.Series(False, index=recent_vals.index)
185
+
186
+ if b is not None:
187
+ if direction == "higher_is_better":
188
+ bad_series = bad_series | (recent_vals < (b - abs(b) * thr))
189
+ else:
190
+ bad_series = bad_series | (recent_vals > (b + abs(b) * thr))
191
+
192
+ if sla_eval is not None and pd.notna(sla_eval):
193
+ if direction == "higher_is_better":
194
+ bad_series = bad_series | (recent_vals < float(sla_eval))
195
+ else:
196
+ bad_series = bad_series | (recent_vals > float(sla_eval))
197
+
198
+ bad_series = bad_series & recent_vals.notna() & t_recent.notna()
199
+ if bool(bad_series.any()):
200
+ bad_dates = t_recent.loc[bad_series].tolist()
201
+
202
+ max_streak = max_consecutive_periods(bad_dates, step=step)
203
+ persistent = max_streak >= int(min_periods)
204
+
205
+ is_bad_recent = is_bad(
206
+ float(recent) if pd.notna(recent) else None,
207
+ float(baseline) if pd.notna(baseline) else None,
208
+ direction,
209
+ rel_threshold_pct,
210
+ sla_eval,
211
+ )
212
+
213
+ is_bad_current = is_bad_recent
214
+ try:
215
+ last_mask = recent_mask_all & vals.notna() & t_all.notna()
216
+ if bool(last_mask.any()):
217
+ idx_last = t_all.loc[last_mask].idxmax()
218
+ last_val = vals.loc[idx_last]
219
+ is_bad_current = is_bad(
220
+ float(last_val) if pd.notna(last_val) else None,
221
+ float(baseline) if pd.notna(baseline) else None,
222
+ direction,
223
+ rel_threshold_pct,
224
+ sla_eval,
225
+ )
226
+ except Exception: # noqa: BLE001
227
+ pass
228
+
229
+ had_bad_recent = (len(bad_dates) > 0) or bool(is_bad_recent)
230
+
231
+ if policy == "notify":
232
+ if is_bad_current:
233
+ status = "NOTIFY"
234
+ elif had_bad_recent:
235
+ status = "NOTIFY_RESOLVED"
236
+ else:
237
+ status = "OK"
238
+ else:
239
+ if is_bad_current and persistent:
240
+ status = "PERSISTENT_DEGRADED"
241
+ elif is_bad_current:
242
+ status = "DEGRADED"
243
+ elif had_bad_recent:
244
+ status = "RESOLVED"
245
+ else:
246
+ status = "OK"
247
+
248
+ rows.append(
249
+ {
250
+ "RAT": rat,
251
+ "site_code": int(site_code),
252
+ "City": city,
253
+ "KPI": kpi,
254
+ "direction": direction,
255
+ "sla": sla_val,
256
+ "policy": policy,
257
+ "baseline_median": baseline,
258
+ "recent_median": recent,
259
+ "bad_days_recent": len(bad_dates),
260
+ "max_streak_recent": int(max_streak),
261
+ "status": status,
262
+ }
263
+ )
264
+
265
+ status_df = pd.DataFrame(rows)
266
+
267
+ summary_rows = []
268
+ for site_code, g in status_df.groupby("site_code"):
269
+ city = (
270
+ g["City"].dropna().iloc[0]
271
+ if ("City" in g.columns and g["City"].notna().any())
272
+ else None
273
+ )
274
+ degraded_cnt = int(g["status"].isin(["DEGRADED", "PERSISTENT_DEGRADED"]).sum())
275
+ persistent_cnt = int((g["status"] == "PERSISTENT_DEGRADED").sum())
276
+ resolved_cnt = int((g["status"] == "RESOLVED").sum())
277
+ summary_rows.append(
278
+ {
279
+ "RAT": rat,
280
+ "site_code": int(site_code),
281
+ "City": city,
282
+ "degraded_kpis": degraded_cnt,
283
+ "persistent_kpis": persistent_cnt,
284
+ "resolved_kpis": resolved_cnt,
285
+ }
286
+ )
287
+
288
+ summary_df = pd.DataFrame(summary_rows).sort_values(
289
+ by=["degraded_kpis", "persistent_kpis", "resolved_kpis"],
290
+ ascending=[False, False, False],
291
+ )
292
+
293
+ return status_df, summary_df
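An end-to-end sketch of `evaluate_health_check` on synthetic daily data; the rules row follows the RAT/KPI/direction/sla/policy schema consumed above, and the site/city values are illustrative:

```python
import pandas as pd
from process_kpi.kpi_health_check.engine import evaluate_health_check

daily = pd.DataFrame({
    "site_code": [101] * 10,
    "date_only": pd.date_range("2024-01-01", periods=10, freq="D"),
    "City": ["Algiers"] * 10,
    "CSSR": [99] * 7 + [95, 95, 95],  # degraded across the whole recent window
})
rules = pd.DataFrame([{"RAT": "3G", "KPI": "CSSR", "direction": "higher_is_better",
                       "sla": 98.0, "policy": "enforce"}])

status, summary = evaluate_health_check(
    daily, "3G", rules, baseline_days_n=7, recent_days_n=3,
    rel_threshold_pct=2.0, min_consecutive_days=2,
)
print(status.loc[0, "status"])  # PERSISTENT_DEGRADED (streak of 3 bad days >= 2)
```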
process_kpi/kpi_health_check/engine_v2.py ADDED
@@ -0,0 +1,320 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import timedelta
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from process_kpi.kpi_health_check.engine import window_bounds_period
9
+
10
+
11
+ def _to_datetime_series(s: pd.Series) -> pd.Series:
12
+ try:
13
+ return pd.to_datetime(s, errors="coerce")
14
+ except Exception:
15
+ return pd.to_datetime(pd.Series([], dtype="datetime64[ns]"), errors="coerce")
16
+
17
+
18
+ def _vector_is_bad(
19
+ value: pd.Series,
20
+ baseline: pd.Series,
21
+ direction: str,
22
+ rel_threshold_pct: float,
23
+ sla: float | None,
24
+ ) -> pd.Series:
25
+ v = pd.to_numeric(value, errors="coerce")
26
+ b = pd.to_numeric(baseline, errors="coerce")
27
+
28
+ bad = pd.Series(False, index=v.index)
29
+
30
+ if sla is not None and not (isinstance(sla, float) and np.isnan(sla)):
31
+ if str(direction) == "higher_is_better":
32
+ bad = bad | (v < float(sla))
33
+ else:
34
+ bad = bad | (v > float(sla))
35
+
36
+ thr = float(rel_threshold_pct) / 100.0
37
+ has_b = b.notna()
38
+ if bool(has_b.any()):
39
+ if str(direction) == "higher_is_better":
40
+ bad = bad | (v < (b - b.abs() * thr))
41
+ else:
42
+ bad = bad | (v > (b + b.abs() * thr))
43
+
44
+ bad = bad & v.notna()
45
+ return bad
46
+
47
+
48
+ def evaluate_health_check(
49
+ daily: pd.DataFrame,
50
+ rat: str,
51
+ rules_df: pd.DataFrame,
52
+ baseline_days_n: int,
53
+ recent_days_n: int,
54
+ rel_threshold_pct: float,
55
+ min_consecutive_days: int,
56
+ granularity: str = "Daily",
57
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
58
+ if daily is None or daily.empty:
59
+ return pd.DataFrame(), pd.DataFrame()
60
+
61
+ g = str(granularity or "Daily").strip().lower()
62
+ is_hourly = g.startswith("hour") or g.startswith("h")
63
+ time_col = (
64
+ "period_start"
65
+ if (is_hourly and "period_start" in daily.columns)
66
+ else "date_only"
67
+ )
68
+
69
+ step = timedelta(hours=1) if is_hourly else timedelta(days=1)
70
+ baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
71
+ recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
72
+ min_periods = (
73
+ int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
74
+ )
75
+
76
+ t_all = _to_datetime_series(daily[time_col])
77
+ end_dt = t_all.max()
78
+ if pd.isna(end_dt):
79
+ return pd.DataFrame(), pd.DataFrame()
80
+
81
+ end_dt = pd.Timestamp(end_dt)
82
+ if is_hourly:
83
+ end_dt = end_dt.floor("h")
84
+
85
+ recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
86
+ baseline_end_dt = recent_start_dt - step
87
+ baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)
88
+
89
+ rat_rules = (
90
+ rules_df[rules_df["RAT"] == rat].copy()
91
+ if isinstance(rules_df, pd.DataFrame)
92
+ else pd.DataFrame()
93
+ )
94
+ if rat_rules.empty or "KPI" not in rat_rules.columns:
95
+ return pd.DataFrame(), pd.DataFrame()
96
+
97
+ kpi_cols = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
98
+ if not kpi_cols:
99
+ return pd.DataFrame(), pd.DataFrame()
100
+
101
+ base_cols = ["site_code", time_col]
102
+ if "City" in daily.columns:
103
+ base_cols.append("City")
104
+ base = daily[base_cols + kpi_cols].copy()
105
+ base["site_code"] = pd.to_numeric(base["site_code"], errors="coerce")
106
+ base = base.dropna(subset=["site_code"]).copy()
107
+ base["site_code"] = base["site_code"].astype(int)
108
+
109
+ base_t = _to_datetime_series(base[time_col])
110
+ base["_t"] = base_t
111
+ base = base.dropna(subset=["_t"]).copy()
112
+
113
+ baseline_mask = (base["_t"] >= pd.to_datetime(baseline_start_dt)) & (
114
+ base["_t"] <= pd.to_datetime(baseline_end_dt)
115
+ )
116
+ recent_mask = (base["_t"] >= pd.to_datetime(recent_start_dt)) & (
117
+ base["_t"] <= pd.to_datetime(recent_end_dt)
118
+ )
119
+
120
+ counts = base.groupby("site_code")[kpi_cols].count()
121
+ all_sites = counts.index
122
+
123
+ if "City" in base.columns:
124
+ city_map = (
125
+ base[["site_code", "City"]]
126
+ .dropna(subset=["City"])
127
+ .drop_duplicates("site_code")
128
+ .set_index("site_code")["City"]
129
+ )
130
+ city = city_map.reindex(all_sites)
131
+ else:
132
+ city = pd.Series([None] * len(all_sites), index=all_sites)
133
+
134
+ baseline_subset = base.loc[baseline_mask, ["site_code"] + kpi_cols]
135
+ recent_subset = base.loc[recent_mask, ["site_code", "_t"] + kpi_cols]
136
+
137
+ baseline_medians = (
138
+ baseline_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
139
+ if not baseline_subset.empty
140
+ else pd.DataFrame(index=all_sites)
141
+ )
142
+ recent_medians = (
143
+ recent_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
144
+ if not recent_subset.empty
145
+ else pd.DataFrame(index=all_sites)
146
+ )
147
+
148
+ recent_sorted = (
149
+ recent_subset.sort_values(["site_code", "_t"])
150
+ if not recent_subset.empty
151
+ else recent_subset
152
+ )
153
+ gap = recent_sorted.groupby("site_code")["_t"].diff()
154
+ gap_ok = (gap == step).fillna(False)
155
+
156
+ out_frames: list[pd.DataFrame] = []
157
+
158
+ for _, rr in rat_rules.iterrows():
159
+ kpi = str(rr.get("KPI"))
160
+ if not kpi or kpi not in kpi_cols:
161
+ continue
162
+
163
+ direction = str(rr.get("direction", "higher_is_better"))
164
+ policy = str(rr.get("policy", "enforce") or "enforce").strip().lower()
165
+ sla_raw = rr.get("sla", np.nan)
166
+ try:
167
+ sla_val = float(sla_raw) if pd.notna(sla_raw) else None
168
+ except Exception:
169
+ sla_val = None
170
+
171
+ sla_eval = None if policy == "notify" else sla_val
172
+
173
+ cnt = counts[kpi].reindex(all_sites).fillna(0).astype(int)
174
+ has_any = cnt > 0
175
+
176
+ baseline = (
177
+ baseline_medians[kpi].reindex(all_sites)
178
+ if kpi in baseline_medians.columns
179
+ else pd.Series([np.nan] * len(all_sites), index=all_sites)
180
+ )
181
+ recent = (
182
+ recent_medians[kpi].reindex(all_sites)
183
+ if kpi in recent_medians.columns
184
+ else pd.Series([np.nan] * len(all_sites), index=all_sites)
185
+ )
186
+
187
+ if not recent_sorted.empty and kpi in recent_sorted.columns:
188
+ v_recent = pd.to_numeric(recent_sorted[kpi], errors="coerce")
189
+ b_row = recent_sorted["site_code"].map(
190
+ pd.to_numeric(
191
+ baseline_medians.get(kpi, pd.Series(dtype=float)), errors="coerce"
192
+ )
193
+ )
194
+ bad_row = _vector_is_bad(
195
+ v_recent, b_row, direction, float(rel_threshold_pct), sla_eval
196
+ )
197
+ bad_row = bad_row & recent_sorted["_t"].notna()
198
+
199
+ start = (~gap_ok) | (~bad_row)  # a run breaks on a time gap or a non-bad period
200
+ run_id = start.groupby(recent_sorted["site_code"]).cumsum()
201
+
202
+ bad_counts = (
203
+ bad_row.groupby(recent_sorted["site_code"])
204
+ .sum()
205
+ .reindex(all_sites)
206
+ .fillna(0)
207
+ .astype(int)
208
+ )
209
+ streaks = (
210
+ bad_row.groupby([recent_sorted["site_code"], run_id])
211
+ .sum()
212
+ .groupby(level=0)
213
+ .max()
214
+ .reindex(all_sites)
215
+ .fillna(0)
216
+ .astype(int)
217
+ )
218
+
219
+ tmp_last = (
220
+ recent_sorted[["site_code", "_t", kpi]]
221
+ .dropna(subset=[kpi])
222
+ .sort_values(["site_code", "_t"])
223
+ )
224
+ if not tmp_last.empty:
225
+ last_vals = tmp_last.groupby("site_code")[kpi].tail(1)
226
+ last_map = pd.Series(
227
+ last_vals.values,
228
+ index=tmp_last.groupby("site_code")
229
+ .tail(1)["site_code"]
230
+ .astype(int)
231
+ .values,
232
+ )
233
+ last = last_map.reindex(all_sites)
234
+ else:
235
+ last = pd.Series([np.nan] * len(all_sites), index=all_sites)
236
+ else:
237
+ bad_counts = pd.Series([0] * len(all_sites), index=all_sites)
238
+ streaks = pd.Series([0] * len(all_sites), index=all_sites)
239
+ last = pd.Series([np.nan] * len(all_sites), index=all_sites)
240
+
241
+ is_bad_recent = _vector_is_bad(
242
+ recent, baseline, direction, float(rel_threshold_pct), sla_eval
243
+ )
244
+ is_bad_current = _vector_is_bad(
245
+ last, baseline, direction, float(rel_threshold_pct), sla_eval
246
+ )
247
+ had_bad_recent = (bad_counts > 0) | is_bad_recent
248
+
249
+ persistent = streaks >= int(min_periods)
250
+
251
+ status = pd.Series("OK", index=all_sites)
252
+ status = status.where(has_any, "NO_DATA")
253
+
254
+ if policy == "notify":
256
+ status = status.where(~(has_any & is_bad_current), "NOTIFY")
257
+ status = status.where(
258
+ ~(has_any & (~is_bad_current) & had_bad_recent), "NOTIFY_RESOLVED"
259
+ )
260
+ else:
261
+ status = status.where(
262
+ ~(has_any & is_bad_current & persistent), "PERSISTENT_DEGRADED"
263
+ )
264
+ status = status.where(
265
+ ~(has_any & is_bad_current & (~persistent)), "DEGRADED"
266
+ )
267
+ status = status.where(
268
+ ~(has_any & (~is_bad_current) & had_bad_recent), "RESOLVED"
269
+ )
270
+
271
+ frame = pd.DataFrame(
272
+ {
273
+ "RAT": rat,
274
+ "site_code": all_sites.astype(int),
275
+ "City": city.values,
276
+ "KPI": kpi,
277
+ "direction": direction,
278
+ "sla": sla_val,
279
+ "policy": policy,
280
+ "baseline_median": baseline.values,
281
+ "recent_median": recent.values,
282
+ "bad_days_recent": bad_counts.values,
283
+ "max_streak_recent": streaks.values,
284
+ "status": status.values,
285
+ }
286
+ )
287
+ out_frames.append(frame)
288
+
289
+ if not out_frames:
290
+ return pd.DataFrame(), pd.DataFrame()
291
+
292
+ # Filter out empty frames to avoid FutureWarning about empty/all-NA entries
293
+ non_empty_frames = [f for f in out_frames if not f.empty and not f.isna().all().all()]
294
+ if not non_empty_frames:
295
+ return pd.DataFrame(), pd.DataFrame()
296
+
297
+ status_df = pd.concat(non_empty_frames, ignore_index=True)
298
+
299
+ summary = (
300
+ status_df.groupby("site_code", as_index=False)
301
+ .agg(
302
+ RAT=("RAT", "first"),
303
+ City=("City", "first"),
304
+ degraded_kpis=(
305
+ "status",
306
+ lambda s: int(s.isin(["DEGRADED", "PERSISTENT_DEGRADED"]).sum()),
307
+ ),
308
+ persistent_kpis=(
309
+ "status",
310
+ lambda s: int((s == "PERSISTENT_DEGRADED").sum()),
311
+ ),
312
+ resolved_kpis=("status", lambda s: int((s == "RESOLVED").sum())),
313
+ )
314
+ .sort_values(
315
+ by=["degraded_kpis", "persistent_kpis", "resolved_kpis"],
316
+ ascending=[False, False, False],
317
+ )
318
+ )
319
+
320
+ return status_df, summary
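The vectorised streak logic above replaces the per-site Python loop from `engine.py`: a new run id opens whenever a period is non-bad or non-contiguous, so summing `bad_row` within each (site, run) yields streak lengths. A toy illustration of the trick, assuming an hourly step:

```python
from datetime import timedelta

import pandas as pd

t = pd.Series(pd.to_datetime(["2024-01-01 00:00", "2024-01-01 01:00",
                              "2024-01-01 02:00", "2024-01-01 04:00"]))
site = pd.Series([1, 1, 1, 1])
bad = pd.Series([True, True, True, True])

gap_ok = (t.diff() == timedelta(hours=1)).fillna(False)
start = (~gap_ok) | (~bad)                  # each True opens a new run
run_id = start.groupby(site).cumsum()
streaks = bad.groupby([site, run_id]).sum().groupby(level=0).max()
print(int(streaks.loc[1]))  # 3 -> the missing 03:00 sample breaks the streak
```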
process_kpi/kpi_health_check/export.py ADDED
@@ -0,0 +1,264 @@
1
+ import pandas as pd
2
+
3
+ from panel_app.convert_to_excel_panel import write_dfs_to_excel
4
+
5
+
6
+ def _normalize_time_key(
7
+ df: pd.DataFrame, granularity: str
8
+ ) -> tuple[str, pd.Series] | None:
9
+ if df is None or df.empty:
10
+ return None
11
+ g = str(granularity or "Daily").strip().lower()
12
+ is_hourly = g.startswith("hour") or g.startswith("h")
13
+ if is_hourly:
14
+ time_col = "period_start" if "period_start" in df.columns else "date_only"
15
+ t = pd.to_datetime(df.get(time_col), errors="coerce").dt.floor("h")
16
+ return time_col, t
17
+
18
+ time_col = "date_only" if "date_only" in df.columns else "period_start"
19
+ t = pd.to_datetime(df.get(time_col), errors="coerce").dt.date
20
+ return time_col, t
21
+
22
+
23
+ def _build_all_tech_sheet(
24
+ daily_by_rat: dict[str, pd.DataFrame],
25
+ granularity: str,
26
+ ) -> tuple[str, pd.DataFrame] | None:
27
+ if not daily_by_rat or not isinstance(daily_by_rat, dict):
28
+ return None
29
+
30
+ g = str(granularity or "Daily").strip().lower()
31
+ prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
32
+
33
+ ordered_rats = ["2G", "3G", "LTE", "TWAMP"]
34
+ present = [r for r in ordered_rats if r in daily_by_rat]
35
+ if not present:
36
+ present = [str(r) for r in daily_by_rat.keys()]
37
+
38
+ time_col = None
39
+ keys = []
40
+ coords_parts = []
41
+
42
+ for rat in present:
43
+ df = daily_by_rat.get(rat)
44
+ if not isinstance(df, pd.DataFrame) or df.empty:
45
+ continue
46
+
47
+ nt = _normalize_time_key(df, granularity)
48
+ if nt is None:
49
+ continue
50
+ tc, tkey = nt
51
+ if time_col is None:
52
+ time_col = tc
53
+
54
+ tmp = pd.DataFrame(
55
+ {"site_code": pd.to_numeric(df.get("site_code"), errors="coerce"), tc: tkey}
56
+ )
57
+ tmp = tmp.dropna(subset=["site_code", tc]).copy()
58
+ tmp["site_code"] = tmp["site_code"].astype(int)
59
+ keys.append(tmp[["site_code", tc]])
60
+
61
+ cols = [
62
+ c for c in ["site_code", "City", "Longitude", "Latitude"] if c in df.columns
63
+ ]
64
+ if cols:
65
+ cp = df[cols].copy()
66
+ cp["site_code"] = pd.to_numeric(cp["site_code"], errors="coerce")
67
+ cp = cp.dropna(subset=["site_code"]).copy()
68
+ cp["site_code"] = cp["site_code"].astype(int)
69
+ coords_parts.append(cp)
70
+
71
+ if not keys or time_col is None:
72
+ return None
73
+
74
+ base = pd.concat(keys, ignore_index=True).drop_duplicates(
75
+ subset=["site_code", time_col]
76
+ )
77
+
78
+ coords = None
79
+ if coords_parts:
80
+ coords_all = pd.concat(coords_parts, ignore_index=True)
81
+ coords_all = coords_all.drop_duplicates(subset=["site_code"])
82
+ keep = [
83
+ c
84
+ for c in ["site_code", "City", "Longitude", "Latitude"]
85
+ if c in coords_all.columns
86
+ ]
87
+ coords = coords_all[keep].copy() if keep else None
88
+
89
+ if isinstance(coords, pd.DataFrame) and not coords.empty:
90
+ base = pd.merge(base, coords, on="site_code", how="left")
91
+
92
+ base["ID"] = base[time_col].astype(str) + "_" + base["site_code"].astype(str)
93
+
94
+ meta_cols = {
95
+ "site_code",
96
+ "period_start",
97
+ "date_only",
98
+ "Longitude",
99
+ "Latitude",
100
+ "City",
101
+ "RAT",
102
+ "ID",
103
+ }
104
+
105
+ out = base
106
+ for rat in present:
107
+ df = daily_by_rat.get(rat)
108
+ if not isinstance(df, pd.DataFrame) or df.empty:
109
+ continue
110
+
111
+ nt = _normalize_time_key(df, granularity)
112
+ if nt is None:
113
+ continue
114
+ tc, tkey = nt
115
+
116
+ tmp = df.copy()
117
+ tmp["site_code"] = pd.to_numeric(tmp.get("site_code"), errors="coerce")
118
+ tmp = tmp.dropna(subset=["site_code"]).copy()
119
+ tmp["site_code"] = tmp["site_code"].astype(int)
120
+ tmp[tc] = tkey
121
+ tmp = tmp.dropna(subset=[tc]).copy()
122
+
123
+ kpi_cols = [c for c in tmp.columns if c not in meta_cols]
124
+ keep_cols = ["site_code", tc] + kpi_cols
125
+ tmp2 = tmp[keep_cols].copy()
126
+ rename = {c: f"{rat}_{c}" for c in kpi_cols}
127
+ tmp2 = tmp2.rename(columns=rename)
128
+ out = pd.merge(
129
+ out,
130
+ tmp2,
131
+ left_on=["site_code", time_col],
132
+ right_on=["site_code", tc],
133
+ how="left",
134
+ )
135
+ if tc != time_col and tc in out.columns:
136
+ out = out.drop(columns=[tc], errors="ignore")
137
+
138
+ first_cols = [
139
+ c
140
+ for c in ["ID", time_col, "site_code", "City", "Longitude", "Latitude"]
141
+ if c in out.columns
142
+ ]
143
+ rest = [c for c in out.columns if c not in first_cols]
144
+ out = out[first_cols + rest]
145
+ try:
146
+ out = out.sort_values(by=[time_col, "site_code"], ascending=[True, True])
147
+ except Exception:
148
+ pass
149
+
150
+ return f"{prefix}_All", out
151
+
152
+
153
+ def build_export_bytes(
154
+ datasets_df: pd.DataFrame | None,
155
+ rules_df: pd.DataFrame | None,
156
+ summary_df: pd.DataFrame | None,
157
+ status_df: pd.DataFrame | None,
158
+ daily_by_rat: dict[str, pd.DataFrame] | None = None,
159
+ granularity: str = "Daily",
160
+ multirat_summary_df: pd.DataFrame | None = None,
161
+ top_anomalies_df: pd.DataFrame | None = None,
162
+ complaint_multirat_df: pd.DataFrame | None = None,
163
+ complaint_top_anomalies_df: pd.DataFrame | None = None,
164
+ ops_queue_df: pd.DataFrame | None = None,
165
+ delta_df: pd.DataFrame | None = None,
166
+ profile: dict | None = None,
167
+ ) -> bytes:
168
+ if profile is not None:
169
+ profile["export_prep_seconds"] = 0.0
170
+ profile["excel_total_seconds"] = 0.0
171
+
172
+ t_prep0 = pd.Timestamp.utcnow() if profile is not None else None
173
+ dfs = [
174
+ datasets_df if isinstance(datasets_df, pd.DataFrame) else pd.DataFrame(),
175
+ rules_df if isinstance(rules_df, pd.DataFrame) else pd.DataFrame(),
176
+ summary_df if isinstance(summary_df, pd.DataFrame) else pd.DataFrame(),
177
+ status_df if isinstance(status_df, pd.DataFrame) else pd.DataFrame(),
178
+ ]
179
+
180
+ sheet_names = [
181
+ "Datasets",
182
+ "KPI_Rules",
183
+ "Site_Summary",
184
+ "Site_KPI_Status",
185
+ ]
186
+
187
+ max_data_rows = 1048575
188
+ if daily_by_rat and isinstance(daily_by_rat, dict):
189
+ g = str(granularity or "Daily").strip().lower()
190
+ prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
191
+
192
+ combined = _build_all_tech_sheet(daily_by_rat, granularity)
193
+ if combined is not None:
194
+ base, df_all = combined
195
+ if len(df_all) <= max_data_rows:
196
+ dfs.append(df_all)
197
+ sheet_names.append(base[:31])
198
+ else:
199
+ part = 1
200
+ for start in range(0, len(df_all), max_data_rows):
201
+ end = min(start + max_data_rows, len(df_all))
202
+ dfs.append(df_all.iloc[start:end].copy())
203
+ sheet_names.append(f"{base}_p{part}"[:31])
204
+ part += 1
205
+ else:
206
+ for rat, df in daily_by_rat.items():
207
+ if not isinstance(df, pd.DataFrame):
208
+ continue
209
+ base = f"{prefix}_All_{str(rat)}"
210
+ if len(df) <= max_data_rows:
211
+ dfs.append(df)
212
+ sheet_names.append(base[:31])
213
+ else:
214
+ part = 1
215
+ for start in range(0, len(df), max_data_rows):
216
+ end = min(start + max_data_rows, len(df))
217
+ dfs.append(df.iloc[start:end].copy())
218
+ sheet_names.append(f"{base}_p{part}"[:31])
219
+ part += 1
220
+
221
+ dfs.extend(
222
+ [
223
+ (
224
+ multirat_summary_df
225
+ if isinstance(multirat_summary_df, pd.DataFrame)
226
+ else pd.DataFrame()
227
+ ),
228
+ (
229
+ top_anomalies_df
230
+ if isinstance(top_anomalies_df, pd.DataFrame)
231
+ else pd.DataFrame()
232
+ ),
233
+ (
234
+ complaint_multirat_df
235
+ if isinstance(complaint_multirat_df, pd.DataFrame)
236
+ else pd.DataFrame()
237
+ ),
238
+ (
239
+ complaint_top_anomalies_df
240
+ if isinstance(complaint_top_anomalies_df, pd.DataFrame)
241
+ else pd.DataFrame()
242
+ ),
243
+ ops_queue_df if isinstance(ops_queue_df, pd.DataFrame) else pd.DataFrame(),
244
+ delta_df if isinstance(delta_df, pd.DataFrame) else pd.DataFrame(),
245
+ ]
246
+ )
247
+ sheet_names.extend(
248
+ [
249
+ "MultiRAT_Summary",
250
+ "Top_Anomalies",
251
+ "Complaint_MultiRAT",
252
+ "Complaint_Top_Anomalies",
253
+ "Ops_Queue",
254
+ "Delta",
255
+ ]
256
+ )
257
+
258
+ if profile is not None:
259
+ t_prep1 = pd.Timestamp.utcnow()
260
+ if t_prep0 is not None:
261
+ profile["export_prep_seconds"] = float((t_prep1 - t_prep0).total_seconds())
262
+ profile["sheet_count"] = int(len(sheet_names))
263
+
264
+ return write_dfs_to_excel(dfs, sheet_names, index=False, profile=profile)
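Excel caps a worksheet at 1,048,576 rows, hence the `max_data_rows` paging above. The same chunking as a standalone sketch (function name is illustrative):

```python
import pandas as pd

def split_for_excel(df: pd.DataFrame, base: str, max_rows: int = 1_048_575):
    """Yield (sheet_name, frame) pairs, paging once df exceeds the Excel row limit."""
    if len(df) <= max_rows:
        yield base[:31], df  # Excel also caps sheet names at 31 characters
        return
    for part, start in enumerate(range(0, len(df), max_rows), start=1):
        yield f"{base}_p{part}"[:31], df.iloc[start:start + max_rows]

frame = pd.DataFrame({"x": range(5)})
print([name for name, _ in split_for_excel(frame, "Daily_All", max_rows=2)])
# ['Daily_All_p1', 'Daily_All_p2', 'Daily_All_p3']
```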
process_kpi/kpi_health_check/io.py ADDED
@@ -0,0 +1,45 @@
1
+ import io
2
+ import zipfile
3
+
4
+ import pandas as pd
5
+
6
+
7
+ def read_bytes_to_df(file_bytes: bytes, filename: str) -> pd.DataFrame:
8
+ if not file_bytes:
9
+ raise ValueError("Empty file")
10
+
11
+ filename_l = (filename or "").lower()
12
+ data = io.BytesIO(file_bytes)
13
+
14
+ if filename_l.endswith(".zip"):
15
+ with zipfile.ZipFile(data) as z:
16
+ csv_files = [f for f in z.namelist() if f.lower().endswith(".csv")]
17
+ if not csv_files:
18
+ raise ValueError("No CSV file found in the ZIP archive")
19
+ dfs = []
20
+ for csv_name in csv_files:
21
+ try:
22
+ with z.open(csv_name) as f:
23
+ df = pd.read_csv(
24
+ f,
25
+ encoding="latin1",
26
+ sep=";",
27
+ low_memory=False,
28
+ )
29
+ if isinstance(df, pd.DataFrame) and not df.empty:
30
+ dfs.append(df)
31
+ except Exception:
32
+ continue
33
+
34
+ if not dfs:
35
+ raise ValueError("No readable CSV content found in the ZIP archive")
36
+
37
+ if len(dfs) == 1:
38
+ return dfs[0]
39
+
40
+ return pd.concat(dfs, ignore_index=True, sort=False)
41
+
42
+ if filename_l.endswith(".csv"):
43
+ return pd.read_csv(data, encoding="latin1", sep=";", low_memory=False)
44
+
45
+ raise ValueError("Unsupported file format. Please upload a ZIP or CSV file.")
process_kpi/kpi_health_check/kpi_groups.py ADDED
@@ -0,0 +1,96 @@
1
+ import re
2
+
3
+ import pandas as pd
4
+
5
+ # Regex patterns for KPI classification
6
+ # Order matters: first match wins
7
+ PATTERNS = {
8
+ # Refined patterns based on user data
9
+ "Transmission": r"(?i)(abis|a-?bis|iub|x2|backhaul|transport|transmission|s1(?:\s|_)*sig(?:\s|_)*conn(?:\s|_)*sr)",
10
+ "Interference": r"(?i)(\brtwp\b|avg(?:\s|_)*rtwp|rtwp(?:\s|_)*rx(?:\s|_)*ant)",
11
+ "Mobility": r"(?i)(handover|(?<![A-Za-z0-9])ho(?![A-Za-z0-9])|soft(?:\s|_|-)*ho|intra(?:\s|_|-)*freq(?:\s|_|-)*ho|inter(?:\s|_|-)*freq(?:\s|_|-)*ho|csfb)",
12
+ "Success Rate": r"(?i)(cssr|success|attach|setup|establ|answer|complete|connected|ho.*succ|\berab\b|\brrc\b.*(?:\bsr\b|rate|succ)|\basr\b|\bsr\b)",
13
+ "Fails/Drop/Block": r"(?i)(drop|dcr|fail|block|reject|deny|loss|lost|discard|congestion|accessibility.*fail|retention.*fail)",
14
+ "Throughput": r"(?i)(throughput|thp|thrput|PDCP|debit|dl.*rate|ul.*rate|bitrate)",
15
+ "Traffic": r"(?i)(traffic|volume|erl|payload|gbytes|gb|load|usage|utilization)",
16
+ "Availability": r"(?i)(availability|avail|unavailability|unavail|dispo|disponibil|uptime)",
17
+ "Latency": r"(?i)(latency|delay|\brt\b|rtt)",
18
+ }
19
+
20
+
21
+ def classify_kpi(kpi_name: str) -> str:
22
+ """
23
+ Classifies a KPI name into a group based on regex patterns.
24
+ Returns 'Other' if no match found.
25
+ """
26
+ kpi_str = str(kpi_name)
27
+ for group, pattern in PATTERNS.items():
28
+ if re.search(pattern, kpi_str):
29
+ return group
30
+ return "Other"
31
+
32
+
33
+ def get_kpis_by_group(all_kpis: list[str]) -> dict[str, list[str]]:
34
+ """
35
+ Returns a dictionary mapping group names to lists of KPIs.
36
+ """
37
+ groups = {g: [] for g in PATTERNS.keys()}
38
+ groups["Other"] = []
39
+
40
+ for kpi in sorted(all_kpis):
41
+ group = classify_kpi(kpi)
42
+ groups[group].append(kpi)
43
+
44
+ # Remove empty groups
45
+ return {k: v for k, v in groups.items() if v}
46
+
47
+
48
+ def filter_kpis(
49
+ all_kpis: list[str],
50
+ group: str,
51
+ mode: str = "Filter",
52
+ top_n: int = 12,
53
+ stats_df: pd.DataFrame | None = None,
54
+ ) -> list[str]:
55
+ """
56
+ Filters KPIs based on the selected group and mode.
57
+
58
+ Args:
59
+ all_kpis: List of available KPI names.
60
+ group: Selected group name (or 'All').
61
+ mode: 'Filter' or 'Top-N'.
62
+ top_n: Max KPIs to return if filtering needs truncation or specific selection.
63
+ stats_df: Optional DataFrame with 'site_code', 'KPI', 'is_bad', etc. for sorting.
64
+ """
65
+ if not all_kpis:
66
+ return []
67
+
68
+ # 1. Filter by group
69
+ if group and group != "All (selected KPIs)":
70
+ # Handle "Success Rate (>= SLA...)" formatted names if passed from UI
72
+ # Basic mapping check - if the group name in UI has extra text, we match key prefix
73
+ target_group = "Other"
74
+ for k in PATTERNS.keys():
75
+ if k in group:
76
+ target_group = k
77
+ break
78
+ if "Other" in group:
79
+ target_group = "Other"
80
+
81
+ candidates = [k for k in all_kpis if classify_kpi(k) == target_group]
82
+ else:
83
+ candidates = list(all_kpis)
84
+
85
+ if not candidates:
86
+ return []
87
+
88
+ # 2. Sort/Limit if needed
89
+ # If we have stats, we can sort by "badness" or variance
90
+ # For now, simplistic alpha sort unless we have stats
91
+ if stats_df is not None and not stats_df.empty:
92
+ # TODO: Implement smart sorting based on stats if available
93
+ # For V1, we just return candidates sorted alphabetically
94
+ pass
95
+
96
+ return sorted(candidates)
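Example of the first-match-wins classification (KPI names are illustrative):

```python
from process_kpi.kpi_health_check.kpi_groups import classify_kpi

for kpi in ["Avg RTWP RX Ant", "TCH drop rate", "DL PDCP throughput", "Cell Availability"]:
    print(kpi, "->", classify_kpi(kpi))
# Avg RTWP RX Ant -> Interference
# TCH drop rate -> Fails/Drop/Block
# DL PDCP throughput -> Throughput
# Cell Availability -> Availability
```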
process_kpi/kpi_health_check/multi_rat.py ADDED
@@ -0,0 +1,253 @@
1
+ import pandas as pd
2
+
3
+ from process_kpi.kpi_health_check.kpi_groups import classify_kpi
4
+
5
+
6
+ def _slug(value: str) -> str:
7
+ s = str(value or "").strip().lower()
8
+ out = []
9
+ prev_underscore = False
10
+ for ch in s:
11
+ if ch.isalnum():
12
+ out.append(ch)
13
+ prev_underscore = False
14
+ else:
15
+ if not prev_underscore:
16
+ out.append("_")
17
+ prev_underscore = True
18
+ return "".join(out).strip("_")
19
+
20
+
21
+ def _fmt_num(value) -> str:
22
+ try:
23
+ v = pd.to_numeric(value, errors="coerce")
24
+ if pd.isna(v):
25
+ return "NA"
26
+ return f"{float(v):.3g}"
27
+ except Exception: # noqa: BLE001
28
+ return "NA"
29
+
30
+
31
+ def _build_rca_tags(row: dict) -> str:
32
+ tags: list[str] = []
33
+ group = str(row.get("rca_group") or "Other")
34
+ tags.append(_slug(group) if group else "other")
35
+
36
+ status = str(row.get("status") or "").strip().upper()
37
+ if status == "PERSISTENT_DEGRADED":
38
+ tags.append("persistent")
39
+ elif status == "DEGRADED":
40
+ tags.append("degraded")
41
+ elif status:
42
+ tags.append(_slug(status))
43
+
44
+ baseline = row.get("baseline_median")
45
+ recent = row.get("recent_median")
46
+ if pd.isna(pd.to_numeric(baseline, errors="coerce")):
47
+ tags.append("missing_baseline")
48
+ if pd.isna(pd.to_numeric(recent, errors="coerce")):
49
+ tags.append("missing_recent")
50
+
51
+ impact = pd.to_numeric(row.get("impacted_rats"), errors="coerce")
52
+ if pd.notna(impact) and float(impact) >= 2:
53
+ tags.append("multi_rat")
54
+
55
+ return ",".join([t for t in tags if t])
56
+
57
+
58
+ def _build_rca_hint(row: dict) -> str:
59
+ group = str(row.get("rca_group") or "Other")
60
+ kpi = str(row.get("KPI") or "")
61
+ rat = str(row.get("RAT") or "")
62
+ status = str(row.get("status") or "")
63
+ baseline_s = _fmt_num(row.get("baseline_median"))
64
+ recent_s = _fmt_num(row.get("recent_median"))
65
+ streak = int(pd.to_numeric(row.get("max_streak_recent"), errors="coerce") or 0)
66
+ bad = int(pd.to_numeric(row.get("bad_days_recent"), errors="coerce") or 0)
67
+ return (
68
+ f"{group} | {rat} | {kpi} | {status} | "
69
+ f"baseline={baseline_s} recent={recent_s} | streak={streak}d bad={bad}d"
70
+ )
71
+
72
+
73
+ def compute_multirat_views(
74
+ status_df: pd.DataFrame,
75
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
76
+ if status_df is None or status_df.empty:
77
+ return pd.DataFrame(), pd.DataFrame()
78
+
79
+ df = status_df.copy()
80
+ df["is_degraded"] = df["status"].isin(["DEGRADED", "PERSISTENT_DEGRADED"])
81
+ df["is_persistent"] = df["status"].isin(["PERSISTENT_DEGRADED"])
82
+ df["is_resolved"] = df["status"].isin(["RESOLVED"])
83
+
84
+ def _first_city(s: pd.Series):
85
+ s2 = s.dropna()
86
+ return s2.iloc[0] if not s2.empty else None
87
+
88
+ base = (
89
+ df.groupby("site_code", as_index=False)
90
+ .agg(
91
+ City=("City", _first_city),
92
+ degraded_kpis_total=("is_degraded", "sum"),
93
+ persistent_kpis_total=("is_persistent", "sum"),
94
+ resolved_kpis_total=("is_resolved", "sum"),
95
+ )
96
+ .copy()
97
+ )
98
+
99
+ impacted = (
100
+ df[df["is_degraded"]]
101
+ .groupby("site_code")["RAT"]
102
+ .nunique()
103
+ .rename("impacted_rats")
104
+ .reset_index()
105
+ )
106
+
107
+ resolved_pivot = (
108
+ df[df["is_resolved"]]
109
+ .pivot_table(
110
+ index="site_code",
111
+ columns="RAT",
112
+ values="KPI",
113
+ aggfunc="count",
114
+ fill_value=0,
115
+ )
116
+ .rename(columns=lambda c: f"resolved_{c}")
117
+ .reset_index()
118
+ )
119
+
120
+ base = pd.merge(base, impacted, on="site_code", how="left")
121
+ base["impacted_rats"] = base["impacted_rats"].fillna(0).astype(int)
122
+
123
+ degraded_pivot = (
124
+ df[df["is_degraded"]]
125
+ .pivot_table(
126
+ index="site_code",
127
+ columns="RAT",
128
+ values="KPI",
129
+ aggfunc="count",
130
+ fill_value=0,
131
+ )
132
+ .rename(columns=lambda c: f"degraded_{c}")
133
+ .reset_index()
134
+ )
135
+
136
+ persistent_pivot = (
137
+ df[df["is_persistent"]]
138
+ .pivot_table(
139
+ index="site_code",
140
+ columns="RAT",
141
+ values="KPI",
142
+ aggfunc="count",
143
+ fill_value=0,
144
+ )
145
+ .rename(columns=lambda c: f"persistent_{c}")
146
+ .reset_index()
147
+ )
148
+
149
+ out = base
150
+ if not degraded_pivot.empty:
151
+ out = pd.merge(out, degraded_pivot, on="site_code", how="left")
152
+ if not persistent_pivot.empty:
153
+ out = pd.merge(out, persistent_pivot, on="site_code", how="left")
154
+ if not resolved_pivot.empty:
155
+ out = pd.merge(out, resolved_pivot, on="site_code", how="left")
156
+
157
+ metric_cols = [c for c in out.columns if c != "City"]
158
+ out[metric_cols] = out[metric_cols].fillna(0)
159
+
160
+ resolved_total = (
161
+ out["resolved_kpis_total"].astype(float)
162
+ if "resolved_kpis_total" in out.columns
163
+ else 0.0
164
+ )
165
+ out["criticality_score"] = (
166
+ (
167
+ out["persistent_kpis_total"].astype(float) * 5.0
168
+ + out["degraded_kpis_total"].astype(float) * 2.0
169
+ + out["impacted_rats"].astype(float) * 1.0
170
+ + resolved_total * 0.5
171
+ )
172
+ .round(0)
173
+ .astype(int)
174
+ )
175
+
176
+ out = out.sort_values(
177
+ by=[
178
+ "criticality_score",
179
+ "persistent_kpis_total",
180
+ "degraded_kpis_total",
181
+ "impacted_rats",
182
+ ],
183
+ ascending=[False, False, False, False],
184
+ )
185
+
186
+ top = df[df["is_degraded"]].copy()
187
+ sev = {"PERSISTENT_DEGRADED": 2, "DEGRADED": 1}
188
+ top["severity"] = top["status"].map(sev).fillna(0).astype(int)
189
+
190
+ for col in ["bad_days_recent", "max_streak_recent"]:
191
+ if col not in top.columns:
192
+ top[col] = pd.NA
193
+
194
+ top["anomaly_score"] = (
195
+ (
196
+ top["severity"].astype(float) * 100.0
197
+ + pd.to_numeric(top["max_streak_recent"], errors="coerce")
198
+ .fillna(0)
199
+ .astype(float)
200
+ * 10.0
201
+ + pd.to_numeric(top["bad_days_recent"], errors="coerce")
202
+ .fillna(0)
203
+ .astype(float)
204
+ )
205
+ .round(0)
206
+ .astype(int)
207
+ )
208
+
209
+ top = top.sort_values(
210
+ by=["anomaly_score", "severity", "max_streak_recent", "bad_days_recent"],
211
+ ascending=[False, False, False, False],
212
+ )
213
+
214
+ try:
215
+ top = pd.merge(top, impacted, on="site_code", how="left")
216
+ top["impacted_rats"] = (
217
+ pd.to_numeric(top["impacted_rats"], errors="coerce").fillna(0).astype(int)
218
+ )
219
+ except Exception: # noqa: BLE001
220
+ top["impacted_rats"] = 0
221
+
222
+ top["rca_group"] = top["KPI"].apply(classify_kpi)
223
+ try:
224
+ top["rca_hint"] = top.apply(lambda r: _build_rca_hint(r.to_dict()), axis=1)
225
+ top["rca_tags"] = top.apply(lambda r: _build_rca_tags(r.to_dict()), axis=1)
226
+ except Exception: # noqa: BLE001
227
+ top["rca_hint"] = ""
228
+ top["rca_tags"] = ""
229
+
230
+ top_cols = [
231
+ c
232
+ for c in [
233
+ "anomaly_score",
234
+ "severity",
235
+ "RAT",
236
+ "site_code",
237
+ "City",
238
+ "KPI",
239
+ "rca_group",
240
+ "rca_tags",
241
+ "rca_hint",
242
+ "status",
243
+ "impacted_rats",
244
+ "baseline_median",
245
+ "recent_median",
246
+ "bad_days_recent",
247
+ "max_streak_recent",
248
+ ]
249
+ if c in top.columns
250
+ ]
251
+ top = top[top_cols].head(300)
252
+
253
+ return out, top
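The criticality score weights persistent degradations most heavily; note that `degraded_kpis_total` already counts the persistent KPIs. A worked instance of the formula above:

```python
# criticality = persistent*5 + degraded_total*2 + impacted_rats*1 + resolved_total*0.5
persistent, degraded_total, impacted_rats, resolved_total = 2, 5, 3, 2
score = persistent * 5.0 + degraded_total * 2.0 + impacted_rats * 1.0 + resolved_total * 0.5
print(round(score))  # 24
```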
process_kpi/kpi_health_check/normalization.py ADDED
@@ -0,0 +1,292 @@
1
+ import re
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from utils.utils_vars import get_physical_db
7
+
8
+
9
+ def to_numeric(series: pd.Series) -> pd.Series:
10
+ if pd.api.types.is_numeric_dtype(series):
11
+ return pd.to_numeric(series, errors="coerce")
12
+ s = series.astype(str)
13
+ s = s.str.replace("\u00a0", "", regex=False)
14
+ s = s.str.replace(" ", "", regex=False)
15
+ s = s.str.replace("%", "", regex=False)
16
+ s = s.replace({"nan": np.nan, "None": np.nan, "": np.nan})
17
+
18
+ has_comma = s.str.contains(",", na=False, regex=False)
19
+ has_dot = s.str.contains(".", na=False, regex=False)
20
+ both = has_comma & has_dot
21
+ if bool(both.any()):
22
+ last_comma = s.str.rfind(",")
23
+ last_dot = s.str.rfind(".")
24
+ euro = both & (last_comma > last_dot)
25
+ us = both & (last_dot > last_comma)
26
+ if bool(euro.any()):
27
+ s.loc[euro] = (
28
+ s.loc[euro]
29
+ .str.replace(".", "", regex=False)
30
+ .str.replace(",", ".", regex=False)
31
+ )
32
+ if bool(us.any()):
33
+ s.loc[us] = s.loc[us].str.replace(",", "", regex=False)
34
+
35
+ comma_only = has_comma & ~has_dot
36
+ if bool(comma_only.any()):
37
+ s.loc[comma_only] = s.loc[comma_only].str.replace(",", ".", regex=False)
38
+ return pd.to_numeric(s, errors="coerce")
39
+
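`to_numeric` disambiguates European vs US thousands/decimal separators by the position of the last comma and dot. A quick check, assuming the repo root is on PYTHONPATH:

```python
import pandas as pd
from process_kpi.kpi_health_check.normalization import to_numeric

s = pd.Series(["1.234,56", "1,234.56", "98,7", "12 345", "95%"])
print(to_numeric(s).tolist())
# [1234.56, 1234.56, 98.7, 12345.0, 95.0]
```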
40
+
41
+ def parse_datetime(series: pd.Series) -> pd.Series:
42
+ if series.empty:
43
+ return pd.to_datetime(series, errors="coerce")
44
+ first = series.dropna().astype(str).iloc[0] if not series.dropna().empty else ""
45
+
46
+ formats: list[str] = []
47
+ if len(first) > 10:
48
+ formats.extend(
49
+ [
50
+ "%m.%d.%Y %H:%M:%S",
51
+ "%d.%m.%Y %H:%M:%S",
52
+ "%Y-%m-%d %H:%M:%S",
53
+ "%Y/%m/%d %H:%M:%S",
54
+ "%d/%m/%Y %H:%M:%S",
55
+ "%m/%d/%Y %H:%M:%S",
56
+ ]
57
+ )
58
+ formats.extend(
59
+ [
60
+ "%m.%d.%Y",
61
+ "%d.%m.%Y",
62
+ "%Y-%m-%d",
63
+ "%Y/%m/%d",
64
+ "%d/%m/%Y",
65
+ "%m/%d/%Y",
66
+ ]
67
+ )
68
+
69
+ for fmt in formats:
70
+ dt = pd.to_datetime(series, errors="coerce", format=fmt)
71
+ if dt.notna().any():
72
+ return dt
73
+
74
+ return pd.to_datetime(series, errors="coerce")
75
+
76
+
77
+ def extract_site_code(value: object) -> int | None:
78
+ if value is None or (isinstance(value, float) and np.isnan(value)):
79
+ return None
80
+ s = str(value)
81
+
82
+ # Prefer explicit node patterns when present (DN strings often contain multiple numbers).
83
+ for pat in [
84
+ r"(?:WBTS)\D*(\d{3,7})",
85
+ r"(?:LNBTS)\D*(\d{3,7})",
86
+ r"(?:BTS)\D*(\d{3,7})",
87
+ r"(?:BCF)\D*(\d{3,7})",
88
+ r"(?:MRBTS)\D*(\d{3,7})",
89
+ r"(?:SBTS)\D*(\d{3,7})",
90
+ ]:
91
+ m = re.search(pat, s, flags=re.IGNORECASE)
92
+ if m:
93
+ try:
94
+ return int(m.group(1))
95
+ except ValueError:
96
+ pass
97
+
98
+ # Fallback: accept 3-digit sites (common) while keeping the upper bound used previously.
99
+ m = re.search(r"(\d{3,7})", s)
100
+ if not m:
101
+ return None
102
+ try:
103
+ return int(m.group(1))
104
+ except ValueError:
105
+ return None
106
+
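`extract_site_code` prefers explicit node tokens, so DN strings containing several numbers resolve to the BTS id rather than the first match (sample DNs are illustrative):

```python
from process_kpi.kpi_health_check.normalization import extract_site_code

print(extract_site_code("PLMN-PLMN/RNC-12/WBTS-4711"))  # 4711 (WBTS token wins over RNC-12)
print(extract_site_code("Site_305_sector2"))            # 305 (generic 3-7 digit fallback)
```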
107
+
108
+ def infer_date_col(df: pd.DataFrame) -> str:
109
+ for c in ["PERIOD_START_TIME", "PERIOD_START_DATE", "date", "Date", "DATE"]:
110
+ if c in df.columns:
111
+ return c
112
+ raise ValueError("Cannot find a date column (expected PERIOD_START_TIME)")
113
+
114
+
115
+ def infer_id_col(df: pd.DataFrame, rat: str) -> str:
116
+ rat_candidates = {
117
+ "2G": ["BCF name", "BCF", "BTS name", "BSC name", "DN"],
118
+ "3G": ["WBTS name", "WBTS ID", "DN"],
119
+ "LTE": ["LNBTS name", "MRBTS/SBTS name", "DN"],
120
+ "TWAMP": ["MRBTS name", "MRBTS/SBTS name", "LNBTS name", "DN"],
121
+ }
122
+
123
+ candidates = [c for c in rat_candidates.get(rat, []) if c in df.columns]
124
+ if not candidates and "DN" in df.columns:
125
+ candidates = ["DN"]
126
+ if not candidates:
127
+ raise ValueError(f"Cannot infer an entity/site column for {rat} dataset")
128
+
129
+ physical_codes: set[int] | None = None
130
+ try:
131
+ physical = load_physical_db()
132
+ if not physical.empty and "code" in physical.columns:
133
+ physical_codes = set(
134
+ pd.to_numeric(physical["code"], errors="coerce")
135
+ .dropna()
136
+ .astype(int)
137
+ .tolist()
138
+ )
139
+ except Exception:
140
+ physical_codes = None
141
+
142
+ if not physical_codes:
143
+ return candidates[0]
144
+
145
+ best_col = candidates[0]
146
+ best_score = -1.0
147
+ for c in candidates:
148
+ sample = df[c].head(2000)
149
+ codes = sample.apply(extract_site_code)
150
+ non_null = float(codes.notna().mean()) if len(codes) else 0.0
151
+
152
+ if physical_codes:
153
+ match = (
154
+ float(codes.dropna().astype(int).isin(physical_codes).mean())
155
+ if codes.notna().any()
156
+ else 0.0
157
+ )
158
+ score = match * 10.0 + non_null
159
+ else:
160
+ score = non_null
161
+
162
+ if score > best_score:
163
+ best_score = score
164
+ best_col = c
165
+
166
+ return best_col
167
+
168
+
169
+ def non_kpi_identifier_cols(df: pd.DataFrame, rat: str) -> set[str]:
170
+ common = {
171
+ "DN",
172
+ "PLMN name",
173
+ "RNC name",
174
+ "BSC name",
175
+ "BCF name",
176
+ "MRBTS name",
177
+ "MRBTS/SBTS name",
178
+ "LNBTS name",
179
+ "WBTS name",
180
+ "WBTS ID",
181
+ }
182
+ rat_specific = {
183
+ "2G": {"BSC name", "BSC", "BCF name", "BCF", "BTS name"},
184
+ "3G": {"PLMN name", "RNC name", "WBTS name", "WBTS ID"},
185
+ "LTE": {"MRBTS/SBTS name", "LNBTS name"},
186
+ }
187
+ cols = set()
188
+ for c in common.union(rat_specific.get(rat, set())):
189
+ if c in df.columns:
190
+ cols.add(c)
191
+ return cols
192
+
193
+
194
+ def infer_agg(kpi: str) -> str:
195
+ k = str(kpi).lower()
196
+ if any(x in k for x in ["traffic", "volume", "erl", "total", "gbytes", "gb"]):
197
+ return "sum"
198
+ return "mean"
199
+
200
+
201
+ def _is_availability_kpi(kpi: str) -> bool:
202
+ k = str(kpi).strip().lower()
203
+ if not k:
204
+ return False
205
+ return any(
206
+ x in k
207
+ for x in [
208
+ "availability",
209
+ "avail",
210
+ "unavailability",
211
+ "unavail",
212
+ "dispo",
213
+ "disponibil",
214
+ "uptime",
215
+ ]
216
+ )
217
+
218
+
219
+ def load_physical_db() -> pd.DataFrame:
220
+ physical_db = get_physical_db().copy()
221
+ physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
222
+ physical_db["code"] = pd.to_numeric(physical_db["code"], errors="coerce")
223
+ physical_db = physical_db.dropna(subset=["code"])
224
+ physical_db["code"] = physical_db["code"].astype(int)
225
+ keep = [
226
+ c for c in ["code", "Longitude", "Latitude", "City"] if c in physical_db.columns
227
+ ]
228
+ return physical_db[keep].drop_duplicates("code")
229
+
230
+
231
+ def build_period_kpi(
232
+ df_raw: pd.DataFrame,
233
+ rat: str,
234
+ granularity: str = "Daily",
235
+ ) -> tuple[pd.DataFrame, list[str]]:
236
+ df = df_raw.copy()
237
+ date_col = infer_date_col(df)
238
+ id_col = infer_id_col(df, rat)
239
+
240
+ df["date"] = parse_datetime(df[date_col])
241
+ df = df.dropna(subset=["date"])
242
+
243
+ g = str(granularity or "Daily").strip().lower()
244
+ if g.startswith("hour") or g.startswith("h"):
245
+ df["period_start"] = df["date"].dt.floor("h")
246
+ else:
247
+ df["period_start"] = df["date"].dt.floor("D")
248
+
249
+ df["site_code"] = df[id_col].apply(extract_site_code)
250
+ df = df.dropna(subset=["site_code"])
251
+ df["site_code"] = df["site_code"].astype(int)
252
+
253
+ meta = {date_col, id_col, "date", "site_code", "period_start"}
254
+ meta = meta.union(non_kpi_identifier_cols(df, rat))
255
+ candidate_cols = [c for c in df.columns if c not in meta]
256
+
257
+ numeric_cols: dict[str, pd.Series] = {}
258
+ for c in candidate_cols:
259
+ numeric_cols[c] = to_numeric(df[c])
260
+
261
+ numeric_df = pd.DataFrame(numeric_cols)
262
+ for c in list(numeric_df.columns):
263
+ if _is_availability_kpi(c):
264
+ numeric_df[c] = numeric_df[c].fillna(0.0)
265
+ kpi_cols = [c for c in numeric_df.columns if numeric_df[c].notna().any()]
266
+ if not kpi_cols:
267
+ raise ValueError(f"No numeric KPI columns detected for {rat}")
268
+
269
+ base = pd.concat(
270
+ [
271
+ df[["site_code", "period_start"]].reset_index(drop=True),
272
+ numeric_df[kpi_cols].reset_index(drop=True),
273
+ ],
274
+ axis=1,
275
+ )
276
+
277
+ agg_dict = {k: infer_agg(k) for k in kpi_cols}
278
+ out = base.groupby(["site_code", "period_start"], as_index=False).agg(agg_dict)
279
+ out["date_only"] = pd.to_datetime(out["period_start"]).dt.date
280
+
281
+ physical = load_physical_db()
282
+ if not physical.empty:
283
+ out = pd.merge(out, physical, left_on="site_code", right_on="code", how="left")
284
+ out = out.drop(columns=[c for c in ["code"] if c in out.columns])
285
+
286
+ out["RAT"] = rat
287
+
288
+ return out, kpi_cols
289
+
290
+
291
+ def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[str]]:
292
+ return build_period_kpi(df_raw, rat, granularity="Daily")
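A minimal end-to-end sketch of the period aggregation above (column names follow the 3G candidates in `infer_id_col`; the coordinate merge assumes the bundled physical DB loads):

import pandas as pd

raw = pd.DataFrame(
    {
        "PERIOD_START_TIME": ["01.15.2024 08:00:00", "01.15.2024 09:00:00"],
        "WBTS name": ["12345_CITY_A", "12345_CITY_A"],
        "Total Traffic (Erl)": ["10,5", "12,0"],
        "Cell Availability": ["99,9", None],
    }
)
daily, kpi_cols = build_daily_kpi(raw, rat="3G")
# One row for site 12345 on 2024-01-15: the traffic column is summed
# (its name matches infer_agg's "traffic" tokens, giving 22.5), while
# availability is averaged after its NaN is filled with 0.0.
print(daily[["site_code", "period_start"] + kpi_cols])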
process_kpi/kpi_health_check/presets.py ADDED
@@ -0,0 +1,79 @@
1
+ import json
2
+ import os
3
+ from datetime import datetime, timezone
4
+
5
+ import pandas as pd
6
+
7
+
8
+ def presets_dir() -> str:
9
+ root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
+ return os.path.join(root, "data", "kpi_health_check_presets")
11
+
12
+
13
+ def _safe_name(name: str) -> str:
14
+ s = (name or "").strip()
15
+ s = s.replace("..", "")
16
+ s = s.replace("/", "_").replace("\\", "_")
17
+ s = "_".join([p for p in s.split() if p])
18
+ return s
19
+
20
+
21
+ def list_presets() -> list[str]:
22
+ d = presets_dir()
23
+ if not os.path.isdir(d):
24
+ return []
25
+ out = []
26
+ for fn in os.listdir(d):
27
+ if fn.lower().endswith(".json"):
28
+ out.append(os.path.splitext(fn)[0])
29
+ return sorted(set(out))
30
+
31
+
32
+ def load_preset(name: str) -> pd.DataFrame:
33
+ d = presets_dir()
34
+ safe = _safe_name(name)
35
+ path = os.path.join(d, f"{safe}.json")
36
+ with open(path, "r", encoding="utf-8") as f:
37
+ obj = json.load(f)
38
+ rows = obj.get("rules", []) if isinstance(obj, dict) else []
39
+ df = pd.DataFrame(rows)
40
+ if not df.empty:
41
+ df["RAT"] = df["RAT"].astype(str)
42
+ df["KPI"] = df["KPI"].astype(str)
43
+ return df
44
+
45
+
46
+ def save_preset(name: str, rules_df: pd.DataFrame) -> str:
47
+ safe = _safe_name(name)
48
+ if not safe:
49
+ raise ValueError("Preset name is empty")
50
+
51
+ d = presets_dir()
52
+ os.makedirs(d, exist_ok=True)
53
+ path = os.path.join(d, f"{safe}.json")
54
+
55
+ df = rules_df.copy() if isinstance(rules_df, pd.DataFrame) else pd.DataFrame()
56
+ if df.empty:
57
+ raise ValueError("Rules dataframe is empty")
58
+
59
+ keep = [c for c in ["RAT", "KPI", "direction", "sla", "policy"] if c in df.columns]
60
+ df = df[keep].copy()
61
+
62
+ obj = {
63
+ "name": safe,
64
+ "saved_at": datetime.utcnow().isoformat() + "Z",
65
+ "rules": df.to_dict(orient="records"),
66
+ }
67
+
68
+ with open(path, "w", encoding="utf-8") as f:
69
+ json.dump(obj, f, ensure_ascii=False, indent=2)
70
+
71
+ return path
72
+
73
+
74
+ def delete_preset(name: str) -> None:
75
+ d = presets_dir()
76
+ safe = _safe_name(name)
77
+ path = os.path.join(d, f"{safe}.json")
78
+ if os.path.isfile(path):
79
+ os.remove(path)
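A round-trip sketch of the preset helpers (the preset name is invented; note how `_safe_name` rewrites it):

import pandas as pd

rules = pd.DataFrame(
    [{"RAT": "LTE", "KPI": "Cell Avail excl BLU",
      "direction": "higher_is_better", "sla": 98.0, "policy": "enforce"}]
)
save_preset("My Rules / v1", rules)   # written as My_Rules___v1.json
print(list_presets())                 # includes "My_Rules___v1"
df = load_preset("My_Rules___v1")     # back to a rules DataFrame
delete_preset("My_Rules___v1")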
process_kpi/kpi_health_check/profiles.py ADDED
@@ -0,0 +1,71 @@
1
+ import json
2
+ import os
3
+ from datetime import datetime, timezone
4
+
5
+
6
+ def profiles_dir() -> str:
7
+ root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+ return os.path.join(root, "data", "kpi_health_check_profiles")
9
+
10
+
11
+ def _safe_name(name: str) -> str:
12
+ s = (name or "").strip()
13
+ s = s.replace("..", "")
14
+ s = s.replace("/", "_").replace("\\", "_")
15
+ s = "_".join([p for p in s.split() if p])
16
+ return s
17
+
18
+
19
+ def list_profiles() -> list[str]:
20
+ d = profiles_dir()
21
+ if not os.path.isdir(d):
22
+ return []
23
+ out: list[str] = []
24
+ for fn in os.listdir(d):
25
+ if fn.lower().endswith(".json"):
26
+ out.append(os.path.splitext(fn)[0])
27
+ return sorted(set(out))
28
+
29
+
30
+ def load_profile(name: str) -> dict:
31
+ d = profiles_dir()
32
+ safe = _safe_name(name)
33
+ path = os.path.join(d, f"{safe}.json")
34
+ with open(path, "r", encoding="utf-8") as f:
35
+ obj = json.load(f)
36
+ if isinstance(obj, dict) and "config" in obj and isinstance(obj["config"], dict):
37
+ return obj["config"]
38
+ if isinstance(obj, dict):
39
+ return obj
40
+ return {}
41
+
42
+
43
+ def save_profile(name: str, config: dict) -> str:
44
+ safe = _safe_name(name)
45
+ if not safe:
46
+ raise ValueError("Profile name is empty")
47
+ if config is None or not isinstance(config, dict) or not config:
48
+ raise ValueError("Profile config is empty")
49
+
50
+ d = profiles_dir()
51
+ os.makedirs(d, exist_ok=True)
52
+ path = os.path.join(d, f"{safe}.json")
53
+
54
+ obj = {
55
+ "name": safe,
56
+ "saved_at": datetime.utcnow().isoformat() + "Z",
57
+ "config": config,
58
+ }
59
+
60
+ with open(path, "w", encoding="utf-8") as f:
61
+ json.dump(obj, f, ensure_ascii=False, indent=2)
62
+
63
+ return path
64
+
65
+
66
+ def delete_profile(name: str) -> None:
67
+ d = profiles_dir()
68
+ safe = _safe_name(name)
69
+ path = os.path.join(d, f"{safe}.json")
70
+ if os.path.isfile(path):
71
+ os.remove(path)
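Usage mirrors the preset helpers; the only wrinkle is that `load_profile` unwraps the `config` envelope written by `save_profile` but also accepts a bare dict saved by hand. The keys below are illustrative:

cfg = {"granularity": "Daily", "rats": ["2G", "3G", "LTE"]}
save_profile("Profil 1", cfg)          # stored as Profil_1.json
assert load_profile("Profil_1") == cfg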
process_kpi/kpi_health_check/rules.py ADDED
@@ -0,0 +1,132 @@
1
+ import json
2
+ import os
3
+ import re
4
+ from functools import lru_cache
5
+
6
+
7
+ def _norm(value: str) -> str:
8
+ s = str(value or "").strip().lower()
9
+ s = re.sub(r"[^0-9a-z]+", " ", s)
10
+ s = re.sub(r"\s+", " ", s).strip()
11
+ return s
12
+
13
+
14
+ def _project_root() -> str:
15
+ return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
16
+
17
+
18
+ def _load_curated_rows() -> list[dict]:
19
+ base_dir = os.path.join(_project_root(), "data", "kpi_health_check_presets")
20
+ candidates = [
21
+ os.path.join(base_dir, "presets_1.json"),
22
+ os.path.join(base_dir, "profil_1.json"),
23
+ ]
24
+
25
+ path = next((p for p in candidates if os.path.exists(p)), None)
26
+ if not path:
27
+ return []
28
+
29
+ try:
30
+ with open(path, "r", encoding="utf-8") as f:
31
+ txt = f.read()
32
+ txt = re.sub(r"\bNaN\b", "null", txt)  # bare NaN (e.g. from json.dump of float("nan")) is not valid JSON
33
+ obj = json.loads(txt)
34
+ rows = obj.get("rules", []) if isinstance(obj, dict) else []
35
+ return rows if isinstance(rows, list) else []
36
+ except Exception: # noqa: BLE001
37
+ return []
38
+
39
+
40
+ @lru_cache(maxsize=1)
41
+ def _curated_rules_map() -> dict[tuple[str, str], dict]:
42
+ out: dict[tuple[str, str], dict] = {}
43
+ for r in _load_curated_rows():
44
+ if not isinstance(r, dict):
45
+ continue
46
+ rat = _norm(r.get("RAT"))
47
+ kpi = _norm(r.get("KPI"))
48
+ if not rat or not kpi:
49
+ continue
50
+
51
+ direction = str(r.get("direction") or "").strip()
52
+ policy_raw = str(r.get("policy") or "").strip().lower()
53
+ policy = policy_raw if policy_raw in {"enforce", "notify"} else None
54
+ sla_raw = r.get("sla", None)
55
+ try:
56
+ sla = float(sla_raw) if sla_raw is not None else None
57
+ except Exception: # noqa: BLE001
58
+ sla = None
59
+
60
+ out[(rat, kpi)] = {
61
+ "direction": direction or None,
62
+ "sla": sla,
63
+ "policy": policy,
64
+ }
65
+ return out
66
+
67
+
68
+ def _curated_rule(kpi: str, rat: str | None = None) -> dict | None:
69
+ if not kpi or not rat:
70
+ return None
71
+ key = (_norm(rat), _norm(kpi))
72
+ return _curated_rules_map().get(key)
73
+
74
+
75
+ def infer_kpi_direction(kpi: str, rat: str | None = None) -> str:
76
+ curated = _curated_rule(kpi, rat)
77
+ if curated and curated.get("direction"):
78
+ return str(curated["direction"])
79
+
80
+ k = _norm(kpi)
81
+ if _norm(rat) == "twamp" and any(x in k for x in ["lost", "loss"]):
82
+ return "lower_is_better"
83
+ lower_is_better = [
84
+ "drop",
85
+ "dcr",
86
+ "blocking",
87
+ "block",
88
+ "congestion",
89
+ "loss",
90
+ "lost",
91
+ "discard",
92
+ "rtwp",
93
+ "prb usage",
94
+ "usage",
95
+ "fail",
96
+ ]
97
+ if any(x in k for x in lower_is_better):
98
+ return "lower_is_better"
99
+ return "higher_is_better"
100
+
101
+
102
+ def infer_kpi_sla(kpi: str, direction: str, rat: str | None = None) -> float | None:
103
+ curated = _curated_rule(kpi, rat)
104
+ if curated and curated.get("sla") is not None:
105
+ try:
106
+ return float(curated["sla"])
107
+ except Exception: # noqa: BLE001
108
+ pass
109
+
110
+ k = _norm(kpi)
111
+ if _norm(rat) == "twamp" and any(x in k for x in ["lost", "loss"]):
112
+ return 1000.0
113
+ if direction == "higher_is_better" and any(
114
+ x in k for x in ["availability", "cssr", "success", " sr"]
115
+ ):
116
+ return 98.0
117
+ if direction == "lower_is_better" and any(
118
+ x in k for x in ["drop", "dcr", "blocking", "congestion", "loss", "discard"]
119
+ ):
120
+ return 2.0
121
+ return None
122
+
123
+
124
+ def infer_kpi_policy(kpi: str, rat: str | None = None) -> str:
125
+ curated = _curated_rule(kpi, rat)
126
+ if curated and curated.get("policy"):
127
+ return str(curated["policy"])
128
+
129
+ k = _norm(kpi)
130
+ if "distance" in k:
131
+ return "notify"
132
+ return "enforce"
process_kpi/lte_kpi_requirements.md ADDED
@@ -0,0 +1,46 @@
1
+ # LTE CAPACITY REPORT
2
+
3
+ Based on the GSM and WCDMA examples, let's build the LTE capacity report.
4
+
5
+ ## Required Input
6
+
7
+ - File: LTE BH report with columns:
8
+ - PERIOD_START_TIME
9
+ - MRBTS/SBTS name
10
+ - LNBTS name
11
+ - LNCEL name
12
+ - DN
13
+ - Cell Avail excl BLU
14
+ - E-UTRAN Avg PRB usage per TTI DL
15
+ - Number of last days for the analysis
16
+ - Number of days for threshold
17
+ - Availability threshold
18
+ - PRB usage per TTI DL threshold
19
+ - Max difference between PRB usage over cells of the same BTS
20
+
21
+ ### TASK
22
+
23
+ - Pivot KPI in BH report per KPI (Cell Avail excl BLU, E-UTRAN Avg PRB usage per TTI DL)
24
+ - Calculate Average and Max of PRB usage per TTI DL
25
+ - Calculate Average and Max of Cell Avail excl BLU
26
+ - Count number of days with Cell Avail excl BLU below the Availability threshold
27
+ - Count number of days where PRB usage per TTI DL exceeded the PRB usage per TTI DL threshold
28
+ - Create separate DF per sector and band based on LNCEL name
29
+ - _1_L800: column_name = Sector_1_L800
30
+ - _2_L800: column_name = Sector_2_L800
31
+ - _3_L800: column_name = Sector_3_L800
32
+ - _1_L1800: column_name = Sector_1_L1800
33
+ - _2_L1800: column_name = Sector_2_L1800
34
+ - _3_L1800: column_name = Sector_3_L1800
35
+ - _1_L2300: column_name = Sector_1_L2300
36
+ - _2_L2300: column_name = Sector_2_L2300
37
+ - _3_L2300: column_name = Sector_3_L2300
38
+ - _1_L2600: column_name = Sector_1_L2600
39
+ - _2_L2600: column_name = Sector_2_L2600
40
+ - _3_L2600: column_name = Sector_3_L2600
41
+ - _1S_L1800: column_name = Sector_1S_L1800
42
+ - _2S_L1800: column_name = Sector_2S_L1800
43
+ - _3S_L1800: column_name = Sector_3S_L1800
44
+ - Merge DFs per sector on LNBTS name
45
+ - Concat DFs per band
46
+
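As a non-authoritative sketch of the sector/band split requested above (suffix list truncated; `split_by_suffix` is a hypothetical helper name, and it assumes the band token terminates the LNCEL name):

import pandas as pd

SUFFIX_TO_COLUMN = {
    "_1_L800": "Sector_1_L800",
    "_2_L800": "Sector_2_L800",
    "_1S_L1800": "Sector_1S_L1800",
    # ... remaining suffixes follow the list above
}

def split_by_suffix(df: pd.DataFrame) -> dict[str, pd.DataFrame]:
    # One DataFrame per sector/band, keyed by its target column name
    return {
        col: df[df["LNCEL name"].str.endswith(suffix)].copy()
        for suffix, col in SUFFIX_TO_COLUMN.items()
    }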
process_kpi/process_gsm_capacity.py ADDED
@@ -0,0 +1,719 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from queries.process_gsm import combined_gsm_database
5
+ from utils.check_sheet_exist import execute_checks_sheets_exist
6
+ from utils.convert_to_excel import convert_dfs, save_dataframe
7
+ from utils.kpi_analysis_utils import (
8
+ GsmAnalysis,
9
+ GsmCapacity,
10
+ analyze_sdcch_call_blocking,
11
+ analyze_tch_abis_fails,
12
+ analyze_tch_call_blocking,
13
+ cell_availability_analysis,
14
+ combine_comments,
15
+ create_daily_date,
16
+ create_dfs_per_kpi,
17
+ create_hourly_date,
18
+ kpi_naming_cleaning,
19
+ )
20
+ from utils.utils_functions import calculate_distances
21
+
22
+ GSM_ANALYSIS_COLUMNS = [
23
+ "ID_BTS",
24
+ "site_name",
25
+ "name",
26
+ "BSC",
27
+ "BCF",
28
+ "BTS",
29
+ "code",
30
+ "Region",
31
+ "adminState",
32
+ "frequencyBandInUse",
33
+ "cellId",
34
+ "band",
35
+ "site_config_band",
36
+ "trxRfPower",
37
+ "BCCH",
38
+ "Longitude",
39
+ "Latitude",
40
+ "TRX_TCH",
41
+ "MAL_TCH",
42
+ "amrSegLoadDepTchRateLower",
43
+ "amrSegLoadDepTchRateUpper",
44
+ "btsSpLoadDepTchRateLower",
45
+ "btsSpLoadDepTchRateUpper",
46
+ "amrWbFrCodecModeSet",
47
+ "dedicatedGPRScapacity",
48
+ "defaultGPRScapacity",
49
+ "number_trx_per_cell",
50
+ "number_trx_per_bcf",
51
+ "number_tch_per_cell",
52
+ "number_sd_per_cell",
53
+ "number_bcch_per_cell",
54
+ "number_ccch_per_cell",
55
+ "number_cbc_per_cell",
56
+ "number_total_channels_per_cell",
57
+ "number_signals_per_cell",
58
+ "hf_rate_coef",
59
+ "GPRS",
60
+ "TCH Actual HR%",
61
+ "Offered Traffic BH",
62
+ "Max_Traffic BH",
63
+ "Avg_Traffic BH",
64
+ "TCH UTILIZATION (@Max Traffic)",
65
+ "Tch utilization comments",
66
+ "ErlabngB_value",
67
+ "Target FR CHs",
68
+ "Target HR CHs",
69
+ "Target TCHs",
70
+ "Target TRXs",
71
+ "Number of required TRXs",
72
+ "max_tch_call_blocking_bh",
73
+ "avg_tch_call_blocking_bh",
74
+ "number_of_days_with_tch_blocking_exceeded_bh",
75
+ "tch_call_blocking_bh_comment",
76
+ "max_sdcch_real_blocking_bh",
77
+ "avg_sdcch_real_blocking_bh",
78
+ "number_of_days_with_sdcch_blocking_exceeded_bh",
79
+ "sdcch_real_blocking_bh_comment",
80
+ "Average_cell_availability_bh",
81
+ "number_of_days_exceeding_availability_threshold_bh",
82
+ "availability_comment_bh",
83
+ "max_tch_abis_fail_bh",
84
+ "avg_tch_abis_fail_bh",
85
+ "number_of_days_with_tch_abis_fail_exceeded_bh",
86
+ "tch_abis_fail_bh_comment",
87
+ "Average_cell_availability_daily",
88
+ "number_of_days_exceeding_availability_threshold_daily",
89
+ "availability_comment_daily",
90
+ "max_tch_abis_fail_daily",
91
+ "avg_tch_abis_fail_daily",
92
+ "number_of_days_with_tch_abis_fail_exceeded_daily",
93
+ "tch_abis_fail_daily_comment",
94
+ "BH Congestion status",
95
+ "operational_comment",
96
+ "Final comment",
97
+ "Final comment summary",
98
+ ]
99
+
100
+ OPERATIONAL_NEIGHBOURS_COLUMNS = [
101
+ "ID_BTS",
102
+ "name",
103
+ "operational_comment",
104
+ "BH Congestion status",
105
+ "Longitude",
106
+ "Latitude",
107
+ ]
108
+
109
+ GSM_COLUMNS = [
110
+ "ID_BTS",
111
+ "site_name",
112
+ "name",
113
+ "BSC",
114
+ "BCF",
115
+ "BTS",
116
+ "code",
117
+ "Region",
118
+ "adminState",
119
+ "frequencyBandInUse",
120
+ "amrSegLoadDepTchRateLower",
121
+ "amrSegLoadDepTchRateUpper",
122
+ "btsSpLoadDepTchRateLower",
123
+ "btsSpLoadDepTchRateUpper",
124
+ "amrWbFrCodecModeSet",
125
+ "dedicatedGPRScapacity",
126
+ "defaultGPRScapacity",
127
+ "cellId",
128
+ "band",
129
+ "site_config_band",
130
+ "trxRfPower",
131
+ "BCCH",
132
+ "number_trx_per_cell",
133
+ "number_trx_per_bcf",
134
+ "TRX_TCH",
135
+ "MAL_TCH",
136
+ "Longitude",
137
+ "Latitude",
138
+ ]
139
+
140
+ TRX_COLUMNS = [
141
+ "ID_BTS",
142
+ "number_tch_per_cell",
143
+ "number_sd_per_cell",
144
+ "number_bcch_per_cell",
145
+ "number_ccch_per_cell",
146
+ "number_cbc_per_cell",
147
+ "number_total_channels_per_cell",
148
+ "number_signals_per_cell",
149
+ ]
150
+
151
+ KPI_COLUMNS = [
152
+ "date",
153
+ "BTS_name",
154
+ "TCH_availability_ratio",
155
+ "2G_Carried_Traffic",
156
+ "TCH_call_blocking",
157
+ "TCH_ABIS_FAIL_CALL_c001084",
158
+ "SDCCH_real_blocking",
159
+ ]
160
+ BH_COLUMNS_FOR_CAPACITY = [
161
+ "Max_Traffic BH",
162
+ "Avg_Traffic BH",
163
+ "max_tch_call_blocking_bh",
164
+ "avg_tch_call_blocking_bh",
165
+ "number_of_days_with_tch_blocking_exceeded_bh",
166
+ "tch_call_blocking_bh_comment",
167
+ "max_sdcch_real_blocking_bh",
168
+ "avg_sdcch_real_blocking_bh",
169
+ "number_of_days_with_sdcch_blocking_exceeded_bh",
170
+ "sdcch_real_blocking_bh_comment",
171
+ "Average_cell_availability_bh",
172
+ "number_of_days_exceeding_availability_threshold_bh",
173
+ "availability_comment_bh",
174
+ "max_tch_abis_fail_bh",
175
+ "avg_tch_abis_fail_bh",
176
+ "number_of_days_with_tch_abis_fail_exceeded_bh",
177
+ "tch_abis_fail_bh_comment",
178
+ ]
179
+
180
+ DAILY_COLUMNS_FOR_CAPACITY = [
181
+ "Average_cell_availability_daily",
182
+ "number_of_days_exceeding_availability_threshold_daily",
183
+ "availability_comment_daily",
184
+ "max_tch_abis_fail_daily",
185
+ "avg_tch_abis_fail_daily",
186
+ "number_of_days_with_tch_abis_fail_exceeded_daily",
187
+ "tch_abis_fail_daily_comment",
188
+ ]
189
+
190
+
191
+ def bh_traffic_analysis(
192
+ df: pd.DataFrame,
193
+ number_of_kpi_days: int,
194
+ ) -> pd.DataFrame:
195
+
196
+ result_df = df.copy()
197
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
198
+ # last_days_df = last_days_df.fillna(0)
199
+
200
+ result_df["Avg_Traffic BH"] = last_days_df.mean(axis=1).round(2)
201
+ result_df["Max_Traffic BH"] = last_days_df.max(axis=1)
202
+ return result_df
203
+
204
+
205
+ def bh_dfs_per_kpi(
206
+ df: pd.DataFrame,
207
+ number_of_kpi_days: int = 7,
208
+ tch_blocking_threshold: float = 0.50,
209
+ sdcch_blocking_threshold: float = 0.50,
210
+ number_of_threshold_days: int = 3,
211
+ tch_abis_fails_threshold: int = 10,
212
+ availability_threshold: int = 95,
213
+ ) -> pd.DataFrame:
214
+ """
215
+ Create pivoted DataFrames for each KPI and perform analysis.
216
+
217
+ Args:
218
+ df: DataFrame containing KPI data
219
+ number_of_kpi_days: Number of days to analyze
220
+ *_threshold: TCH/SDCCH blocking, Abis-fail and availability limits used for flagging
221
+ number_of_threshold_days: Minimum days above threshold to flag for upgrade
222
+
223
+ Returns:
224
+ DataFrame with combined analysis results
225
+ """
226
+ pivoted_kpi_dfs = {}
227
+
228
+ pivoted_kpi_dfs = create_dfs_per_kpi(
229
+ df=df,
230
+ pivot_date_column="date",
231
+ pivot_name_column="BTS_name",
232
+ kpi_columns_from=2,
233
+ )
234
+
235
+ tch_call_blocking_df: pd.DataFrame = pivoted_kpi_dfs["TCH_call_blocking"]
236
+ sdcch_real_blocking_df: pd.DataFrame = pivoted_kpi_dfs["SDCCH_real_blocking"]
237
+ Carried_Traffic_df: pd.DataFrame = pivoted_kpi_dfs["2G_Carried_Traffic"]
238
+ tch_availability_ratio_df: pd.DataFrame = pivoted_kpi_dfs["TCH_availability_ratio"]
239
+ tch_abis_fails_df: pd.DataFrame = pivoted_kpi_dfs["TCH_ABIS_FAIL_CALL_c001084"]
240
+
241
+ # ANALYSIS
242
+
243
+ tch_call_blocking_df = analyze_tch_call_blocking(
244
+ df=tch_call_blocking_df,
245
+ number_of_kpi_days=number_of_kpi_days,
246
+ number_of_threshold_days=number_of_threshold_days,
247
+ tch_blocking_threshold=tch_blocking_threshold,
248
+ analysis_type="BH",
249
+ )
250
+
251
+ sdcch_real_blocking_df = analyze_sdcch_call_blocking(
252
+ df=sdcch_real_blocking_df,
253
+ number_of_kpi_days=number_of_kpi_days,
254
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
255
+ number_of_threshold_days=number_of_threshold_days,
256
+ analysis_type="BH",
257
+ )
258
+
259
+ Carried_Traffic_df = bh_traffic_analysis(
260
+ df=Carried_Traffic_df,
261
+ number_of_kpi_days=number_of_kpi_days,
262
+ )
263
+
264
+ tch_abis_fails_df = analyze_tch_abis_fails(
265
+ df=tch_abis_fails_df,
266
+ number_of_kpi_days=number_of_kpi_days,
267
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
268
+ number_of_threshold_days=number_of_threshold_days,
269
+ analysis_type="BH",
270
+ )
271
+ tch_availability_ratio_df = cell_availability_analysis(
272
+ df=tch_availability_ratio_df,
273
+ days=number_of_kpi_days,
274
+ availability_threshold=availability_threshold,
275
+ analysis_type="BH",
276
+ )
277
+
278
+ bh_kpi_df = pd.concat(
279
+ [
280
+ Carried_Traffic_df,
281
+ tch_call_blocking_df,
282
+ sdcch_real_blocking_df,
283
+ tch_availability_ratio_df,
284
+ tch_abis_fails_df,
285
+ ],
286
+ axis=1,
287
+ )
288
+ return bh_kpi_df
289
+
290
+
291
+ def analyse_bh_data(
292
+ bh_report_path: str,
293
+ number_of_kpi_days: int,
294
+ tch_blocking_threshold: float,
295
+ sdcch_blocking_threshold: float,
296
+ number_of_threshold_days: int,
297
+ tch_abis_fails_threshold: int,
298
+ availability_threshold: int,
299
+ ) -> list[pd.DataFrame]:
300
+ df = pd.read_csv(bh_report_path, delimiter=";")
301
+ df = kpi_naming_cleaning(df)
302
+ df = create_hourly_date(df)
303
+ df = df[KPI_COLUMNS]
304
+ df = bh_dfs_per_kpi(
305
+ df=df,
306
+ number_of_kpi_days=number_of_kpi_days,
307
+ tch_blocking_threshold=tch_blocking_threshold,
308
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
309
+ number_of_threshold_days=number_of_threshold_days,
310
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
311
+ availability_threshold=availability_threshold,
312
+ )
313
+
314
+ bh_df_for_capacity = df.copy()
315
+ bh_df_for_capacity = bh_df_for_capacity[BH_COLUMNS_FOR_CAPACITY]
316
+ bh_df_for_capacity = bh_df_for_capacity.reset_index()
317
+
318
+ # If columns have multiple levels (MultiIndex), flatten them
319
+ if isinstance(bh_df_for_capacity.columns, pd.MultiIndex):
320
+ bh_df_for_capacity.columns = [
321
+ "_".join([str(el) for el in col if el])
322
+ for col in bh_df_for_capacity.columns.values
323
+ ]
324
+ # bh_df_for_capacity = bh_df_for_capacity.reset_index()
325
+
326
+ # rename Bts_name to name
327
+ bh_df_for_capacity = bh_df_for_capacity.rename(columns={"BTS_name": "name"})
328
+
329
+ return [bh_df_for_capacity, df]
330
+
331
+
332
+ def daily_dfs_per_kpi(
333
+ df: pd.DataFrame,
334
+ number_of_kpi_days: int = 7,
335
+ availability_threshold: int = 95,
336
+ number_of_threshold_days: int = 3,
337
+ tch_abis_fails_threshold: int = 10,
338
+ sdcch_blocking_threshold: float = 0.5,
339
+ tch_blocking_threshold: float = 0.5,
340
+ ) -> pd.DataFrame:
341
+ """
342
+ Create pivoted DataFrames for each KPI and perform analysis.
343
+
344
+ Args:
345
+ df: DataFrame containing KPI data
346
+ number_of_kpi_days: Number of days to analyze
347
+ *_threshold: availability, Abis-fail and TCH/SDCCH blocking limits used for flagging
348
+ number_of_threshold_days: Minimum days above threshold to flag for upgrade
349
+
350
+ Returns:
351
+ DataFrame with combined analysis results
352
+ """
353
+ pivoted_kpi_dfs = {}
354
+
355
+ pivoted_kpi_dfs = create_dfs_per_kpi(
356
+ df=df,
357
+ pivot_date_column="date",
358
+ pivot_name_column="BTS_name",
359
+ kpi_columns_from=2,
360
+ )
361
+
362
+ tch_call_blocking_df: pd.DataFrame = pivoted_kpi_dfs["TCH_call_blocking"]
363
+ sdcch_real_blocking_df: pd.DataFrame = pivoted_kpi_dfs["SDCCH_real_blocking"]
364
+ Carried_Traffic_df: pd.DataFrame = pivoted_kpi_dfs["2G_Carried_Traffic"]
365
+ tch_availability_ratio_df: pd.DataFrame = pivoted_kpi_dfs["TCH_availability_ratio"]
366
+ tch_abis_fails_df: pd.DataFrame = pivoted_kpi_dfs["TCH_ABIS_FAIL_CALL_c001084"]
367
+
368
+ tch_availability_ratio_df = cell_availability_analysis(
369
+ df=tch_availability_ratio_df,
370
+ days=number_of_kpi_days,
371
+ availability_threshold=availability_threshold,
372
+ )
373
+ sdcch_real_blocking_df = analyze_sdcch_call_blocking(
374
+ df=sdcch_real_blocking_df,
375
+ number_of_kpi_days=number_of_kpi_days,
376
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
377
+ number_of_threshold_days=number_of_threshold_days,
378
+ analysis_type="Daily",
379
+ )
380
+ tch_call_blocking_df = analyze_tch_call_blocking(
381
+ df=tch_call_blocking_df,
382
+ number_of_kpi_days=number_of_kpi_days,
383
+ number_of_threshold_days=number_of_threshold_days,
384
+ tch_blocking_threshold=tch_blocking_threshold,
385
+ analysis_type="Daily",
386
+ )
387
+ tch_abis_fails_df = analyze_tch_abis_fails(
388
+ df=tch_abis_fails_df,
389
+ number_of_kpi_days=number_of_kpi_days,
390
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
391
+ number_of_threshold_days=number_of_threshold_days,
392
+ analysis_type="Daily",
393
+ )
394
+
395
+ daily_kpi_df = pd.concat(
396
+ [
397
+ tch_availability_ratio_df,
398
+ Carried_Traffic_df,
399
+ tch_call_blocking_df,
400
+ sdcch_real_blocking_df,
401
+ tch_abis_fails_df,
402
+ ],
403
+ axis=1,
404
+ )
405
+
406
+ daily_kpi_df = combine_comments(
407
+ daily_kpi_df,
408
+ "availability_comment_daily",
409
+ "tch_abis_fail_daily_comment",
410
+ "sdcch_real_blocking_daily_comment",
411
+ new_column="sdcch_comments",
412
+ )
413
+
414
+ daily_kpi_df = combine_comments(
415
+ daily_kpi_df,
416
+ "availability_comment_daily",
417
+ "tch_abis_fail_daily_comment",
418
+ "tch_call_blocking_daily_comment",
419
+ new_column="tch_comments",
420
+ )
421
+ return daily_kpi_df
422
+
423
+
424
+ def analyse_daily_data(
425
+ daily_report_path: str,
426
+ number_of_kpi_days: int,
427
+ tch_abis_fails_threshold: int,
428
+ availability_threshold: int,
429
+ number_of_threshold_days: int,
430
+ sdcch_blocking_threshold: float,
431
+ tch_blocking_threshold: float,
432
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
433
+ df = pd.read_csv(daily_report_path, delimiter=";")
434
+ df = kpi_naming_cleaning(df)
435
+ df = create_daily_date(df)
436
+ df = df[KPI_COLUMNS]
437
+ df = daily_dfs_per_kpi(
438
+ df=df,
439
+ number_of_kpi_days=number_of_kpi_days,
440
+ availability_threshold=availability_threshold,
441
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
442
+ number_of_threshold_days=number_of_threshold_days,
443
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
444
+ tch_blocking_threshold=tch_blocking_threshold,
445
+ )
446
+ daily_df_for_capacity = df.copy()
447
+ daily_df_for_capacity = daily_df_for_capacity[DAILY_COLUMNS_FOR_CAPACITY]
448
+ daily_df_for_capacity = daily_df_for_capacity.reset_index()
449
+
450
+ if isinstance(daily_df_for_capacity.columns, pd.MultiIndex):
451
+ daily_df_for_capacity.columns = [
452
+ "_".join([str(el) for el in col if el])
453
+ for col in daily_df_for_capacity.columns.values
454
+ ]
455
+ # Rename "BTS_name" to "name"
456
+ daily_df_for_capacity = daily_df_for_capacity.rename(columns={"BTS_name": "name"})
457
+
458
+ return daily_df_for_capacity, df
459
+
460
+
461
+ def get_gsm_databases(dump_path: str) -> pd.DataFrame:
462
+
463
+ dfs = combined_gsm_database(dump_path)
464
+ bts_df: pd.DataFrame = dfs[0]
465
+ trx_df: pd.DataFrame = dfs[2]
466
+
467
+ # Clean GSM df
468
+ bts_df = bts_df[GSM_COLUMNS]
469
+ trx_df = trx_df[TRX_COLUMNS]
470
+
471
+ # Remove duplicate in TRX df
472
+ trx_df = trx_df.drop_duplicates(subset=["ID_BTS"])
473
+
474
+ gsm_df = pd.merge(bts_df, trx_df, on="ID_BTS", how="left")
475
+
476
+ # add hf_rate_coef
477
+ gsm_df["hf_rate_coef"] = gsm_df["amrSegLoadDepTchRateLower"].map(
478
+ GsmAnalysis.hf_rate_coef
479
+ )
480
+ # Add "GPRS" colomn equal to (dedicatedGPRScapacity * number_tch_per_cell)/100
481
+ gsm_df["GPRS"] = (
482
+ gsm_df["dedicatedGPRScapacity"] * gsm_df["number_tch_per_cell"]
483
+ ) / 100
484
+
485
+ # "TCH Actual HR%" equal to "number of TCH" multiplyed by "Coef HF rate"
486
+ gsm_df["TCH Actual HR%"] = gsm_df["number_tch_per_cell"] * gsm_df["hf_rate_coef"]
487
+
488
+ # Remove empty rows
489
+ gsm_df = gsm_df.dropna(subset=["TCH Actual HR%"])
490
+
491
+ # Get "Offered Traffic BH" by mapping approximate "TCH Actual HR%" to 2G analysis_utility "erlangB" dict
492
+ gsm_df["Offered Traffic BH"] = gsm_df["TCH Actual HR%"].apply(
493
+ lambda x: GsmAnalysis.erlangB_table.get(int(x), 0)
494
+ )
495
+
496
+ return gsm_df
497
+
498
+
499
+ def get_operational_neighbours(distance: int) -> pd.DataFrame:
500
+
501
+ operational_df: pd.DataFrame = GsmCapacity.operational_neighbours_df
502
+ operational_df = operational_df[
503
+ ["ID_BTS", "name", "operational_comment", "Longitude", "Latitude"]
504
+ ]
505
+ # keep row only if column "operational_comment" is not "Operational is OK"
506
+ operational_df = operational_df[
507
+ operational_df["operational_comment"] != "Operational is OK"
508
+ ]
509
+ operational_df = operational_df[
510
+ operational_df[["Latitude", "Longitude"]].notna().all(axis=1)
511
+ ]
512
+
513
+ # Rename all columns in operational_df by adding "Dataset2_" prefix
514
+ operational_df = operational_df.add_prefix("Dataset2_")
515
+
516
+ congested_df: pd.DataFrame = GsmCapacity.operational_neighbours_df
517
+ congested_df = congested_df[
518
+ ["ID_BTS", "name", "BH Congestion status", "Longitude", "Latitude"]
519
+ ]
520
+
521
+ # Remove rows where "BH Congestion status" is empty or NaN
522
+ congested_df = congested_df[
523
+ congested_df["BH Congestion status"].notna()
524
+ & congested_df["BH Congestion status"].astype(str).str.len().astype(bool)
525
+ ]
526
+ # Remove rows where "BH Congestion status" is "nan, nan"
527
+ congested_df = congested_df[congested_df["BH Congestion status"] != "nan, nan"]
528
+
529
+ # Remove rows where Latitude and Longitude are empty
530
+ congested_df = congested_df[
531
+ congested_df[["Latitude", "Longitude"]].notna().all(axis=1)
532
+ ]
533
+
534
+ # Rename all columns in congested_df by adding "Dataset1_" prefix
535
+ congested_df = congested_df.add_prefix("Dataset1_")
536
+
537
+ distances_dfs = calculate_distances(
538
+ congested_df,
539
+ operational_df,
540
+ "Dataset1_ID_BTS",
541
+ "Dataset1_Latitude",
542
+ "Dataset1_Longitude",
543
+ "Dataset2_ID_BTS",
544
+ "Dataset2_Latitude",
545
+ "Dataset2_Longitude",
546
+ )
547
+ distances_df = distances_dfs[0]
548
+ df1 = distances_df[distances_df["Distance_km"] <= distance]
549
+
550
+ # Rename all columns in df1
551
+ df1 = df1.rename(
552
+ columns={
553
+ "Dataset1_ID_BTS": "Source_ID_BTS",
554
+ "Dataset1_name": "Source_name",
555
+ "Dataset1_BH Congestion status": "Source_BH Congestion status",
556
+ "Dataset1_Longitude": "Source_Longitude",
557
+ "Dataset1_Latitude": "Source_Latitude",
558
+ "Dataset2_ID_BTS_Dataset2": "Neighbour_ID_BTS",
559
+ "Dataset2_name_Dataset2": "Neighbour_name",
560
+ "Dataset2_operational_comment_Dataset2": "Neighbour_operational_comment",
561
+ "Dataset2_Longitude_Dataset2": "Neighbour_Longitude",
562
+ "Dataset2_Latitude_Dataset2": "Neighbour_Latitude",
563
+ }
564
+ )
565
+
566
+ # Remove rows if Source_name = Neighbour_name
567
+ df1 = df1[df1["Source_name"] != df1["Neighbour_name"]]
568
+
569
+ # Reset index
570
+ df1 = df1.reset_index(drop=True)
571
+ return df1
572
+
573
+
574
+ def analyze_gsm_data(
575
+ dump_path: str,
576
+ daily_report_path: str,
577
+ bh_report_path: str,
578
+ number_of_kpi_days: int,
579
+ number_of_threshold_days: int,
580
+ availability_threshold: int,
581
+ tch_abis_fails_threshold: int,
582
+ sdcch_blocking_threshold: float,
583
+ tch_blocking_threshold: float,
584
+ max_traffic_threshold: int,
585
+ operational_neighbours_distance: int,
586
+ ):
587
+ GsmCapacity.operational_neighbours_df = None
588
+
589
+ daily_kpi_dfs = analyse_daily_data(
590
+ daily_report_path=daily_report_path,
591
+ number_of_kpi_days=number_of_kpi_days,
592
+ availability_threshold=availability_threshold,
593
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
594
+ number_of_threshold_days=number_of_threshold_days,
595
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
596
+ tch_blocking_threshold=tch_blocking_threshold,
597
+ )
598
+
599
+ gsm_database_df: pd.DataFrame = get_gsm_databases(dump_path)
600
+
601
+ bh_kpi_dfs = analyse_bh_data(
602
+ bh_report_path=bh_report_path,
603
+ number_of_kpi_days=number_of_kpi_days,
604
+ tch_blocking_threshold=tch_blocking_threshold,
605
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
606
+ number_of_threshold_days=number_of_threshold_days,
607
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
608
+ availability_threshold=availability_threshold,
609
+ )
610
+
611
+ bh_kpi_df = bh_kpi_dfs[0]
612
+ bh_kpi_full_df = bh_kpi_dfs[1]
613
+
614
+ daily_kpi_df = daily_kpi_dfs[0]
615
+ daily_kpi_full_df = daily_kpi_dfs[1]
616
+
617
+ gsm_analysis_df = gsm_database_df.merge(bh_kpi_df, on="name", how="left")
618
+ gsm_analysis_df = gsm_analysis_df.merge(daily_kpi_df, on="name", how="left")
619
+
620
+ # "TCH UTILIZATION (@Max Traffic)" equal to "(Max_Trafic" divided by "Offered Traffic BH)*100"
621
+ gsm_analysis_df["TCH UTILIZATION (@Max Traffic)"] = (
622
+ gsm_analysis_df["Max_Traffic BH"] / gsm_analysis_df["Offered Traffic BH"]
623
+ ) * 100
624
+
625
+ # Add column "Tch utilization comments" : if "TCH UTILIZATION (@Max Traffic)" exceeded it's threshold then "Tch utilization exceeded threshold else None
626
+ gsm_analysis_df["Tch utilization comments"] = np.where(
627
+ gsm_analysis_df["TCH UTILIZATION (@Max Traffic)"] > max_traffic_threshold,
628
+ "Tch utilization exceeded threshold",
629
+ None,
630
+ )
631
+ # Add "BH Congestion status" : concatenate "Tch utilization comments" + "tch_call_blocking_bh_comment" + "sdcch_real_blocking_bh_comment"
632
+ gsm_analysis_df = combine_comments(
633
+ gsm_analysis_df,
634
+ "Tch utilization comments",
635
+ "tch_call_blocking_bh_comment",
636
+ "sdcch_real_blocking_bh_comment",
637
+ new_column="BH Congestion status",
638
+ )
639
+
640
+ # Add "ERLANGB value" =MAX TRAFFIC/(1-(MAX TCH call blocking/200))
641
+ gsm_analysis_df["ErlabngB_value"] = gsm_analysis_df["Max_Traffic BH"] / (
642
+ 1 - (gsm_analysis_df["max_tch_call_blocking_bh"] / 200)
643
+ )
644
+
645
+ # - Get "Target FR CHs" by mapping "ERLANG value" to 2G analysis_utility "erlangB" dict
646
+ gsm_analysis_df["Target FR CHs"] = gsm_analysis_df["ErlabngB_value"].apply(
647
+ lambda x: GsmAnalysis.erlangB_table.get(int(x) if pd.notnull(x) else 0, 0)
648
+ )
649
+
650
+ # "Target HR CHs" equal to "Target FR CHs" * 2
651
+ gsm_analysis_df["Target HR CHs"] = gsm_analysis_df["Target FR CHs"] * 2
652
+
653
+ # - Target TCHs equal to Target HR CHs + Signal + GPRS + SDCCH
654
+ gsm_analysis_df["Target TCHs"] = (
655
+ gsm_analysis_df["Target HR CHs"]
656
+ + gsm_analysis_df["number_signals_per_cell"]
657
+ + gsm_analysis_df["GPRS"]
658
+ + gsm_analysis_df["number_sd_per_cell"]
659
+ )
660
+ # "Target TRXs" equal to roundup(Target TCHs/8)
661
+ gsm_analysis_df["Target TRXs"] = np.ceil(
662
+ gsm_analysis_df["Target TCHs"] / 8
663
+ )
664
+
665
+ # "Number of required TRXs" equal to difference between "Target TRXs" and "number_trx_per_cell"
666
+ gsm_analysis_df["Number of required TRXs"] = (
667
+ gsm_analysis_df["Target TRXs"] - gsm_analysis_df["number_trx_per_cell"]
668
+ )
669
+
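A worked pass through the dimensioning chain above (all figures invented; the FR-channel lookup depends on GsmAnalysis.erlangB_table):

max_traffic_bh = 14.0                    # Erl
max_tch_blocking = 4.0                   # %
erlangb_value = max_traffic_bh / (1 - max_tch_blocking / 200)  # ~14.29
target_fr = 21                           # erlangB_table.get(int(14)), say
target_hr = target_fr * 2                # 42 half-rate channels
target_tchs = target_hr + 2 + 3 + 2      # + signalling, GPRS, SDCCH = 49
target_trxs = -(-target_tchs // 8)       # ceil(49 / 8) = 7
required = target_trxs - 4               # vs 4 existing TRXs -> 3 more needed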
670
+ # if "availability_comment_daily" equal to "Down Site" then "Down Site"
671
+ # if "availability_comment_daily" is not "Availability OK" and "tch_abis_fail_daily_comment" equal to "tch abis fail exceeded threshold" then "Availability and TX issues"
672
+ # if "availability_comment_daily" is not "Availability OK" and "tch_abis_fail_daily_comment" is empty then "Availability issues"
673
+ # if "availability_comment_daily" is "Availability OK" and "tch_abis_fail_daily_comment" equal to "tch abis fail exceeded threshold" then "TX issues"
674
+ # Else "Operational is OK"
675
+ gsm_analysis_df["operational_comment"] = np.select(
676
+ [
677
+ gsm_analysis_df["availability_comment_daily"] == "Down Site", # 1
678
+ (gsm_analysis_df["availability_comment_daily"] != "Availability OK")
679
+ & (
680
+ gsm_analysis_df["tch_abis_fail_daily_comment"]
681
+ == "tch abis fail exceeded threshold"
682
+ ), # 2
683
+ (gsm_analysis_df["availability_comment_daily"] != "Availability OK")
684
+ & pd.isna(gsm_analysis_df["tch_abis_fail_daily_comment"]), # 3
685
+ (gsm_analysis_df["availability_comment_daily"] == "Availability OK")
686
+ & (
687
+ gsm_analysis_df["tch_abis_fail_daily_comment"]
688
+ == "tch abis fail exceeded threshold"
689
+ ), # 4
690
+ ],
691
+ [
692
+ "Down Site", # 1
693
+ "Availability and TX issues", # 2
694
+ "Availability issues", # 3
695
+ "TX issues", # 4
696
+ ],
697
+ default="Operational is OK",
698
+ )
699
+
700
+ # Add "Final comment" with "BH Congestion status" + "operational_comment"
701
+ gsm_analysis_df = combine_comments(
702
+ gsm_analysis_df,
703
+ "BH Congestion status",
704
+ "operational_comment",
705
+ new_column="Final comment",
706
+ )
707
+ # Map the final comment using final_comment_mapping
708
+ gsm_analysis_df["Final comment summary"] = gsm_analysis_df["Final comment"].map(
709
+ GsmCapacity.final_comment_mapping
710
+ )
711
+ gsm_analysis_df = gsm_analysis_df[GSM_ANALYSIS_COLUMNS]
712
+
713
+ GsmCapacity.operational_neighbours_df = gsm_analysis_df[
714
+ OPERATIONAL_NEIGHBOURS_COLUMNS
715
+ ]
716
+ distance_df = get_operational_neighbours(operational_neighbours_distance)
717
+
718
+ return [gsm_analysis_df, bh_kpi_full_df, daily_kpi_full_df, distance_df]
719
+ # return [gsm_analysis_df, bh_kpi_full_df, daily_kpi_full_df]
process_kpi/process_lcg_capacity.py ADDED
@@ -0,0 +1,286 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from utils.kpi_analysis_utils import (
5
+ analyze_lcg_utilization,
6
+ combine_comments,
7
+ create_daily_date,
8
+ create_dfs_per_kpi,
9
+ kpi_naming_cleaning,
10
+ )
11
+ from utils.utils_vars import get_physical_db
12
+
13
+ lcg_comments_mapping = {
14
+ "2": "No Congestion",
15
+ "1": "No Congestion",
16
+ "lcg1 exceeded threshold, lcg2 exceeded threshold, 2": "Need BB SU upgrage",
17
+ "lcg1 exceeded threshold, 2": "Need LCG balancing",
18
+ "lcg1 exceeded threshold, 1": "Need BB SU upgrage",
19
+ "lcg2 exceeded threshold, 2": "Need LCG balancing",
20
+ }
21
+
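For orientation: the keys above are the strings combine_comments produces, "<lcg_comment>, <number_of_lcg>", so for example:

print(lcg_comments_mapping["2"])                           # No Congestion (2 LCGs, no breach)
print(lcg_comments_mapping["lcg1 exceeded threshold, 2"])  # Need LCG balancing
print(lcg_comments_mapping["lcg1 exceeded threshold, 1"])  # Need BB SU upgrade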
22
+
23
+ KPI_COLUMNS = [
24
+ "date",
25
+ "WBTS_name",
26
+ "lcg_id",
27
+ "BB_SU_LCG_MAX_R",
28
+ ]
29
+
30
+ LCG_ANALYSIS_COLUMNS = [
31
+ "WBTS_name",
32
+ "lcg1_utilisation",
33
+ "avg_lcg1",
34
+ "max_lcg1",
35
+ "number_of_days_with_lcg1_exceeded",
36
+ "lcg1_comment",
37
+ "lcg2_utilisation",
38
+ "avg_lcg2",
39
+ "max_lcg2",
40
+ "number_of_days_with_lcg2_exceeded",
41
+ "lcg2_comment",
42
+ "difference_between_lcgs",
43
+ "difference_between_lcgs_comment",
44
+ "lcg_comment",
45
+ "number_of_lcg",
46
+ "final_comments",
47
+ ]
48
+
49
+
50
+ def lcg_kpi_analysis(
51
+ df,
52
+ num_last_days,
53
+ num_threshold_days,
54
+ lcg_utilization_threshold,
55
+ difference_between_lcgs,
56
+ ) -> list[pd.DataFrame]:
57
+ """
58
+ Analyze LCG capacity data.
59
+
60
+ Args:
61
+ df: DataFrame containing LCG capacity data
62
+ num_last_days: Number of days for analysis
63
+ num_threshold_days: Minimum days above threshold to flag for upgrade
64
+ lcg_utilization_threshold: Utilization threshold percentage for flagging
65
+ difference_between_lcgs: Difference between LCGs for flagging
66
+
67
+ Returns:
68
+ Processed DataFrame with LCG capacity analysis results
69
+ """
70
+
71
+ lcg1_df = df[df["lcg_id"] == 1]
72
+ lcg2_df = df[df["lcg_id"] == 2]
73
+
74
+ pivoted_kpi_dfs = create_dfs_per_kpi(
75
+ df=df,
76
+ pivot_date_column="date",
77
+ pivot_name_column="WBTS_name",
78
+ kpi_columns_from=2,
79
+ )
80
+
81
+ pivoted_lcg1_df = create_dfs_per_kpi(
82
+ df=lcg1_df,
83
+ pivot_date_column="date",
84
+ pivot_name_column="WBTS_name",
85
+ kpi_columns_from=2,
86
+ )
87
+ pivoted_lcg2_df = create_dfs_per_kpi(
88
+ df=lcg2_df,
89
+ pivot_date_column="date",
90
+ pivot_name_column="WBTS_name",
91
+ kpi_columns_from=2,
92
+ )
93
+
94
+ # BB_SU_LCG_MAX_R keeps all sites with LCG 1 and/or LCG 2
95
+ BB_SU_LCG_MAX_R_df = pivoted_kpi_dfs["BB_SU_LCG_MAX_R"]
96
+
97
+ pivoted_lcg1_df = pivoted_lcg1_df["BB_SU_LCG_MAX_R"]
98
+ pivoted_lcg2_df = pivoted_lcg2_df["BB_SU_LCG_MAX_R"]
99
+
100
+ # rename column
101
+ pivoted_lcg1_df = pivoted_lcg1_df.rename(
102
+ columns={"BB_SU_LCG_MAX_R": "lcg1_utilisation"}
103
+ )
104
+ pivoted_lcg2_df = pivoted_lcg2_df.rename(
105
+ columns={"BB_SU_LCG_MAX_R": "lcg2_utilisation"}
106
+ )
107
+
108
+ # analyze lcg utilization for each site per number_of_kpi_days and number_of_threshold_days
109
+ pivoted_lcg1_df = analyze_lcg_utilization(
110
+ df=pivoted_lcg1_df,
111
+ number_of_kpi_days=num_last_days,
112
+ number_of_threshold_days=num_threshold_days,
113
+ kpi_threshold=lcg_utilization_threshold,
114
+ kpi_column_name="lcg1",
115
+ )
116
+ pivoted_lcg2_df = analyze_lcg_utilization(
117
+ df=pivoted_lcg2_df,
118
+ number_of_kpi_days=num_last_days,
119
+ number_of_threshold_days=num_threshold_days,
120
+ kpi_threshold=lcg_utilization_threshold,
121
+ kpi_column_name="lcg2",
122
+ )
123
+ kpi_df = pd.concat(
124
+ [
125
+ BB_SU_LCG_MAX_R_df,
126
+ pivoted_lcg1_df,
127
+ pivoted_lcg2_df,
128
+ ],
129
+ axis=1,
130
+ )
131
+
132
+ kpi_df = kpi_df.reset_index()
133
+
134
+ # Number of available lcgs
135
+ # kpi_df = pd.merge(kpi_df, available_lcgs_df, on="WBTS_name", how="left")
136
+
137
+ # calculate difference between lcg1 and lcg2
138
+ kpi_df["difference_between_lcgs"] = kpi_df[["avg_lcg1", "avg_lcg2"]].apply(
139
+ lambda row: max(row) - min(row), axis=1
140
+ )
141
+
142
+ # flag if difference between lcg1 and lcg2 is above threshold
143
+ kpi_df["difference_between_lcgs_comment"] = np.where(
144
+ kpi_df["difference_between_lcgs"] > difference_between_lcgs,
145
+ "difference between lcgs exceeded threshold",
146
+ None,
147
+ )
148
+
149
+ # Combine comments
150
+ kpi_df = combine_comments(
151
+ kpi_df,
152
+ "lcg1_comment",
153
+ "lcg2_comment",
154
+ # "difference_between_lcgs_comment",
155
+ new_column="lcg_comment",
156
+ )
157
+
158
+ # Replace if "lcg_comment" contains "nan" and ", nan" and "nan, " with None
159
+ kpi_df["lcg_comment"] = kpi_df["lcg_comment"].replace("nan", None)
160
+
161
+ # Remove "nan" from comma-separated strings
162
+ kpi_df["lcg_comment"] = (
163
+ kpi_df["lcg_comment"].str.replace(r"\bnan\b,?\s?", "", regex=True).str.strip()
164
+ )
165
+
166
+ kpi_df["number_of_lcg"] = np.where(
167
+ kpi_df["avg_lcg1"].notna() & kpi_df["avg_lcg2"].notna(),
168
+ 2,
169
+ np.where(kpi_df["avg_lcg1"].notna() | kpi_df["avg_lcg2"].notna(), 1, 0),
170
+ )
171
+ # Combine comments
172
+ kpi_df = combine_comments(
173
+ kpi_df,
174
+ "lcg_comment",
175
+ "number_of_lcg",
176
+ new_column="final_comments",
177
+ )
178
+ kpi_df["final_comments"] = kpi_df["final_comments"].apply(
179
+ lambda x: lcg_comments_mapping.get(x, x)
180
+ )
181
+ kpi_df = kpi_df[LCG_ANALYSIS_COLUMNS]
182
+
183
+ lcg_analysis_df = kpi_df.copy()
184
+
185
+ lcg_analysis_df = lcg_analysis_df[
186
+ [
187
+ "WBTS_name",
188
+ "avg_lcg1",
189
+ "max_lcg1",
190
+ "number_of_days_with_lcg1_exceeded",
191
+ "lcg1_comment",
192
+ "avg_lcg2",
193
+ "max_lcg2",
194
+ "number_of_days_with_lcg2_exceeded",
195
+ "lcg2_comment",
196
+ "difference_between_lcgs",
197
+ "final_comments",
198
+ ]
199
+ ]
200
+
201
+ lcg_analysis_df = lcg_analysis_df.droplevel(level=1, axis=1)
202
+ # Remove rows where WBTS_name is shorter than 5 characters
203
+ lcg_analysis_df = lcg_analysis_df[lcg_analysis_df["WBTS_name"].str.len() >= 5]
204
+
205
+ # Add code
206
+ lcg_analysis_df["code"] = lcg_analysis_df["WBTS_name"].str.split("_").str[0]
207
+
208
+ lcg_analysis_df["code"] = (
209
+ pd.to_numeric(lcg_analysis_df["code"], errors="coerce").fillna(0).astype(int)
210
+ )
211
+
212
+ lcg_analysis_df["Region"] = (
213
+ lcg_analysis_df["WBTS_name"].str.split("_").str[1:2].str.join("_")
214
+ )
215
+ lcg_analysis_df["Region"] = lcg_analysis_df["Region"].fillna("UNKNOWN")
216
+
217
+ # move code to the first column
218
+ lcg_analysis_df = lcg_analysis_df[
219
+ ["code", "Region"]
220
+ + [col for col in lcg_analysis_df if col != "code" and col != "Region"]
221
+ ]
222
+
223
+ # Load physical database
224
+ physical_db: pd.DataFrame = get_physical_db()
225
+
226
+ # Convert code_sector to code
227
+ physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
228
+ # remove duplicates
229
+ physical_db = physical_db.drop_duplicates(subset="code")
230
+
231
+ # keep only code and longitude and latitude
232
+ physical_db = physical_db[["code", "Longitude", "Latitude"]]
233
+
234
+ physical_db["code"] = (
235
+ pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
236
+ )
237
+
238
+ lcg_analysis_df = pd.merge(
239
+ lcg_analysis_df,
240
+ physical_db,
241
+ on="code",
242
+ how="left",
243
+ )
244
+
245
+ return [lcg_analysis_df, kpi_df]
246
+
247
+
248
+ def load_and_process_lcg_data(
249
+ uploaded_file,
250
+ num_last_days,
251
+ num_threshold_days,
252
+ lcg_utilization_threshold,
253
+ difference_between_lcgs,
254
+ ) -> list[pd.DataFrame]:
255
+ """Load and process data for LCG capacity analysis."""
256
+ try:
257
+ # Load data
258
+ df = pd.read_csv(uploaded_file, delimiter=";")
259
+ if df.empty:
260
+ raise ValueError("Uploaded file is empty")
261
+
262
+ df = kpi_naming_cleaning(df)
263
+ df = create_daily_date(df)
264
+
265
+ # Validate required columns
266
+ missing_cols = [col for col in KPI_COLUMNS if col not in df.columns]
267
+ if missing_cols:
268
+ raise ValueError(f"Missing required columns: {', '.join(missing_cols)}")
269
+
270
+ df = df[KPI_COLUMNS]
271
+
272
+ # Process the data
273
+ dfs = lcg_kpi_analysis(
274
+ df,
275
+ num_last_days,
276
+ num_threshold_days,
277
+ lcg_utilization_threshold,
278
+ difference_between_lcgs,
279
+ )
280
+ return dfs
281
+
282
+ except Exception as e:
283
+ # Log the error and re-raise with a user-friendly message
284
+ error_msg = f"Error processing LCG data: {str(e)}"
285
+ print(error_msg)  # st was undefined here; streamlit is not imported in this Panel app
286
+ raise
process_kpi/process_lte_capacity.py ADDED
@@ -0,0 +1,528 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from queries.process_lte import process_lte_data
5
+ from utils.convert_to_excel import save_dataframe
6
+ from utils.kpi_analysis_utils import (
7
+ LteCapacity,
8
+ analyze_prb_usage,
9
+ cell_availability_analysis,
10
+ create_dfs_per_kpi,
11
+ create_hourly_date,
12
+ kpi_naming_cleaning,
13
+ )
14
+
15
+ LTE_ANALYSIS_COLUMNS = [
16
+ "code",
17
+ "code_sector",
18
+ "Region",
19
+ "site_config_band",
20
+ "Longitude",
21
+ "Latitude",
22
+ "LNCEL_name_l800",
23
+ "LNCEL_name_l1800",
24
+ "LNCEL_name_l2300",
25
+ "LNCEL_name_l2600",
26
+ "LNCEL_name_l1800s",
27
+ "avg_prb_usage_bh_l800",
28
+ "avg_prb_usage_bh_l1800",
29
+ "avg_prb_usage_bh_l2300",
30
+ "avg_prb_usage_bh_l2600",
31
+ "avg_prb_usage_bh_l1800s",
32
+ "avg_prb_usage_bh_l800_2nd",
33
+ "avg_prb_usage_bh_l1800_2nd",
34
+ "avg_prb_usage_bh_l2300_2nd",
35
+ "avg_prb_usage_bh_l2600_2nd",
36
+ "avg_prb_usage_bh_l1800s_2nd",
37
+ "avg_act_ues_l800",
38
+ "avg_act_ues_l1800",
39
+ "avg_act_ues_l2300",
40
+ "avg_act_ues_l2600",
41
+ "avg_act_ues_l1800s",
42
+ "avg_dl_thp_l800",
43
+ "avg_dl_thp_l1800",
44
+ "avg_dl_thp_l2300",
45
+ "avg_dl_thp_l2600",
46
+ "avg_dl_thp_l1800s",
47
+ "avg_ul_thp_l800",
48
+ "avg_ul_thp_l1800",
49
+ "avg_ul_thp_l2300",
50
+ "avg_ul_thp_l2600",
51
+ "avg_ul_thp_l1800s",
52
+ "num_congested_cells",
53
+ "num_cells",
54
+ "num_cell_with_kpi",
55
+ "num_down_or_no_kpi_cells",
56
+ "prb_diff_between_cells",
57
+ "load_balance_required",
58
+ "congestion_comment",
59
+ "final_comments",
60
+ ]
61
+
62
+ LTE_DATABASE_COLUMNS = [
63
+ "code",
64
+ "Region",
65
+ "site_config_band",
66
+ "final_name",
67
+ "Longitude",
68
+ "Latitude",
69
+ ]
70
+
71
+ KPI_COLUMNS = [
72
+ "date",
73
+ "LNCEL_name",
74
+ "Cell_Avail_excl_BLU",
75
+ "E_UTRAN_Avg_PRB_usage_per_TTI_DL",
76
+ "DL_PRB_Util_p_TTI_Lev_10",
77
+ "Avg_PDCP_cell_thp_UL",
78
+ "Avg_PDCP_cell_thp_DL",
79
+ "Avg_act_UEs_DL",
80
+ ]
81
+ PRB_COLUMNS = [
82
+ "LNCEL_name",
83
+ "avg_prb_usage_bh",
84
+ "avg_prb_usage_bh_2nd",
85
+ "avg_act_ues",
86
+ "avg_dl_thp",
87
+ "avg_ul_thp",
88
+ ]
89
+
90
+
91
+ def lte_analysis_logic(
92
+ df: pd.DataFrame,
93
+ prb_usage_threshold: int,
94
+ prb_diff_between_cells_threshold: int,
95
+ ) -> pd.DataFrame:
96
+ lte_analysis_logic_df = df.copy()
97
+ lte_analysis_logic_df["num_congested_cells"] = (
98
+ lte_analysis_logic_df[
99
+ [
100
+ "avg_prb_usage_bh_l800",
101
+ "avg_prb_usage_bh_l1800",
102
+ "avg_prb_usage_bh_l2300",
103
+ "avg_prb_usage_bh_l2600",
104
+ "avg_prb_usage_bh_l1800s",
105
+ ]
106
+ ]
107
+ >= prb_usage_threshold
108
+ ).sum(axis=1)
109
+
110
+ # Count defined cells across LNCEL_name_l800/l1800/l2300/l2600/l1800s
111
+ lte_analysis_logic_df["num_cells"] = lte_analysis_logic_df[
112
+ [
113
+ "LNCEL_name_l800",
114
+ "LNCEL_name_l1800",
115
+ "LNCEL_name_l2300",
116
+ "LNCEL_name_l2600",
117
+ "LNCEL_name_l1800s",
118
+ ]
119
+ ].count(axis=1)
120
+
121
+ # Add Number of cell with KPI
122
+ lte_analysis_logic_df["num_cell_with_kpi"] = lte_analysis_logic_df[
123
+ [
124
+ "avg_prb_usage_bh_l800",
125
+ "avg_prb_usage_bh_l1800",
126
+ "avg_prb_usage_bh_l2300",
127
+ "avg_prb_usage_bh_l2600",
128
+ "avg_prb_usage_bh_l1800s",
129
+ ]
130
+ ].count(axis=1)
131
+
132
+ # Number of Down or No KPI cells = num_cells -num_cell_with_kpi
133
+ lte_analysis_logic_df["num_down_or_no_kpi_cells"] = (
134
+ lte_analysis_logic_df["num_cells"] - lte_analysis_logic_df["num_cell_with_kpi"]
135
+ )
136
+
137
+ # Max difference between avg_prb_usage_bh across the five bands
138
+ lte_analysis_logic_df["prb_diff_between_cells"] = lte_analysis_logic_df[
139
+ [
140
+ "avg_prb_usage_bh_l800",
141
+ "avg_prb_usage_bh_l1800",
142
+ "avg_prb_usage_bh_l2300",
143
+ "avg_prb_usage_bh_l2600",
144
+ "avg_prb_usage_bh_l1800s",
145
+ ]
146
+ ].apply(lambda row: max(row) - min(row), axis=1)
147
+
148
+ # Add Load balance required column = Yes if prb_diff_between_cells > prb_diff_between_cells_threshold else No
149
+ lte_analysis_logic_df["load_balance_required"] = lte_analysis_logic_df[
150
+ "prb_diff_between_cells"
151
+ ].apply(lambda x: "Yes" if x > prb_diff_between_cells_threshold else "No")
152
+
153
+ # Add Next band column
154
+ lte_analysis_logic_df["next_band"] = lte_analysis_logic_df["site_config_band"].map(
155
+ LteCapacity.next_band_mapping
156
+ )
157
+
158
+ # Add congestion comments
159
+ # if num_congested_cells == 0 and num_down_or_no_kpi_cells == 0 -> "No Congestion"
160
+ # if num_congested_cells == 0 and num_down_or_no_kpi_cells > 0 -> "No congestion but Down cell"
161
+ # if num_congested_cells > 0 and num_down_or_no_kpi_cells > 0 -> "Congestion but Colocated Down Cell"
162
+ # Else "Need Action"
163
+ conditions = [
164
+ (lte_analysis_logic_df["num_congested_cells"] == 0)
165
+ & (lte_analysis_logic_df["num_down_or_no_kpi_cells"] == 0),
166
+ (lte_analysis_logic_df["num_congested_cells"] == 0)
167
+ & (lte_analysis_logic_df["num_down_or_no_kpi_cells"] > 0),
168
+ (lte_analysis_logic_df["num_congested_cells"] > 0)
169
+ & (lte_analysis_logic_df["num_down_or_no_kpi_cells"] > 0),
170
+ ]
171
+
172
+ choices = [
173
+ "No Congestion",
174
+ "No congestion but Down cell",
175
+ "Congestion but Colocated Down Cell",
176
+ ]
177
+
178
+ lte_analysis_logic_df["congestion_comment"] = np.select(
179
+ conditions, choices, default="Need Action"
180
+ )
181
+
182
+ # Add "Actions" column
183
+ # if load_balance_required = "Yes" and congestion_comment = "Need Action" then "Load Balancing parameter tuning required"
184
+ # if load_balance_required = "Yes" and congestion_comment = "Need Action" then "Add Layer"
185
+ # Else keep congestion_comment
186
+ conditions = [
187
+ (lte_analysis_logic_df["load_balance_required"] == "Yes")
188
+ & (lte_analysis_logic_df["congestion_comment"] == "Need Action"),
189
+ (lte_analysis_logic_df["load_balance_required"] == "No")
190
+ & (lte_analysis_logic_df["congestion_comment"] == "Need Action"),
191
+ ]
192
+
193
+ choices = [
194
+ "Load Balancing parameter tuning required",
195
+ "Add Layer",
196
+ ]
197
+
198
+ lte_analysis_logic_df["actions"] = np.select(
199
+ conditions, choices, default=lte_analysis_logic_df["congestion_comment"]
200
+ )
201
+
202
+ # Add Final Comments
203
+ # if "actions" = "Add Layer" then "'Add' + 'next_band''
204
+ # Else keep "actions" as it is
205
+ lte_analysis_logic_df["final_comments"] = lte_analysis_logic_df.apply(
206
+ lambda row: (
207
+ f"Add {row['next_band']}"
208
+ if row["actions"] == "Add Layer"
209
+ else row["actions"]
210
+ ),
211
+ axis=1,
212
+ )
213
+
214
+ # create column "sector" equal to conteent of "LNCEL_name_l800" if not empty else "LNCEL_name_l1800" if not empty else "LNCEL_name_l2300"
215
+ lte_analysis_logic_df["sector"] = (
216
+ lte_analysis_logic_df["LNCEL_name_l800"]
217
+ .combine_first(lte_analysis_logic_df["LNCEL_name_l1800"])
218
+ .combine_first(lte_analysis_logic_df["LNCEL_name_l2300"])
219
+ .combine_first(lte_analysis_logic_df["LNCEL_name_l2600"])
220
+ .combine_first(lte_analysis_logic_df["LNCEL_name_l1800s"])
221
+ )
222
+ # remove rows where sector is empty
223
+ lte_analysis_logic_df = lte_analysis_logic_df[
224
+ lte_analysis_logic_df["sector"].notna()
225
+ ]
226
+ # Add sector_id column if sector contains : '_1_" then 1 elif sector contains : '_2_" then 2 elif sector contains : '_3_" then 3
227
+ lte_analysis_logic_df["sector_id"] = np.where(
228
+ lte_analysis_logic_df["sector"].str.contains("_1_"),
229
+ 1,
230
+ np.where(
231
+ lte_analysis_logic_df["sector"].str.contains("_2_"),
232
+ 2,
233
+ np.where(lte_analysis_logic_df["sector"].str.contains("_3_"), 3, np.nan),
234
+ ),
235
+ )
236
+ # add code_sector column by combine code and sector_id
237
+ lte_analysis_logic_df["code_sector"] = (
238
+ lte_analysis_logic_df["code"].astype(str)
239
+ + "_"
240
+ + lte_analysis_logic_df["sector_id"].astype(str)
241
+ )
242
+
243
+ # remove '.0' from code_sector
244
+ lte_analysis_logic_df["code_sector"] = lte_analysis_logic_df[
245
+ "code_sector"
246
+ ].str.replace(".0", "")
247
+
248
+ # lte_analysis_logic_df = lte_analysis_logic_df[LTE_ANALYSIS_COLUMNS]
249
+ return lte_analysis_logic_df
250
+
251
+
252
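The three explicit rules plus the `np.select` default are easier to see on a toy frame. A minimal sketch with invented counts (not values from any real report):

```python
import numpy as np
import pandas as pd

toy = pd.DataFrame(
    {"num_congested_cells": [0, 0, 2, 1], "num_down_or_no_kpi_cells": [0, 1, 1, 0]}
)
conditions = [
    (toy["num_congested_cells"] == 0) & (toy["num_down_or_no_kpi_cells"] == 0),
    (toy["num_congested_cells"] == 0) & (toy["num_down_or_no_kpi_cells"] > 0),
    (toy["num_congested_cells"] > 0) & (toy["num_down_or_no_kpi_cells"] > 0),
]
choices = [
    "No Congestion",
    "No congestion but Down cell",
    "Congestion but Colocated Down Cell",
]
toy["congestion_comment"] = np.select(conditions, choices, default="Need Action")
print(toy["congestion_comment"].tolist())
# ['No Congestion', 'No congestion but Down cell',
#  'Congestion but Colocated Down Cell', 'Need Action']
```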
+ def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
+     # Base DataFrame with unique codes, Region, and site_config_band
+     all_codes_df = df[
+         ["code", "Region", "site_config_band", "Longitude", "Latitude"]
+     ].drop_duplicates()
+
+     # Configuration of sector groups with their LNCEL patterns and column suffixes.
+     # Format: { "group_key": [(lncel_name_pattern, column_suffix), ...] }
+     # Example: for group "1", pattern "_1_L800" maps to suffix "l800".
+     sector_groups_config = {
+         "1": [
+             ("_1_L800", "l800"),
+             ("_1_L1800", "l1800"),
+             ("_1_L2300", "l2300"),
+             ("_1_L2600", "l2600"),
+             ("_1S_L1800", "l1800s"),
+         ],
+         "2": [
+             ("_2_L800", "l800"),
+             ("_2_L1800", "l1800"),
+             ("_2_L2300", "l2300"),
+             ("_2_L2600", "l2600"),
+             ("_2S_L1800", "l1800s"),
+         ],
+         "3": [
+             ("_3_L800", "l800"),
+             ("_3_L1800", "l1800"),
+             ("_3_L2300", "l2300"),
+             ("_3_L2600", "l2600"),
+             ("_3S_L1800", "l1800s"),
+         ],
+     }
+
+     all_processed_sectors_dfs = []
+
+     for sector_group_key, band_configurations in sector_groups_config.items():
+         # Start with the base DataFrame for the current sector group
+         current_sector_group_df = all_codes_df.copy()
+
+         for lncel_name_pattern, column_suffix in band_configurations:
+             # Filter the original DataFrame for the current LNCEL pattern;
+             # LNCEL_name is expected to contain something like "SITENAME<lncel_name_pattern>"
+             filtered_band_df = df[df["LNCEL_name"].str.contains(lncel_name_pattern)]
+
+             # Select the relevant columns and rename them before the merge.
+             # Renaming up front avoids pandas adding _x/_y suffixes that would
+             # then need to be renamed afterwards.
+             df_to_merge = filtered_band_df[
+                 [
+                     "code",
+                     "LNCEL_name",
+                     "avg_prb_usage_bh",
+                     "avg_prb_usage_bh_2nd",
+                     "avg_act_ues",
+                     "avg_dl_thp",
+                     "avg_ul_thp",
+                 ]
+             ].rename(
+                 columns={
+                     "LNCEL_name": f"LNCEL_name_{column_suffix}",
+                     "avg_prb_usage_bh": f"avg_prb_usage_bh_{column_suffix}",
+                     "avg_prb_usage_bh_2nd": f"avg_prb_usage_bh_{column_suffix}_2nd",
+                     "avg_act_ues": f"avg_act_ues_{column_suffix}",
+                     "avg_dl_thp": f"avg_dl_thp_{column_suffix}",
+                     "avg_ul_thp": f"avg_ul_thp_{column_suffix}",
+                 }
+             )
+
+             # Perform a left merge
+             current_sector_group_df = pd.merge(
+                 current_sector_group_df, df_to_merge, on="code", how="left"
+             )
+
+         all_processed_sectors_dfs.append(current_sector_group_df)
+
+     # Concatenate all the processed sector DataFrames
+     all_sectors_dfs = pd.concat(all_processed_sectors_dfs, axis=0, ignore_index=True)
+     # save_dataframe(all_sectors_dfs, "all_sectors_dfs.csv")
+
+     return all_sectors_dfs
+
+
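To sanity-check the wide per-sector layout this function produces, here is a minimal sketch. The site code, the `101_ML_1_L800`-style cell names, and all KPI values are invented for illustration only:

```python
import pandas as pd

cells = pd.DataFrame(
    {
        "code": [101, 101],
        "Region": ["R1", "R1"],
        "site_config_band": ["L800/L1800", "L800/L1800"],
        "Longitude": [1.0, 1.0],
        "Latitude": [2.0, 2.0],
        "LNCEL_name": ["101_ML_1_L800", "101_ML_1_L1800"],
        "avg_prb_usage_bh": [85.0, 40.0],
        "avg_prb_usage_bh_2nd": [80.0, 35.0],
        "avg_act_ues": [12.0, 5.0],
        "avg_dl_thp": [8.0, 20.0],
        "avg_ul_thp": [2.0, 5.0],
    }
)

wide = dfs_per_band_cell(cells)
# Row 0 (sector group "1") carries LNCEL_name_l800 / LNCEL_name_l1800 plus
# the renamed KPI columns side by side; groups "2" and "3" are all-NaN rows.
print(wide.filter(like="avg_prb_usage_bh_l").iloc[0].to_dict())
```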
+ def lte_database_for_capacity(dump_path: str):
+     dfs = process_lte_data(dump_path)
+     lte_fdd = dfs[0]
+     lte_tdd = dfs[1]
+
+     lte_fdd = lte_fdd[LTE_DATABASE_COLUMNS]
+     lte_tdd = lte_tdd[LTE_DATABASE_COLUMNS]
+
+     lte_db = pd.concat([lte_fdd, lte_tdd], axis=0)
+
+     # Rename final_name to LNCEL_name
+     lte_db = lte_db.rename(columns={"final_name": "LNCEL_name"})
+
+     # save_dataframe(lte_db, "LTE_Database.csv")
+     return lte_db
+
+
+ def lte_bh_dfs_per_kpi(
+     dump_path: str,
+     df: pd.DataFrame,
+     number_of_kpi_days: int = 7,
+     availability_threshold: int = 95,
+     prb_usage_threshold: int = 80,
+     prb_diff_between_cells_threshold: int = 20,
+     number_of_threshold_days: int = 3,
+     main_prb_to_use: str = "",
+ ) -> list:
+
+     # print(df.columns)
+
+     pivoted_kpi_dfs = create_dfs_per_kpi(
+         df=df,
+         pivot_date_column="date",
+         pivot_name_column="LNCEL_name",
+         kpi_columns_from=2,
+     )
+     cell_availability_df = cell_availability_analysis(
+         df=pivoted_kpi_dfs["Cell_Avail_excl_BLU"],
+         days=number_of_kpi_days,
+         availability_threshold=availability_threshold,
+     )
+     prb_usage_df = analyze_prb_usage(
+         df=pivoted_kpi_dfs["E_UTRAN_Avg_PRB_usage_per_TTI_DL"],
+         number_of_kpi_days=number_of_kpi_days,
+         prb_usage_threshold=prb_usage_threshold,
+         analysis_type="BH",
+         number_of_threshold_days=number_of_threshold_days,
+         suffix="" if main_prb_to_use == "E-UTRAN Avg PRB usage per TTI DL" else "_2nd",
+     )
+     prb_lev10_usage_df = analyze_prb_usage(
+         df=pivoted_kpi_dfs["DL_PRB_Util_p_TTI_Lev_10"],
+         number_of_kpi_days=number_of_kpi_days,
+         prb_usage_threshold=prb_usage_threshold,
+         analysis_type="BH",
+         number_of_threshold_days=number_of_threshold_days,
+         suffix="" if main_prb_to_use == "DL PRB Util p TTI Lev_10" else "_2nd",
+     )
+     act_ues_df = pivoted_kpi_dfs["Avg_act_UEs_DL"]
+     # Compute max and avg before appending them, so the average is not
+     # skewed by the freshly added max column
+     act_ues_max = act_ues_df.max(axis=1)
+     act_ues_avg = act_ues_df.mean(axis=1)
+     act_ues_df["max_act_ues"] = act_ues_max
+     act_ues_df["avg_act_ues"] = act_ues_avg
+     dl_thp_df = pivoted_kpi_dfs["Avg_PDCP_cell_thp_DL"]
+     dl_thp_max = dl_thp_df.max(axis=1)
+     dl_thp_avg = dl_thp_df.mean(axis=1)
+     dl_thp_df["max_dl_thp"] = dl_thp_max
+     dl_thp_df["avg_dl_thp"] = dl_thp_avg
+     ul_thp_df = pivoted_kpi_dfs["Avg_PDCP_cell_thp_UL"]
+     ul_thp_max = ul_thp_df.max(axis=1)
+     ul_thp_avg = ul_thp_df.mean(axis=1)
+     ul_thp_df["max_ul_thp"] = ul_thp_max
+     ul_thp_df["avg_ul_thp"] = ul_thp_avg
+
+     bh_kpi_df = pd.concat(
+         [
+             cell_availability_df,
+             prb_lev10_usage_df,
+             prb_usage_df,
+             act_ues_df,
+             dl_thp_df,
+             ul_thp_df,
+         ],
+         axis=1,
+     )
+     bh_kpi_df = bh_kpi_df.reset_index()
+     prb_df = bh_kpi_df[PRB_COLUMNS]
+
+     # Drop rows where LNCEL_name is empty or a placeholder (3 characters or fewer)
+     prb_df = prb_df[prb_df["LNCEL_name"].str.len() > 3]
+     # prb_df = prb_df.reset_index()
+     prb_df = prb_df.droplevel(level=1, axis=1)  # Drop the second column level (date)
+     # prb_df = prb_df.reset_index()
+     # prb_df["code"] = prb_df["LNCEL_name"].str.split("_").str[0]
+
+     lte_db = lte_database_for_capacity(dump_path)
+
+     db_and_prb = pd.merge(lte_db, prb_df, on="LNCEL_name", how="left")
+
+     # if avg_prb_usage_bh is "" then set it to "cell exists in dump but not in BH report"
+     # db_and_prb.loc[db_and_prb["avg_prb_usage_bh"].isnull(), "avg_prb_usage_bh"] = (
+     #     "cell exists in dump but not in BH report"
+     # )
+     # Drop rows where LNCEL_name is empty or a placeholder (3 characters or fewer)
+     db_and_prb = db_and_prb[db_and_prb["LNCEL_name"].str.len() > 3]
+
+     lte_analysis_df = dfs_per_band_cell(db_and_prb)
+     lte_analysis_df = lte_analysis_logic(
+         lte_analysis_df,
+         prb_usage_threshold,
+         prb_diff_between_cells_threshold,
+     )
+
+     lte_analysis_df = lte_analysis_df[LTE_ANALYSIS_COLUMNS]
+     # Rename columns
+     lte_analysis_df = lte_analysis_df.rename(
+         columns={
+             "LNCEL_name_l800": "name_l800",
+             "LNCEL_name_l1800": "name_l1800",
+             "LNCEL_name_l2300": "name_l2300",
+             "LNCEL_name_l2600": "name_l2600",
+             "LNCEL_name_l1800s": "name_l1800s",
+             "avg_prb_usage_bh_l800": "prb_l800",
+             "avg_prb_usage_bh_l1800": "prb_l1800",
+             "avg_prb_usage_bh_l2300": "prb_l2300",
+             "avg_prb_usage_bh_l2600": "prb_l2600",
+             "avg_prb_usage_bh_l1800s": "prb_l1800s",
+             "avg_prb_usage_bh_l800_2nd": "prb_l800_2nd",
+             "avg_prb_usage_bh_l1800_2nd": "prb_l1800_2nd",
+             "avg_prb_usage_bh_l2300_2nd": "prb_l2300_2nd",
+             "avg_prb_usage_bh_l2600_2nd": "prb_l2600_2nd",
+             "avg_prb_usage_bh_l1800s_2nd": "prb_l1800s_2nd",
+             "avg_act_ues_l800": "act_ues_l800",
+             "avg_act_ues_l1800": "act_ues_l1800",
+             "avg_act_ues_l2300": "act_ues_l2300",
+             "avg_act_ues_l2600": "act_ues_l2600",
+             "avg_act_ues_l1800s": "act_ues_l1800s",
+             "avg_dl_thp_l800": "dl_thp_l800",
+             "avg_dl_thp_l1800": "dl_thp_l1800",
+             "avg_dl_thp_l2300": "dl_thp_l2300",
+             "avg_dl_thp_l2600": "dl_thp_l2600",
+             "avg_dl_thp_l1800s": "dl_thp_l1800s",
+             "avg_ul_thp_l800": "ul_thp_l800",
+             "avg_ul_thp_l1800": "ul_thp_l1800",
+             "avg_ul_thp_l2300": "ul_thp_l2300",
+             "avg_ul_thp_l2600": "ul_thp_l2600",
+             "avg_ul_thp_l1800s": "ul_thp_l1800s",
+         }
+     )
+
+     return [bh_kpi_df, lte_analysis_df]
+
+
+ def process_lte_bh_report(
+     dump_path: str,
+     bh_report_path: str,
+     num_last_days: int,
+     num_threshold_days: int,
+     availability_threshold: float,
+     prb_usage_threshold: float,
+     prb_diff_between_cells_threshold: float,
+     main_prb_to_use: str,
+ ) -> list:
+     """
+     Process the LTE busy-hour report and perform capacity analysis.
+
+     Args:
+         dump_path: Path to the network dump used to build the LTE database
+         bh_report_path: Path to the BH report CSV file
+         num_last_days: Number of last days for analysis
+         num_threshold_days: Number of days for threshold calculation
+         availability_threshold: Minimum required availability
+         prb_usage_threshold: Maximum allowed PRB usage
+         prb_diff_between_cells_threshold: Maximum allowed PRB usage difference between cells
+         main_prb_to_use: Name of the PRB KPI to treat as the primary one
+
+     Returns:
+         List containing the BH KPI DataFrame and the LTE analysis DataFrame
+     """
+     LteCapacity.final_results = None
+     # lte_db_dfs = lte_database_for_capacity(dump_path)
+
+     # Read BH report
+     df = pd.read_csv(bh_report_path, delimiter=";")
+     df = kpi_naming_cleaning(df)
+     # print(df.columns)
+     df = create_hourly_date(df)
+     df = df[KPI_COLUMNS]
+     pivoted_kpi_dfs = lte_bh_dfs_per_kpi(
+         dump_path=dump_path,
+         df=df,
+         number_of_kpi_days=num_last_days,
+         availability_threshold=availability_threshold,
+         prb_usage_threshold=prb_usage_threshold,
+         prb_diff_between_cells_threshold=prb_diff_between_cells_threshold,
+         number_of_threshold_days=num_threshold_days,
+         main_prb_to_use=main_prb_to_use,
+     )
+
+     # save_dataframe(pivoted_kpi_dfs, "LTE_BH_Report.csv")
+     return pivoted_kpi_dfs
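A hypothetical end-to-end invocation of the BH processing; every path, threshold, and the KPI name below are placeholders, not values mandated by the tool:

```python
bh_kpi_df, lte_analysis_df = process_lte_bh_report(
    dump_path="dump.xlsb",
    bh_report_path="lte_bh_report.csv",
    num_last_days=7,
    num_threshold_days=3,
    availability_threshold=95,
    prb_usage_threshold=80,
    prb_diff_between_cells_threshold=20,
    main_prb_to_use="E-UTRAN Avg PRB usage per TTI DL",
)
print(lte_analysis_df["final_comments"].value_counts())
```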
process_kpi/process_wbts_capacity.py ADDED
@@ -0,0 +1,312 @@
+ import pandas as pd
+
+ from utils.kpi_analysis_utils import (
+     cell_availability_analysis,
+     combine_comments,
+     create_daily_date,
+     create_dfs_per_kpi,
+     kpi_naming_cleaning,
+ )
+
+
+ class WbtsCapacity:
+     final_results: pd.DataFrame = None
+
+
+ def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> str:
+     """
+     Check if any value in the row deviates more than max_diff from the most common value.
+
+     Args:
+         row: Series of values to check for deviation
+         max_diff: Maximum allowed difference from the most common value
+         type: Type identifier for the deviation message
+
+     Returns:
+         A message indicating deviation if found, otherwise an empty string
+     """
+     numeric_row = row.astype(float)  # Ensure numeric
+     mode_series = numeric_row.mode()
+
+     # Safe fallback in case mode is empty
+     most_common = mode_series.iloc[0] if not mode_series.empty else numeric_row.iloc[0]
+
+     diffs = abs(numeric_row - most_common)
+
+     if (diffs > max_diff).any():
+         return f"{type} Deviation > {max_diff} detected"
+     else:
+         return ""
+
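A quick sketch of the deviation rule on invented values: the mode of the recent days is the reference, and any day farther than `max_diff` from it trips the message.

```python
import pandas as pd

row = pd.Series([6, 6, 12])  # three days of subunit counts, one day deviating
print(check_deviation(row, max_diff=3.0, type="bb"))
# -> "bb Deviation > 3.0 detected"  (mode is 6, and |12 - 6| > 3)
```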
+ def max_used_bb_subunits_analysis(
+     df: pd.DataFrame,
+     days: int = 7,
+     threshold: int = 80,
+     number_of_threshold_days: int = 3,
+ ) -> pd.DataFrame:
+     """
+     Analyze maximum used baseband subunits and identify sites needing upgrades.
+
+     Args:
+         df: DataFrame containing baseband utilization data
+         days: Number of days to analyze
+         threshold: Utilization threshold percentage for flagging
+         number_of_threshold_days: Minimum days above threshold to flag for upgrade
+
+     Returns:
+         DataFrame with analysis results and upgrade recommendations
+     """
+     result_df = df.copy()
+     last_days_df = result_df.iloc[:, -days:]
+     last_days_df = last_days_df.fillna(0)
+
+     result_df["Average_used_bb_ratio"] = last_days_df.mean(axis=1).round(2)
+     # Count the number of days above threshold
+     result_df["bb_number_of_days_exceeding_threshold"] = last_days_df.apply(
+         lambda row: sum(1 for x in row if x >= threshold), axis=1
+     )
+
+     # Initialize comment column
+     result_df["Average_used_bb_ratio_comment"] = ""
+
+     # Apply condition for upgrade recommendation
+     result_df.loc[
+         (result_df["bb_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
+         & (result_df["Average_used_bb_ratio"] >= threshold),
+         "Average_used_bb_ratio_comment",
+     ] = "need BB upgrade"
+
+     return result_df
+
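The flagging rule requires both conditions at once: at least `number_of_threshold_days` days at or above the threshold, and a period average at or above it. A minimal sketch with one invented site and seven daily values:

```python
import pandas as pd

util = pd.DataFrame([[82, 85, 90, 79, 81, 88, 84]])  # one site, 7 days
flagged = max_used_bb_subunits_analysis(
    util, days=7, threshold=80, number_of_threshold_days=3
)
# 6 of 7 days are >= 80 and the mean (84.14) is >= 80, so the site is flagged:
print(flagged["Average_used_bb_ratio_comment"].iloc[0])  # -> "need BB upgrade"
```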
+ def max_used_ce_analysis(
+     df: pd.DataFrame,
+     days: int = 7,
+     threshold: int = 80,
+     number_of_threshold_days: int = 3,
+ ) -> pd.DataFrame:
+     """
+     Analyze maximum used channel elements and identify sites needing upgrades.
+
+     Args:
+         df: DataFrame containing channel element utilization data
+         days: Number of days to analyze
+         threshold: Utilization threshold percentage for flagging
+         number_of_threshold_days: Minimum days above threshold to flag for upgrade
+
+     Returns:
+         DataFrame with analysis results and upgrade recommendations
+     """
+     result_df = df.copy().fillna(0)
+     last_days_df = result_df.iloc[:, -days:]
+
+     result_df["Average_used_ce_ratio"] = last_days_df.mean(axis=1).round(2)
+
+     # Count the number of days above threshold
+     result_df["ce_number_of_days_exceeding_threshold"] = last_days_df.apply(
+         lambda row: sum(1 for x in row if x >= threshold), axis=1
+     )
+
+     # Initialize comment column
+     result_df["Average_used_ce_ratio_comment"] = ""
+
+     # Apply condition for upgrade recommendation
+     result_df.loc[
+         (result_df["ce_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
+         & (result_df["Average_used_ce_ratio"] >= threshold),
+         "Average_used_ce_ratio_comment",
+     ] = "need CE upgrade"
+
+     return result_df
+
+
+ def num_bb_subunits_analysis(df: pd.DataFrame, days: int = 3) -> pd.DataFrame:
+     """
+     Analyze baseband subunit count for deviations.
+
+     Args:
+         df: DataFrame containing baseband subunit count data
+         days: Number of days to analyze
+
+     Returns:
+         DataFrame with deviation analysis comments
+     """
+     result_df = df.copy()
+     last_days_df = result_df.iloc[:, -days:]
+     result_df["num_bb_subunits_comment"] = last_days_df.apply(
+         lambda row: check_deviation(row, type="bb"), axis=1
+     )
+     return result_df
+
+
+ def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
+     """
+     Analyze available channel elements for deviations.
+
+     Args:
+         df: DataFrame containing available channel element data
+         days: Number of days to analyze
+
+     Returns:
+         DataFrame with deviation analysis comments
+     """
+     result_df = df.copy()
+     last_days_df = result_df.iloc[:, -days:]
+     result_df["avail_ce_comment"] = last_days_df.apply(
+         lambda row: check_deviation(row, max_diff=96, type="ce"), axis=1
+     )
+     return result_df
+
+
+ def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Combine baseband related comments into a single column.
+
+     Args:
+         df: DataFrame containing baseband comment columns
+
+     Returns:
+         DataFrame with combined baseband comments
+     """
+     return combine_comments(
+         df,
+         "num_bb_subunits_comment",
+         "Average_used_bb_ratio_comment",
+         "availability_comment_daily",
+         new_column="bb_comments",
+     )
+
+
+ def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Combine channel element related comments into a single column.
+
+     Args:
+         df: DataFrame containing channel element comment columns
+
+     Returns:
+         DataFrame with combined channel element comments
+     """
+     return combine_comments(
+         df,
+         "avail_ce_comment",
+         "Average_used_ce_ratio_comment",
+         "availability_comment_daily",
+         new_column="ce_comments",
+     )
+
+
+ def wbts_kpi_analysis(
+     df: pd.DataFrame,
+     num_days: int = 7,
+     threshold: int = 80,
+     number_of_threshold_days: int = 3,
+ ) -> pd.DataFrame:
+     """
+     Create pivoted DataFrames for each KPI and perform analysis.
+
+     Args:
+         df: DataFrame containing KPI data
+         num_days: Number of days to analyze
+         threshold: Utilization threshold percentage for flagging
+         number_of_threshold_days: Minimum days above threshold to flag for upgrade
+
+     Returns:
+         DataFrame with combined analysis results
+     """
+     # kpi_columns = df.columns[5:]
+     pivoted_kpi_dfs = create_dfs_per_kpi(
+         df=df, pivot_date_column="date", pivot_name_column="DN", kpi_columns_from=5
+     )
+
+     # Extract individual KPI DataFrames
+     wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0]
+     licensed_ce_df = pivoted_kpi_dfs["LICENSED_R99CE_WBTS_M5008C48"]
+     max_used_ce_dl_df = pivoted_kpi_dfs["MAX_USED_CE_R99_DL_M5008C12"]
+     max_used_ce_ul_df = pivoted_kpi_dfs["MAX_USED_CE_R99_UL_M5008C15"]
+     max_avail_ce_df = pivoted_kpi_dfs["MAX_AVAIL_R99_CE_M5006C0"]
+     max_used_bb_subunits_df = pivoted_kpi_dfs["MAX_USED_BB_SUBUNITS_M5008C38"]
+     num_bb_subunits_df = pivoted_kpi_dfs["NUM_BB_SUBUNITS_M5008C39"]
+     max_bb_sus_util_ratio_df = pivoted_kpi_dfs["Max_BB_SUs_Util_ratio"]
+     cell_availability_df = pivoted_kpi_dfs[
+         "Cell_Availability_excluding_blocked_by_user_state_BLU"
+     ]
+     total_cs_traffic_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
+     total_data_traffic_df = pivoted_kpi_dfs["Total_Data_Traffic"]
+     max_used_ce_ratio_flexi_df = pivoted_kpi_dfs["Max_Used_CE_s_ratio_Flexi_R2"]
+
+     # Perform analysis on each KPI DataFrame
+     max_bb_sus_util_ratio_df = max_used_bb_subunits_analysis(
+         max_bb_sus_util_ratio_df, num_days, threshold, number_of_threshold_days
+     )
+     cell_availability_df = cell_availability_analysis(cell_availability_df, num_days)
+     max_used_ce_ratio_flexi_df = max_used_ce_analysis(
+         max_used_ce_ratio_flexi_df, num_days, threshold, number_of_threshold_days
+     )
+     num_bb_subunits_df = num_bb_subunits_analysis(num_bb_subunits_df, num_days)
+     licensed_ce_df = avail_ce_analysis(licensed_ce_df, num_days)
+
+     # Concatenate all DataFrames
+     result_df = pd.concat(
+         [
+             wbts_name_df,
+             licensed_ce_df,
+             max_used_ce_dl_df,
+             max_used_ce_ul_df,
+             max_avail_ce_df,
+             max_used_bb_subunits_df,
+             num_bb_subunits_df,
+             max_bb_sus_util_ratio_df,
+             cell_availability_df,
+             total_cs_traffic_df,
+             total_data_traffic_df,
+             max_used_ce_ratio_flexi_df,
+         ],
+         axis=1,
+     )
+
+     # Add combined comments analysis
+     result_df = bb_comments_analysis(result_df)
+     result_df = ce_comments_analysis(result_df)
+
+     return result_df
+
+
+ def load_data(
+     filepath: str,
+     num_days: int,
+     threshold: int,
+     number_of_threshold_days: int,
+ ) -> pd.DataFrame:
+     """
+     Load data from CSV file and perform preprocessing and analysis.
+
+     Args:
+         filepath: Path to CSV file or uploaded file object
+         num_days: Number of days to analyze
+         threshold: Utilization threshold percentage for flagging
+         number_of_threshold_days: Minimum days above threshold to flag for upgrade
+
+     Returns:
+         DataFrame with processed and analyzed data
+     """
+     df = pd.read_csv(filepath, delimiter=";")
+
+     # Preprocess data
+     df = create_daily_date(df)
+     df = kpi_naming_cleaning(df)
+
+     # Reorder columns for better organization
+     df = df[["date"] + [col for col in df.columns if col not in ["date"]]]
+     df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]]
+
+     # Perform KPI analysis
+     df = wbts_kpi_analysis(df, num_days, threshold, number_of_threshold_days)
+
+     # for col, col_index in zip(df.columns, df.columns.get_indexer(df.columns)):
+     #     print(f"Column: {col}, Index: {col_index}")
+
+     return df
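A hypothetical call of the whole WBTS pipeline; the CSV file name and the settings are placeholders:

```python
wbts_df = load_data(
    "wbts_daily_report.csv", num_days=7, threshold=80, number_of_threshold_days=3
)
print(wbts_df[["bb_comments", "ce_comments"]].head())
```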
process_kpi/process_wcel_capacity.py ADDED
@@ -0,0 +1,348 @@
+ import pandas as pd
+
+ from utils.kpi_analysis_utils import (
+     analyze_fails_kpi,
+     cell_availability_analysis,
+     combine_comments,
+     create_daily_date,
+     create_dfs_per_kpi,
+     kpi_naming_cleaning,
+     summarize_fails_comments,
+ )
+ from utils.utils_vars import get_physical_db
+
+ tx_comments_mapping = {
+     "iub_frameloss exceeded threshold": "iub frameloss",
+     "iub_frameloss exceeded threshold, hsdpa_congestion_rate_iub exceeded threshold": "iub frameloss and hsdpa iub congestion",
+     "hsdpa_congestion_rate_iub exceeded threshold": "hsdpa iub congestion",
+ }
+ operational_comments_mapping = {
+     "Down Site": "Down Cell",
+     "iub frameloss, instability": "Availability and TX issues",
+     "iub frameloss and hsdpa iub congestion, Availability OK": "TX issues",
+     "iub frameloss, Availability OK": "TX issues",
+     "critical instability": "Availability issues",
+     "iub frameloss, critical instability": "Availability and TX issues",
+     "iub frameloss and hsdpa iub congestion, instability": "Availability and TX issues",
+     "Availability OK": "Site OK",
+     "hsdpa iub congestion, instability": "Availability and TX issues",
+     "instability": "Availability issues",
+     "hsdpa iub congestion, Availability OK": "TX issues",
+     "iub frameloss and hsdpa iub congestion, critical instability": "Availability and TX issues",
+     "hsdpa iub congestion, critical instability": "Availability and TX issues",
+ }
+
+ fails_comments_mapping = {
+     "ac, ac_dl, bts, code fails": "Power, Bts and Code fails",
+     "bts fails": "Bts fails",
+     "ac, bts, code fails": "Power and Code fails",
+     "ac, code fails": "Power fails",
+     "ac fails": "Power fails",
+     "ac, ac_dl fails": "Power fails",
+     "ac, bts fails": "Power and Bts fails",
+     "ac, ac_dl, bts fails": "Power and Bts fails",
+     "ac, ac_dl, code fails": "Power and Code fails",
+     "ac, ac_ul, bts, code fails": "Power, Bts and Code fails",
+     "ac, ac_dl, ac_ul, bts, code fails": "Power, Bts and Code fails",
+ }
+
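These mapping dicts are plain normalization tables from raw combined comments to short labels. Later in the file they are applied with `Series.apply` and `dict.get`, which passes unknown keys through unchanged. A small sketch with one invented unmapped value:

```python
import pandas as pd

raw = pd.Series([
    "iub_frameloss exceeded threshold",
    "some unmapped combination",
])
print(raw.apply(lambda x: tx_comments_mapping.get(x, x)).tolist())
# -> ['iub frameloss', 'some unmapped combination']
```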
+ KPI_COLUMNS = [
+     "WCEL_name",
+     "date",
+     "Cell_Availability_excluding_blocked_by_user_state_BLU",
+     "Total_CS_traffic_Erl",
+     "HSDPA_TRAFFIC_VOLUME",
+     "HSDPA_USER_THROUGHPUT",
+     "Max_simult_HSDPA_users",
+     "IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71",
+     "HSDPA_congestion_rate_in_Iub",
+     "rrc_conn_stp_fail_ac_M1001C3",
+     "RRC_CONN_STP_FAIL_AC_UL_M1001C731",
+     "RRC_CONN_STP_FAIL_AC_DL_M1001C732",
+     "RRC_CONN_STP_FAIL_AC_COD_M1001C733",
+     "rrc_conn_stp_fail_bts_M1001C4",
+ ]
+
+ WCEL_ANALYSIS_COLUMNS = [
+     "WCEL_name",
+     "Average_cell_availability_daily",
+     "number_of_days_exceeding_availability_threshold_daily",
+     "availability_comment_daily",
+     "sum_traffic_cs",
+     "sum_traffic_dl",
+     "max_dl_throughput",
+     "avg_dl_throughput",
+     "max_users",
+     "max_iub_frameloss",
+     "number_of_days_with_iub_frameloss_exceeded",
+     "max_hsdpa_congestion_rate_iub",
+     "number_of_days_with_hsdpa_congestion_rate_iub_exceeded",
+     "max_rrc_fail_ac",
+     "number_of_days_with_rrc_fail_ac_exceeded",
+     "max_rrc_fail_ac_ul",
+     "number_of_days_with_rrc_fail_ac_ul_exceeded",
+     "max_rrc_fail_ac_dl",
+     "number_of_days_with_rrc_fail_ac_dl_exceeded",
+     "max_rrc_fail_code",
+     "number_of_days_with_rrc_fail_code_exceeded",
+     "max_rrc_fail_bts",
+     "number_of_days_with_rrc_fail_bts_exceeded",
+     "tx_congestion_comments",
+     "operational_comments",
+     "fails_comments",
+     "final_comments",
+ ]
+
+
+ class WcelCapacity:
+     final_results: pd.DataFrame = None
+
+
+ def wcel_kpi_analysis(
+     df: pd.DataFrame,
+     num_last_days: int,
+     num_threshold_days: int,
+     availability_threshold: int,
+     iub_frameloss_threshold: int,
+     hsdpa_congestion_rate_iub_threshold: int,
+     fails_treshold: int,
+ ) -> list:
+     pivoted_kpi_dfs = create_dfs_per_kpi(
+         df=df,
+         pivot_date_column="date",
+         pivot_name_column="WCEL_name",
+         kpi_columns_from=2,
+     )
+     cell_availability_df = cell_availability_analysis(
+         df=pivoted_kpi_dfs["Cell_Availability_excluding_blocked_by_user_state_BLU"],
+         days=num_last_days,
+         availability_threshold=availability_threshold,
+     )
+
+     # Traffic, throughput and max users
+     trafic_cs_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
+     hsdpa_traffic_df = pivoted_kpi_dfs["HSDPA_TRAFFIC_VOLUME"]
+     hsdpa_user_throughput_df = pivoted_kpi_dfs["HSDPA_USER_THROUGHPUT"]
+     max_simult_hsdpa_users_df = pivoted_kpi_dfs["Max_simult_HSDPA_users"]
+     # Add sums, maxima and averages; the stats are computed before the new
+     # columns are appended, so the averages are not skewed by the max columns
+     trafic_cs_df["sum_traffic_cs"] = trafic_cs_df.sum(axis=1)
+     hsdpa_traffic_df["sum_traffic_dl"] = hsdpa_traffic_df.sum(axis=1)
+     dl_thp_max = hsdpa_user_throughput_df.max(axis=1)
+     dl_thp_avg = hsdpa_user_throughput_df.mean(axis=1)
+     hsdpa_user_throughput_df["max_dl_throughput"] = dl_thp_max
+     hsdpa_user_throughput_df["avg_dl_throughput"] = dl_thp_avg
+     users_max = max_simult_hsdpa_users_df.max(axis=1)
+     users_avg = max_simult_hsdpa_users_df.mean(axis=1)
+     max_simult_hsdpa_users_df["max_users"] = users_max
+     max_simult_hsdpa_users_df["avg_users"] = users_avg
+
+     # TX Congestion
+     iub_frameloss_df = pivoted_kpi_dfs["IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71"]
+     hsdpa_congestion_rate_iub_df = pivoted_kpi_dfs["HSDPA_congestion_rate_in_Iub"]
+
+     iub_frameloss_df = analyze_fails_kpi(
+         df=iub_frameloss_df,
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=iub_frameloss_threshold,
+         kpi_column_name="iub_frameloss",
+     )
+     hsdpa_congestion_rate_iub_df = analyze_fails_kpi(
+         df=hsdpa_congestion_rate_iub_df,
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=hsdpa_congestion_rate_iub_threshold,
+         kpi_column_name="hsdpa_congestion_rate_iub",
+     )
+
+     # Fails
+     rrc_conn_stp_fail_ac_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["rrc_conn_stp_fail_ac_M1001C3"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_ac",
+     )
+     rrc_conn_stp_fail_ac_ul_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_UL_M1001C731"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_ac_ul",
+     )
+     rrc_conn_stp_fail_ac_dl_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_DL_M1001C732"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_ac_dl",
+     )
+     rrc_conn_stp_fail_ac_cod_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_COD_M1001C733"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_code",
+     )
+     rrc_conn_stp_fail_bts_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["rrc_conn_stp_fail_bts_M1001C4"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_bts",
+     )
+
+     kpi_df = pd.concat(
+         [
+             cell_availability_df,
+             trafic_cs_df,
+             hsdpa_traffic_df,
+             hsdpa_user_throughput_df,
+             max_simult_hsdpa_users_df,
+             iub_frameloss_df,
+             hsdpa_congestion_rate_iub_df,
+             rrc_conn_stp_fail_ac_df,
+             rrc_conn_stp_fail_ac_ul_df,
+             rrc_conn_stp_fail_ac_dl_df,
+             rrc_conn_stp_fail_ac_cod_df,
+             rrc_conn_stp_fail_bts_df,
+         ],
+         axis=1,
+     )
+     kpi_df = kpi_df.reset_index()
+
+     kpi_df = combine_comments(
+         kpi_df,
+         "iub_frameloss_comment",
+         "hsdpa_congestion_rate_iub_comment",
+         new_column="tx_congestion_comments",
+     )
+     kpi_df["tx_congestion_comments"] = kpi_df["tx_congestion_comments"].apply(
+         lambda x: tx_comments_mapping.get(x, x)
+     )
+
+     kpi_df = combine_comments(
+         kpi_df,
+         "tx_congestion_comments",
+         "availability_comment_daily",
+         new_column="operational_comments",
+     )
+     kpi_df["operational_comments"] = kpi_df["operational_comments"].apply(
+         lambda x: operational_comments_mapping.get(x, x)
+     )
+     kpi_df = combine_comments(
+         kpi_df,
+         "rrc_fail_ac_comment",
+         "rrc_fail_ac_ul_comment",
+         "rrc_fail_ac_dl_comment",
+         "rrc_fail_code_comment",
+         "rrc_fail_bts_comment",
+         new_column="fails_comments",
+     )
+     kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(summarize_fails_comments)
+     kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(
+         lambda x: fails_comments_mapping.get(x, x)
+     )
+     kpi_df = combine_comments(
+         kpi_df,
+         "operational_comments",
+         "fails_comments",
+         new_column="final_comments",
+     )
+
+     wcel_analysis_df = kpi_df[WCEL_ANALYSIS_COLUMNS]
+     wcel_analysis_df = wcel_analysis_df.droplevel(level=1, axis=1)  # Drop the date level
+
+     # Rename
+     wcel_analysis_df = wcel_analysis_df.rename(
+         columns={
+             "WCEL_name": "name",
+             "Average_cell_availability_daily": "Avg_availability",
+             "number_of_days_exceeding_availability_threshold_daily": "Avail_exceed_days",
+             "availability_comment_daily": "availability_comment",
+             "number_of_days_with_iub_frameloss_exceeded": "iub_frameloss_exceed_days",
+             "number_of_days_with_hsdpa_congestion_rate_iub_exceeded": "hsdpa_iub_exceed_days",
+             "number_of_days_with_rrc_fail_ac_exceeded": "ac_fail_exceed_days",
+             "number_of_days_with_rrc_fail_ac_ul_exceeded": "ac_ul_fail_exceed_days",
+             "number_of_days_with_rrc_fail_ac_dl_exceeded": "ac_dl_fail_exceed_days",
+             "number_of_days_with_rrc_fail_code_exceeded": "code_fail_exceed_days",
+             "number_of_days_with_rrc_fail_bts_exceeded": "bts_fail_exceed_days",
+         }
+     )
+     # Drop rows whose name has fewer than 5 characters
+     wcel_analysis_df = wcel_analysis_df[wcel_analysis_df["name"].str.len() >= 5]
+
+     wcel_analysis_df["code"] = wcel_analysis_df["name"].str.split("_").str[0]
+     wcel_analysis_df["code"] = (
+         pd.to_numeric(wcel_analysis_df["code"], errors="coerce").fillna(0).astype(int)
+     )
+     wcel_analysis_df["Region"] = wcel_analysis_df["name"].str.split("_").str[1]
+     # Move code and Region to the first columns
+     wcel_analysis_df = wcel_analysis_df[
+         ["code", "Region"]
+         + [col for col in wcel_analysis_df if col != "code" and col != "Region"]
+     ]
+
+     # Load physical database
+     physical_db: pd.DataFrame = get_physical_db()
+
+     # Convert code_sector to code
+     physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
+     # Remove duplicates
+     physical_db = physical_db.drop_duplicates(subset="code")
+
+     # Keep only code, longitude, latitude and city
+     physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
+
+     physical_db["code"] = (
+         pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
+     )
+
+     wcel_analysis_df = pd.merge(
+         wcel_analysis_df,
+         physical_db,
+         on="code",
+         how="left",
+     )
+
+     return [wcel_analysis_df, kpi_df]
+
+
+ def load_and_process_wcel_capacity_data(
+     uploaded_file: str,
+     num_last_days: int,
+     num_threshold_days: int,
+     availability_threshold: int,
+     iub_frameloss_threshold: int,
+     hsdpa_congestion_rate_iub_threshold: int,
+     fails_treshold: int,
+ ) -> list:
+     """
+     Load and process data for WCEL capacity analysis.
+
+     Args:
+         uploaded_file: Path to (or file-like object for) the CSV of WCEL capacity data
+         num_last_days: Number of days for analysis
+         num_threshold_days: Minimum days above threshold to flag for upgrade
+         availability_threshold: Minimum required cell availability
+         iub_frameloss_threshold: Threshold for Iub frame loss
+         hsdpa_congestion_rate_iub_threshold: Threshold for HSDPA congestion rate in Iub
+         fails_treshold: Threshold for RRC setup failure counts
+
+     Returns:
+         List with the WCEL analysis DataFrame and the per-KPI DataFrame
+     """
+     # Load data
+     df = pd.read_csv(uploaded_file, delimiter=";")
+     df = kpi_naming_cleaning(df)
+     df = create_daily_date(df)
+     df = df[KPI_COLUMNS]
+     dfs = wcel_kpi_analysis(
+         df,
+         num_last_days,
+         num_threshold_days,
+         availability_threshold,
+         iub_frameloss_threshold,
+         hsdpa_congestion_rate_iub_threshold,
+         fails_treshold,
+     )
+     return dfs
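A hypothetical end-to-end call; the file name and every threshold below are placeholders, not recommended values:

```python
wcel_analysis_df, kpi_df = load_and_process_wcel_capacity_data(
    "wcel_daily_report.csv",
    num_last_days=7,
    num_threshold_days=3,
    availability_threshold=95,
    iub_frameloss_threshold=2,
    hsdpa_congestion_rate_iub_threshold=5,
    fails_treshold=100,
)
print(wcel_analysis_df["final_comments"].value_counts().head())
```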
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ panel>=1.4
+ bokeh>=3.4
+ pandas>=2.0
+ numpy>=1.23
+ plotly>=5.0
+ xlsxwriter>=3.0
+
+ pyarrow>=14.0
+ duckdb>=0.9
+ openpyxl>=3.1
utils/azimuth_validation.py ADDED
@@ -0,0 +1,35 @@
+ import pandas as pd
+
+ from utils.convert_to_excel import save_dataframe
+
+ url = r"./physical_db/physical_database.csv"
+
+ df = pd.read_csv(url)
+
+
+ def validate_azimuth(group):
+     """
+     Validate the azimuth ordering within a group.
+
+     Checks that the azimuth values are strictly increasing when a group has
+     exactly three values, i.e. Sector 3's azimuth is greater than Sector 2's,
+     which in turn is greater than Sector 1's.
+
+     Args:
+         group (pd.DataFrame): A DataFrame group containing an 'Azimut' column.
+
+     Returns:
+         bool: True if the azimuth values are strictly increasing when there
+         are exactly three values, False otherwise.
+     """
+
+     # Fall back to an empty Series (not a list) so .values always exists
+     azimuths = group.get("Azimut", pd.Series(dtype=float)).values
+     if len(azimuths) == 3 and not (azimuths[0] < azimuths[1] < azimuths[2]):
+         return False
+     return True
+
+
+ # Apply validation per 'code'
+ azimut_verification = df.groupby("CODE").apply(lambda x: validate_azimuth(x))
+ df["Azimut_verification"] = df["CODE"].map(azimut_verification)
+
+ save_dataframe(df, "azimut_verification")
+ # print(df)
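A quick check of the rule with invented azimuths, one ordered triple and one out-of-order triple:

```python
import pandas as pd

ok = pd.DataFrame({"Azimut": [0, 120, 240]})
bad = pd.DataFrame({"Azimut": [120, 0, 240]})
print(validate_azimuth(ok), validate_azimuth(bad))  # -> True False
```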
utils/check_sheet_exist.py ADDED
@@ -0,0 +1,90 @@
+ import pandas as pd
+
+
+ class DumpType:
+     full_dump = False
+
+
+ class Technology:
+     gsm = False
+     wcdma = False
+     lte = False
+     neighbors = False
+     trx = False
+     mrbts = False
+     mal = False
+     invunit = False
+
+
+ # Dictionary of sheet groups to check
+ sheets_to_check = {
+     "gsm": ["BTS", "BCF", "TRX", "MAL"],
+     "neighbors": ["ADCE", "ADJS", "ADJI", "ADJG", "ADJW", "BTS", "WCEL"],
+     "wcdma": ["WCEL", "WBTS", "WNCEL"],
+     "lte": ["LNBTS", "LNCEL", "LNCEL_FDD", "LNCEL_TDD"],
+     "trx": ["TRX", "BTS"],
+     "mrbts": ["MRBTS"],
+     "mal": ["MAL", "BTS"],
+     "invunit": ["INVUNIT"],
+ }
+
+
+ def load(file_path):
+     # Load the Excel file
+     xlsb_file = pd.ExcelFile(file_path, engine="calamine")
+
+     # Get all sheet names in the file
+     available_sheets = xlsb_file.sheet_names
+     return available_sheets
+
+
+ def check_sheets(technology_attr, sheet_list, file_path):
+     """
+     Check if all sheets in the given sheet_list exist in the Excel file.
+
+     Parameters
+     ----------
+     technology_attr : str
+         The attribute of the Technology class to set.
+     sheet_list : list[str]
+         The list of sheet names to check.
+     file_path : str
+         Path to the Excel workbook to inspect.
+
+     Returns
+     -------
+     None
+     """
+     available_sheets = load(file_path)
+     missing_sheets = [sheet for sheet in sheet_list if sheet not in available_sheets]
+     available_sheets_in_list = [
+         sheet for sheet in sheet_list if sheet in available_sheets
+     ]
+     if not missing_sheets:
+         setattr(Technology, technology_attr, True)
+         # print(getattr(Technology, technology_attr))
+         # print(f"available:", available_sheets_in_list)
+         # print("All sheets exist")
+
+     # else:
+     #     print(f"Missing sheets: {missing_sheets}")
+     #     print(f"available:", available_sheets_in_list)
+     #     print(getattr(Technology, technology_attr))
+
+
+ # Check each technology's sheets
+ def execute_checks_sheets_exist(file_path):
+     Technology.gsm = False
+     Technology.wcdma = False
+     Technology.lte = False
+     Technology.neighbors = False
+     Technology.trx = False
+     Technology.mrbts = False
+     Technology.invunit = False
+     Technology.mal = False
+     DumpType.full_dump = False
+     for tech_attr, sheets in sheets_to_check.items():
+         check_sheets(tech_attr, sheets, file_path)
+
+
+ # execute_checks_sheets_exist(
+ #     r"C:\Users\David\Documents\PROJECTS\2023\PROJET 2023\DUMP\DUMP\2142\DUMP 2142.xlsb"
+ # )
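A hedged usage sketch (the dump path is a placeholder; `load` relies on pandas' `calamine` engine, so the python-calamine package must be available):

```python
execute_checks_sheets_exist("DUMP.xlsb")
print(Technology.gsm, Technology.wcdma, Technology.lte)  # flags per sheet group
```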
utils/config_band.py ADDED
@@ -0,0 +1,156 @@
+ import pandas as pd
+
+
+ def config_band(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the site configuration band for each site code.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the site information, with columns "code" and "band"
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the site configuration band for each site code,
+         with columns "code" and "site_config_band"
+     """
+     df_band = df[["code", "band"]].copy()
+     df_band["ID"] = df_band[["code", "band"]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[["code", "band"]]
+     df_band["band"] = df_band["band"].fillna("empty")
+     df_band = (
+         df_band.groupby("code")["band"]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={"band": "site_config_band"}, inplace=True)
+
+     return df_band
+
+
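A tiny illustration of the groupby/sort/join step with invented codes and bands:

```python
import pandas as pd

sites = pd.DataFrame({"code": [1, 1, 2], "band": ["G900", "L1800", "G900"]})
print(config_band(sites))
#    code site_config_band
# 0     1       G900/L1800
# 1     2             G900
```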
+ def bcf_band(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the bcf configuration band for each bcf ID.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the bcf information, with columns "ID_BCF" and "band"
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the bcf configuration band for each bcf ID,
+         with columns "ID_BCF" and "bcf_config_band"
+     """
+     df_band = df[["ID_BCF", "band"]].copy()
+     df_band["ID"] = df_band[["ID_BCF", "band"]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[["ID_BCF", "band"]]
+     df_band["band"] = df_band["band"].fillna("empty")
+     df_band = (
+         df_band.groupby("ID_BCF")["band"]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={"band": "bcf_config_band"}, inplace=True)
+
+     return df_band
+
+
+ def wbts_band(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the wbts configuration band for each wbts ID.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the wbts information, with columns "WBTS" and "band"
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the wbts configuration band for each wbts ID,
+         with columns "WBTS" and "wbts_config_band"
+     """
+     df_band = df[["WBTS", "band"]].copy()
+     df_band["ID"] = df_band[["WBTS", "band"]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[["WBTS", "band"]]
+     df_band["band"] = df_band["band"].fillna("empty")
+     df_band = (
+         df_band.groupby("WBTS")["band"]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={"band": "wbts_config_band"}, inplace=True)
+
+     return df_band
+
+
+ def lte_mrbts_band(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the LTE configuration band for each mrbts ID.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the mrbts information, with columns "MRBTS" and "band"
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the LTE configuration band for each mrbts ID,
+         with columns "MRBTS" and "lte_config_band"
+     """
+     df_band = df[["MRBTS", "band"]].copy()
+     df_band["ID"] = df_band[["MRBTS", "band"]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[["MRBTS", "band"]]
+     df_band["band"] = df_band["band"].fillna("empty")
+     df_band = (
+         df_band.groupby("MRBTS")["band"]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={"band": "lte_config_band"}, inplace=True)
+
+     return df_band
+
+
+ def adjl_band(df: pd.DataFrame, id_col: str, band_col: str) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the adjl configuration band for each adjl ID.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the adjl information, with the ID and band columns
+         named by id_col and band_col
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the adjl configuration band for each adjl ID,
+         with columns id_col and "adjl_created_band"
+     """
+     df_band = df[[id_col, band_col]].copy()
+     df_band["ID"] = df_band[[id_col, band_col]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[[id_col, band_col]]
+     df_band[band_col] = df_band[band_col].fillna("empty")
+     df_band = (
+         df_band.groupby(id_col)[band_col]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={band_col: "adjl_created_band"}, inplace=True)
+
+     return df_band
utils/convert_to_excel.py ADDED
@@ -0,0 +1,365 @@
+ import io
+ import time
+
+ import pandas as pd
+ import streamlit as st
+
+ # @st.cache_data
+ # def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
+ #     # IMPORTANT: Cache the conversion to prevent computation on every rerun
+
+ #     # Create a BytesIO object
+ #     bytes_io = io.BytesIO()
+
+ #     # Write the dataframes to the BytesIO object
+ #     with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
+ #         for df, sheet_name in zip(dfs, sheet_names):
+ #             df.to_excel(writer, sheet_name=sheet_name, index=True)
+
+ #     # Get the bytes data
+ #     bytes_data = bytes_io.getvalue()
+
+ #     # Close the BytesIO object
+ #     bytes_io.close()
+
+ #     return bytes_data
+
+
+ def get_formats(workbook):
+     return {
+         "green": workbook.add_format(
+             {"bg_color": "#37CC73", "bold": True, "border": 1}
+         ),
+         "green_light": workbook.add_format(
+             {"bg_color": "#87E0AB", "bold": True, "border": 1}
+         ),
+         "blue": workbook.add_format({"bg_color": "#1A64FF", "bold": True, "border": 1}),
+         "blue_light": workbook.add_format(
+             {"bg_color": "#00B0F0", "bold": True, "border": 1}
+         ),
+         "beurre": workbook.add_format(
+             {"bg_color": "#FFE699", "bold": True, "border": 1}
+         ),
+         "orange": workbook.add_format(
+             {"bg_color": "#F47F31", "bold": True, "border": 1}
+         ),
+         "purple5": workbook.add_format(
+             {"bg_color": "#E03DCD", "bold": True, "border": 1}
+         ),
+         "purple6": workbook.add_format(
+             {"bg_color": "#AE83F8", "bold": True, "border": 1}
+         ),
+         "gray": workbook.add_format({"bg_color": "#D9D9D9", "bold": True, "border": 1}),
+         "red": workbook.add_format({"bg_color": "#FF0000", "bold": True, "border": 1}),
+         "yellow": workbook.add_format(
+             {"bg_color": "#FFFF00", "bold": True, "border": 1}
+         ),
+     }
+
+
+ def get_format_map_by_format_type(formats: dict, format_type: str) -> dict:
+     if format_type == "GSM_Analysis":
+         return {
+             # "name": formats["blue"],
+             "amrSegLoadDepTchRateLower": formats["beurre"],
+             "amrSegLoadDepTchRateUpper": formats["beurre"],
+             "btsSpLoadDepTchRateLower": formats["beurre"],
+             "btsSpLoadDepTchRateUpper": formats["beurre"],
+             "amrWbFrCodecModeSet": formats["beurre"],
+             "dedicatedGPRScapacity": formats["beurre"],
+             "defaultGPRScapacity": formats["beurre"],
+             "number_trx_per_cell": formats["blue"],
+             "number_trx_per_bcf": formats["blue"],
+             "number_tch_per_cell": formats["blue"],
+             "number_sd_per_cell": formats["blue"],
+             "number_bcch_per_cell": formats["blue"],
+             "number_ccch_per_cell": formats["blue"],
+             "number_cbc_per_cell": formats["blue"],
+             "number_total_channels_per_cell": formats["blue"],
+             "number_signals_per_cell": formats["blue"],
+             "hf_rate_coef": formats["purple5"],
+             "GPRS": formats["purple5"],
+             "TCH Actual HR%": formats["green"],
+             "Offered Traffic BH": formats["green"],
+             "Max_Traffic BH": formats["green"],
+             "Avg_Traffic BH": formats["green"],
+             "TCH UTILIZATION (@Max Traffic)": formats["red"],
+             "Tch utilization comments": formats["orange"],
+             "ErlabngB_value": formats["purple6"],
+             "Target FR CHs": formats["purple6"],
+             "Target HR CHs": formats["purple6"],
+             "Target TCHs": formats["purple6"],
+             "Target TRXs": formats["purple6"],
+             "Number of required TRXs": formats["purple6"],
+             "max_tch_call_blocking_bh": formats["yellow"],
+             "avg_tch_call_blocking_bh": formats["yellow"],
+             "number_of_days_with_tch_blocking_exceeded_bh": formats["yellow"],
+             "tch_call_blocking_bh_comment": formats["orange"],
+             "max_sdcch_real_blocking_bh": formats["yellow"],
+             "avg_sdcch_real_blocking_bh": formats["yellow"],
+             "number_of_days_with_sdcch_blocking_exceeded_bh": formats["yellow"],
+             "sdcch_real_blocking_bh_comment": formats["orange"],
+             "Average_cell_availability_bh": formats["yellow"],
+             "number_of_days_exceeding_availability_threshold_bh": formats["yellow"],
+             "availability_comment_bh": formats["orange"],
+             "max_tch_abis_fail_bh": formats["yellow"],
+             "avg_tch_abis_fail_bh": formats["yellow"],
+             "number_of_days_with_tch_abis_fail_exceeded_bh": formats["yellow"],
+             "tch_abis_fail_bh_comment": formats["orange"],
+             "Average_cell_availability_daily": formats["green_light"],
+             "number_of_days_exceeding_availability_threshold_daily": formats[
+                 "green_light"
+             ],
+             "availability_comment_daily": formats["orange"],
+             "max_tch_abis_fail_daily": formats["green_light"],
+             "avg_tch_abis_fail_daily": formats["green_light"],
+             "number_of_days_with_tch_abis_fail_exceeded_daily": formats["green_light"],
+             "tch_abis_fail_daily_comment": formats["orange"],
+             "BH Congestion status": formats["gray"],
+             "operational_comment": formats["gray"],
+             "Final comment": formats["gray"],
+             "Final comment summary": formats["gray"],
+             # Operational Neighbours Distance Sheet
+             "Source_ID_BTS": formats["blue"],
+             "Source_name": formats["blue"],
+             "Source_BH Congestion status": formats["blue"],
+             "Source_Longitude": formats["blue"],
+             "Source_Latitude": formats["blue"],
+             "Neighbour_ID_BTS": formats["green_light"],
+             "Neighbour_name": formats["green_light"],
+             "Neighbour_operational_comment": formats["green_light"],
+             "Neighbour_Longitude": formats["green_light"],
+             "Neighbour_Latitude": formats["green_light"],
+             "Distance_km": formats["beurre"],
+         }
+     elif format_type == "database":
+         return {
+             "code": formats["blue"],
+             "Azimut": formats["green"],
+             "Longitude": formats["green"],
+             "Latitude": formats["green"],
+             "Hauteur": formats["green"],
+             "City": formats["green"],
+             "Adresse": formats["green"],
+             "Commune": formats["green"],
+             "Cercle": formats["green"],
+             "number_trx_per_cell": formats["blue_light"],
+             "number_trx_per_bcf": formats["blue_light"],
+             "number_trx_per_site": formats["blue_light"],
+             # invunit part in database
+             "FBBA": formats["blue_light"],
+             "FBBC": formats["blue_light"],
+             "FSMF": formats["blue_light"],
+             "ABIA": formats["blue_light"],
+             "total_number_of_subunit": formats["blue_light"],
+             "AHDA": formats["beurre"],
+             "AHEGB": formats["beurre"],
+             "AHEGC": formats["beurre"],
+             "AHEGHA": formats["beurre"],
+             "AHGA": formats["beurre"],
+             "AHMA": formats["beurre"],
+             "AHPMDA": formats["beurre"],
+             "AHPMDG": formats["beurre"],
+             "AHPMDI": formats["beurre"],
+             "ARDA": formats["beurre"],
+             "AREA": formats["beurre"],
+             "ARGA": formats["beurre"],
+             "ARMA": formats["beurre"],
+             "AZNA": formats["beurre"],
+             "FHDB": formats["beurre"],
+             "FHEB": formats["beurre"],
+             "FHEL": formats["beurre"],
+             "FRGU": formats["beurre"],
+             "FRGY": formats["beurre"],
+             "FRMB": formats["beurre"],
+             "FRMF": formats["beurre"],
+             "FXDB": formats["beurre"],
+             "FXED": formats["beurre"],
+             "FZNI": formats["beurre"],
+         }
+     elif format_type == "LTE_Analysis":
+         return {
+             "code": formats["blue"],
+             "code_sector": formats["blue"],
+             "Region": formats["blue"],
+             "site_config_band": formats["blue"],
+             "Longitude": formats["blue"],
+             "Latitude": formats["blue"],
+             # "name_l800": formats["beurre"],
+             # "name_l1800": formats["purple5"],
+             # "name_l2300": formats["purple6"],
+             # "name_l2600": formats["blue_light"],
+             # "name_l1800s": formats["gray"],
+             "prb_l800": formats["beurre"],
+             "prb_l1800": formats["beurre"],
+             "prb_l2300": formats["beurre"],
+             "prb_l2600": formats["beurre"],
+             "prb_l1800s": formats["beurre"],
+             "prb_l800_2nd": formats["purple5"],
+             "prb_l1800_2nd": formats["purple5"],
+             "prb_l2300_2nd": formats["purple5"],
+             "prb_l2600_2nd": formats["purple5"],
+             "prb_l1800s_2nd": formats["purple5"],
+             "act_ues_l800": formats["purple6"],
+             "act_ues_l1800": formats["purple6"],
+             "act_ues_l2300": formats["purple6"],
+             "act_ues_l2600": formats["purple6"],
+             "act_ues_l1800s": formats["purple6"],
+             "dl_thp_l800": formats["blue_light"],
+             "dl_thp_l1800": formats["blue_light"],
+             "dl_thp_l2300": formats["blue_light"],
+             "dl_thp_l2600": formats["blue_light"],
+             "dl_thp_l1800s": formats["blue_light"],
+             "ul_thp_l800": formats["gray"],
+             "ul_thp_l1800": formats["gray"],
+             "ul_thp_l2300": formats["gray"],
+             "ul_thp_l2600": formats["gray"],
+             "ul_thp_l1800s": formats["gray"],
+             "num_congested_cells": formats["orange"],
+             "num_cells": formats["orange"],
+             "num_cell_with_kpi": formats["orange"],
+             "num_down_or_no_kpi_cells": formats["orange"],
+             "prb_diff_between_cells": formats["orange"],
+             "load_balance_required": formats["orange"],
+             "congestion_comment": formats["orange"],
+             "final_comments": formats["green"],
+         }
+
+     elif format_type == "WCEL_capacity":
+         return {
+             "code": formats["blue"],
+             "Region": formats["blue"],
+             "name": formats["blue"],
+             "Avg_availability": formats["blue_light"],
+             "Avail_exceed_days": formats["blue_light"],
+             "availability_comment": formats["blue_light"],
+             "sum_traffic_cs": formats["beurre"],
+             "sum_traffic_dl": formats["beurre"],
+             "max_dl_throughput": formats["beurre"],
+             "avg_dl_throughput": formats["beurre"],
+             "max_users": formats["beurre"],
+             "max_iub_frameloss": formats["purple5"],
+             "iub_frameloss_exceed_days": formats["purple5"],
+             "max_hsdpa_congestion_rate_iub": formats["purple5"],
+             "hsdpa_iub_exceed_days": formats["purple5"],
+             "max_rrc_fail_ac": formats["purple6"],
+             "ac_fail_exceed_days": formats["purple6"],
+             "max_rrc_fail_ac_ul": formats["purple6"],
+             "ac_ul_fail_exceed_days": formats["purple6"],
+             "max_rrc_fail_ac_dl": formats["purple6"],
+             "ac_dl_fail_exceed_days": formats["purple6"],
+             "max_rrc_fail_code": formats["purple6"],
+             "code_fail_exceed_days": formats["purple6"],
+             "max_rrc_fail_bts": formats["yellow"],
+             "bts_fail_exceed_days": formats["yellow"],
+             "tx_congestion_comments": formats["green"],
+             "operational_comments": formats["green"],
+             "fails_comments": formats["green"],
+             "final_comments": formats["green"],
+         }
+
+     elif format_type == "invunit":
+         return {
+             "code": formats["blue"],
+             "FBBA": formats["blue_light"],
+             "FBBC": formats["blue_light"],
+             "FSMF": formats["blue_light"],
+             "ABIA": formats["blue_light"],
+             "total_number_of_subunit": formats["blue_light"],
+             "AHDA": formats["beurre"],
+             "AHEGB": formats["beurre"],
+             "AHEGC": formats["beurre"],
+             "AHEGHA": formats["beurre"],
+             "AHGA": formats["beurre"],
+             "AHMA": formats["beurre"],
+             "AHPMDA": formats["beurre"],
+             "AHPMDG": formats["beurre"],
+             "AHPMDI": formats["beurre"],
+             "ARDA": formats["beurre"],
+             "AREA": formats["beurre"],
+             "ARGA": formats["beurre"],
+             "ARMA": formats["beurre"],
+             "AZNA": formats["beurre"],
+             "FHDB": formats["beurre"],
+             "FHEB": formats["beurre"],
+             "FHEL": formats["beurre"],
+             "FRGU": formats["beurre"],
+             "FRGY": formats["beurre"],
+             "FRMB": formats["beurre"],
+             "FRMF": formats["beurre"],
+             "FXDB": formats["beurre"],
+             "FXED": formats["beurre"],
+             "FZNI": formats["beurre"],
+         }
+
+     else:
+         return {}  # No formatting if format_type not matched
+
298
+
299
+ def _apply_custom_formatting(
300
+ writer, df: pd.DataFrame, sheet_name: str, format_type: str
301
+ ):
302
+ workbook = writer.book
303
+ worksheet = writer.sheets[sheet_name]
304
+
305
+ formats = get_formats(workbook)
306
+ format_map = get_format_map_by_format_type(formats, format_type)
307
+
308
+ # Re-write each recognised header cell with its colour format; the +1 column
+ # offset accounts for the index column written by DataFrame.to_excel
+ for col_idx, col_name in enumerate(df.columns):
309
+ fmt = format_map.get(col_name)
310
+ if fmt:
311
+ worksheet.write(0, col_idx + 1, col_name, fmt)
312
+
313
+
314
+ def _write_to_excel(
315
+ dfs: list[pd.DataFrame], sheet_names: list[str], index=True, format_type: str | None = None
316
+ ) -> bytes:
317
+ bytes_io = io.BytesIO()
318
+ with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
319
+ for df, name in zip(dfs, sheet_names):
320
+ # df.index.name = "index"
321
+ df.to_excel(writer, sheet_name=name, index=index)
322
+ if format_type:
323
+ _apply_custom_formatting(writer, df, name, format_type)
324
+ return bytes_io.getvalue()
325
+
326
+
327
+ @st.cache_data
328
+ def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
329
+ return _write_to_excel(dfs, sheet_names, index=True)
330
+
331
+
332
+ @st.cache_data
333
+ def convert_gsm_dfs(dfs, sheet_names) -> bytes:
334
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="GSM_Analysis")
335
+
336
+
337
+ @st.cache_data
338
+ def convert_lte_analysis_dfs(dfs, sheet_names) -> bytes:
339
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="LTE_Analysis")
340
+
341
+
342
+ @st.cache_data
343
+ def convert_wcel_capacity_dfs(dfs, sheet_names) -> bytes:
344
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="WCEL_capacity")
345
+
346
+
347
+ @st.cache_data
348
+ def convert_database_dfs(dfs, sheet_names) -> bytes:
349
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="database")
350
+
351
+
352
+ @st.cache_data
353
+ def convert_invunit_dfs(dfs, sheet_names) -> bytes:
354
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="invunit")
355
+
356
+
357
+ def save_dataframe(df: pd.DataFrame, sheet_name: str):
358
+ """
359
+ Save the dataframe to a csv file.
360
+
361
+ Args:
362
+ df (pd.DataFrame): The dataframe to save.
363
+ sheet_name (str): The name of the sheet.
364
+ """
365
+ df.to_csv(f"data2/{sheet_name}_{time.time()}.csv", index=False, encoding="latin1")
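A minimal usage sketch of the converter family above (the DataFrames and the output path are illustrative, not part of the module; assumes the xlsxwriter engine from requirements.txt and a Streamlit context for the @st.cache_data wrappers):

import pandas as pd

summary_df = pd.DataFrame({"code": [101], "Final comment": ["OK"]})  # hypothetical sheet
daily_df = pd.DataFrame({"code": [101], "2024-01-01": [99.5]})       # hypothetical sheet

excel_bytes = convert_gsm_dfs([summary_df, daily_df], ["Summary", "Daily"])
with open("gsm_analysis.xlsx", "wb") as f:
    f.write(excel_bytes)

Headers whose names appear in the "GSM_Analysis" format map are re-coloured; every other column keeps the default header style.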
utils/extract_code.py ADDED
@@ -0,0 +1,34 @@
1
+ def extract_code_from_mrbts(mrbts):
2
+ """
3
+ Extract the site code from an MRBTS (Mobile Radio Base Transceiver Station) identifier.
4
+
5
+ Args:
6
+ mrbts (int or str): The MRBTS string to extract the code from.
7
+
8
+ Returns:
9
+ int: The extracted code from the MRBTS string.
10
+
11
+ Raises:
12
+ None.
13
+
14
+ Notes:
15
+ This function handles MRBTS strings that start with '10' and have a length greater than 5,
16
+ as well as MRBTS strings that start with '1', '2', or '3'. For MRBTS strings that do not
17
+ meet these criteria, the entire MRBTS string is returned as an integer.
18
+ """
19
+ str_mrbts = str(mrbts)
20
+
21
+ if len(str_mrbts) > 5 and str_mrbts.startswith("10"):
22
+ # For MRBTS starting with '10' and having length greater than 5
23
+ return int(str_mrbts[2:])
24
+ elif len(str_mrbts) > 4 and str_mrbts.startswith("1"):
25
+ return int(str_mrbts[1:])
26
+ elif len(str_mrbts) > 4 and str_mrbts.startswith("2"):
27
+ # For MRBTS starting with '2' (like 20000 + code)
28
+ return int(str_mrbts[1:])
29
+ elif len(str_mrbts) > 4 and str_mrbts.startswith("3"):
30
+ # For MRBTS starting with '3' (like 30000 + code)
31
+ return int(str_mrbts[1:])
32
+ else:
33
+ # Default case
34
+ return int(str_mrbts)
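A few worked examples of the prefix stripping (values are illustrative):

extract_code_from_mrbts(1012345)   # -> 12345 ('10' prefix dropped, length > 5)
extract_code_from_mrbts("112345")  # -> 12345 (leading '1' dropped, length > 4)
extract_code_from_mrbts(212345)    # -> 12345 (leading '2' dropped)
extract_code_from_mrbts(9876)      # -> 9876  (too short, returned unchanged)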
utils/kml_creator.py ADDED
@@ -0,0 +1,79 @@
1
+ import io
2
+ import math
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ import simplekml
7
+
8
+
9
+ def create_sector(kml: simplekml.Kml, row, arc_angle=65):
10
+ """Create a sector shape for the telecom antenna in KML with sector details."""
11
+ code, name, azimuth, lon, lat, size, color = (
12
+ row["code"],
13
+ row["name"],
14
+ row["Azimut"],
15
+ row["Longitude"],
16
+ row["Latitude"],
17
+ row["size"],
18
+ row["color"],
19
+ )
20
+
21
+ num_points = 20 # Number of points for smooth arc
22
+ start_angle = azimuth - (arc_angle / 2)
23
+ end_angle = azimuth + (arc_angle / 2)
24
+
25
+ coords = [(lon, lat)] # Start with the site location (center point)
26
+
27
+ # Generate points for the sector arc
28
+ for angle in np.linspace(start_angle, end_angle, num_points):
29
+ angle_rad = math.radians(angle)
30
+ arc_lon = lon + (size / 111320) * math.sin(angle_rad)
31
+ arc_lat = lat + (size / 111320) * math.cos(angle_rad)
32
+ coords.append((arc_lon, arc_lat))
33
+
34
+ coords.append((lon, lat)) # Close the polygon
35
+
36
+ # Create the sector polygon
37
+ pol = kml.newpolygon(name=name, outerboundaryis=coords)
38
+
39
+ # Dynamically create the description from all DataFrame columns
40
+ description = "<b>Sector Details:</b><br>"
41
+ for column, value in row.items():
42
+ description += f"<b>{column}:</b> {value}<br>"
43
+
44
+ pol.description = description
45
+ pol.style.polystyle.color = color # Set color from DataFrame
46
+ pol.style.polystyle.outline = 1 # Outline enabled
47
+ pol.style.linestyle.color = "ff000000" # Black outline
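The arc offsets above use a flat-earth approximation: 111,320 m per degree on both axes. One degree of longitude actually spans about 111320 * cos(latitude) metres, so sectors drawn far from the equator appear slightly stretched east-west. A latitude-corrected variant would look like this (a suggestion, not what the module does):

# Hypothetical correction: shrink the east-west offset by cos(latitude)
arc_lon = lon + (size / (111320 * math.cos(math.radians(lat)))) * math.sin(angle_rad)
arc_lat = lat + (size / 111320) * math.cos(angle_rad)

At the low latitudes of most deployments the difference is small, which is presumably why the simpler form is used.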
48
+
49
+
50
+ def generate_kml_from_df(df: pd.DataFrame):
51
+ """Generate a KML file from a Pandas DataFrame for telecom sectors."""
52
+ kml = simplekml.Kml()
53
+ site_added = set() # Keep track of sites already added to avoid duplicates
54
+
55
+ # Sort the DataFrame to ensure 900 MHz (smaller) is drawn last (on top)
56
+ df_sorted = df.sort_values(
57
+ by="size", ascending=False
58
+ ) # Larger first, smaller on top
59
+
60
+ for _, row in df_sorted.iterrows():
61
+ code, lon, lat = row["code"], row["Longitude"], row["Latitude"]
62
+
63
+ # Add site name as a point only once
64
+ if code not in site_added:
65
+ pnt = kml.newpoint(name=code, coords=[(lon, lat)])
66
+ pnt.style.iconstyle.icon.href = (
67
+ "http://maps.google.com/mapfiles/kml/shapes/placemark_circle.png"
68
+ )
69
+ pnt.style.labelstyle.scale = 1.2 # Adjust label size
70
+ pnt.description = f"Site: {code}<br>Location: {lat}, {lon}"
71
+ site_added.add(code)
72
+
73
+ create_sector(kml, row)
74
+
75
+ kml_data = io.BytesIO()
76
+ kml_str = kml.kml() # Get KML as string
77
+ kml_data.write(kml_str.encode("utf-8")) # Write KML to BytesIO
78
+ kml_data.seek(0) # Move to beginning of BytesIO
79
+ return kml_data
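A usage sketch with the columns the two functions expect; the 'size' and 'color' values typically come from UtilsVars.size_mapping and UtilsVars.color_mapping, and the row below is made up:

df = pd.DataFrame([{
    "code": "S1", "name": "S1_G900_1", "Azimut": 120,
    "Longitude": -8.0, "Latitude": 12.65,
    "size": 100, "color": "7fff0000",
}])
kml_io = generate_kml_from_df(df)
with open("sectors.kml", "wb") as f:
    f.write(kml_io.read())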
utils/kpi_analysis_utils.py ADDED
@@ -0,0 +1,666 @@
1
+ import re
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+
7
+ class GsmAnalysis:
8
+ hf_rate_coef = {
9
+ 10: 1.1,
10
+ 20: 1.2,
11
+ 40: 1.4,
12
+ 60: 1.6,
13
+ 70: 1.7,
14
+ 80: 1.8,
15
+ 99: 2.0,
16
+ 100: 1.4,
17
+ }
18
+ erlangB_table = {
19
+ 1: 0.0204,
20
+ 2: 0.2234,
21
+ 3: 0.6022,
22
+ 4: 1.092,
23
+ 5: 1.657,
24
+ 6: 2.276,
25
+ 7: 2.935,
26
+ 8: 3.627,
27
+ 9: 4.345,
28
+ 10: 5.084,
29
+ 11: 5.841,
30
+ 12: 6.614,
31
+ 13: 7.401,
32
+ 14: 8.2,
33
+ 15: 9.009,
34
+ 16: 9.828,
35
+ 17: 10.66,
36
+ 18: 11.49,
37
+ 19: 12.33,
38
+ 20: 13.18,
39
+ 21: 14.04,
40
+ 22: 14.9,
41
+ 23: 15.76,
42
+ 24: 16.63,
43
+ 25: 17.5,
44
+ 26: 18.38,
45
+ 27: 19.26,
46
+ 28: 20.15,
47
+ 29: 21.04,
48
+ 30: 21.93,
49
+ 31: 22.83,
50
+ 32: 23.72,
51
+ 33: 24.63,
52
+ 34: 25.53,
53
+ 35: 26.43,
54
+ 36: 27.34,
55
+ 37: 28.25,
56
+ 38: 29.17,
57
+ 39: 30.08,
58
+ 40: 31,
59
+ 41: 31.91,
60
+ 42: 32.84,
61
+ 43: 33.76,
62
+ 44: 34.68,
63
+ 45: 35.61,
64
+ 46: 36.53,
65
+ 47: 37.46,
66
+ 48: 38.39,
67
+ 49: 39.32,
68
+ 50: 40.25,
69
+ 51: 41.19,
70
+ 52: 42.12,
71
+ 53: 43.06,
72
+ 54: 44,
73
+ 55: 44.93,
74
+ 56: 45.88,
75
+ 57: 46.81,
76
+ 58: 47.75,
77
+ 59: 48.7,
78
+ 60: 49.64,
79
+ 61: 50.59,
80
+ 62: 51.53,
81
+ 63: 52.48,
82
+ 64: 53.43,
83
+ 65: 54.38,
84
+ 66: 55.32,
85
+ 67: 56.27,
86
+ 68: 57.22,
87
+ 69: 58.18,
88
+ 70: 59.13,
89
+ 71: 60.08,
90
+ 72: 61.04,
91
+ 73: 61.99,
92
+ 74: 62.94,
93
+ 75: 63.9,
94
+ 76: 64.86,
95
+ 77: 65.81,
96
+ 78: 66.77,
97
+ 79: 67.73,
98
+ 80: 68.69,
99
+ 81: 69.64,
100
+ 82: 70.61,
101
+ 83: 71.57,
102
+ 84: 72.53,
103
+ 85: 73.49,
104
+ 86: 74.45,
105
+ 87: 75.41,
106
+ 88: 76.38,
107
+ 89: 77.34,
108
+ 90: 78.3,
109
+ 91: 79.27,
110
+ 92: 80.23,
111
+ 93: 81.2,
112
+ 94: 82.16,
113
+ 95: 83.13,
114
+ 96: 84.09,
115
+ 97: 85.06,
116
+ 98: 86.03,
117
+ 99: 87,
118
+ 100: 87.97,
119
+ 101: 88.94,
120
+ 102: 89.91,
121
+ 103: 90.88,
122
+ 104: 91.85,
123
+ 105: 92.82,
124
+ 106: 93.79,
125
+ 107: 94.76,
126
+ 108: 95.73,
127
+ 109: 96.71,
128
+ 110: 97.68,
129
+ 111: 98.65,
130
+ 112: 99.63,
131
+ 113: 100.6,
132
+ 114: 101.57,
133
+ 115: 102.54,
134
+ 116: 103.52,
135
+ 117: 104.49,
136
+ 118: 105.47,
137
+ 119: 106.44,
138
+ 120: 107.42,
139
+ 121: 108.4,
140
+ 122: 109.37,
141
+ 123: 110.35,
142
+ 124: 111.32,
143
+ 125: 112.3,
144
+ 126: 113.28,
145
+ 127: 114.25,
146
+ 128: 115.23,
147
+ 129: 116.21,
148
+ 130: 117.19,
149
+ 131: 118.17,
150
+ 132: 119.15,
151
+ 133: 120.12,
152
+ 134: 121.1,
153
+ 135: 122.08,
154
+ 136: 123.07,
155
+ 137: 124.04,
156
+ 138: 125.02,
157
+ 139: 126.01341,
158
+ 140: 127.00918,
159
+ 141: 127.96752,
160
+ 142: 128.98152,
161
+ 143: 129.92152,
162
+ 144: 130.88534,
163
+ 145: 131.96461,
164
+ 146: 132.89897,
165
+ 147: 133.86373,
166
+ 148: 134.82569,
167
+ 149: 135.76295,
168
+ 150: 136.82988,
169
+ 151: 137.79,
170
+ 152: 138.77,
171
+ 153: 139.75,
172
+ 154: 140.74,
173
+ 155: 141.72,
174
+ 156: 142.7,
175
+ 157: 143.69,
176
+ 158: 144.67,
177
+ 159: 145.66,
178
+ 160: 146.64,
179
+ 161: 147.63,
180
+ 162: 148.61,
181
+ 163: 149.6,
182
+ 164: 150.58,
183
+ 165: 151.57,
184
+ 166: 152.55,
185
+ 167: 153.54,
186
+ 168: 154.53,
187
+ 169: 155.51,
188
+ 170: 156.5,
189
+ 171: 157.48,
190
+ 172: 158.47,
191
+ 173: 159.46,
192
+ 174: 160.44,
193
+ 175: 161.43,
194
+ 176: 162.42,
195
+ 177: 163.41,
196
+ 178: 164.39,
197
+ 179: 165.38,
198
+ 180: 166.37,
199
+ 181: 167.36,
200
+ 182: 168.35,
201
+ 183: 169.33,
202
+ 184: 170.32,
203
+ 185: 171.31,
204
+ 186: 172.3,
205
+ 187: 173.29,
206
+ 188: 174.28,
207
+ 189: 175.27,
208
+ 190: 176.26,
209
+ 191: 177.25,
210
+ 192: 178.24,
211
+ 193: 179.23,
212
+ 194: 180.22,
213
+ 195: 181.21,
214
+ 196: 182.2,
215
+ 197: 183.19,
216
+ 198: 184.18,
217
+ 199: 185.17,
218
+ 200: 186.16,
219
+ }
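erlangB_table maps a TCH channel count (1 to 200) to the maximum offered traffic in Erlangs, at what appears to be a 2% blocking probability. A small hypothetical lookup helper with clamping (not part of the class):

def erlang_capacity(channels: int) -> float:
    # Clamp to the 1..200 channel range covered by the table above
    table = GsmAnalysis.erlangB_table
    return table[max(1, min(200, int(channels)))]

erlang_capacity(8)    # -> 3.627 Erlangs
erlang_capacity(250)  # -> 186.16 (clamped to 200 channels)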
220
+
221
+
222
+ class GsmCapacity:
223
+ final_results = None
224
+ operational_neighbours_df = None
225
+ final_comment_mapping = {
226
+ "Availability and TX issues": "Operational issues with no congestion",
227
+ "Availability issues": "Operational issues with no congestion",
228
+ "TX issues": "Operational issues with no congestion",
229
+ "Operational is OK": "Operational is OK with no congestion",
230
+ "Tch utilization exceeded threshold, Availability and TX issues": "High utilization with Operational issues",
231
+ "Tch utilization exceeded threshold, Availability issues": "High utilization with Operational issues",
232
+ "Tch utilization exceeded threshold, TX issues": "High utilization with Operational issues",
233
+ "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
234
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
235
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
236
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "High Utilization with Congestion without Operational issues",
237
+ "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
238
+ "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "High utilization with Congestion and operational issues",
239
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
240
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Availability issues": "High utilization with Congestion and operational issues",
241
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
242
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability issues": "High utilization with Congestion and operational issues",
243
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, TX issues": "High utilization with Congestion and operational issues",
244
+ "Down Site": "Down Cell",
245
+ "SDCCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
246
+ "TCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
247
+ "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
248
+ "Tch utilization exceeded threshold, Operational is OK": "High utilization without Congestion and Operational issues",
249
+ "SDCCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
250
+ "SDCCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
251
+ "SDCCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
252
+ "TCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
253
+ "TCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
254
+ "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
255
+ "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
256
+ "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
257
+ "TCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
258
+ }
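The mapping collapses the many comment permutations produced by the analysis functions into a handful of summary buckets, for example:

GsmCapacity.final_comment_mapping[
    "TCH blocking exceeded threshold, TX issues"
]  # -> "Congestion with Operational issues"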
259
+
260
+
261
+ def combine_comments(df: pd.DataFrame, *columns: str, new_column: str) -> pd.DataFrame:
262
+ """
263
+ Combine comments from multiple columns into one column.
264
+
265
+ Args:
266
+ df: DataFrame containing comment columns
267
+ *columns: Variable number of column names containing comments
268
+ new_column: Name for the new combined comments column
269
+
270
+ Returns:
271
+ DataFrame with a new column containing combined comments
272
+ """
273
+ result_df = df.copy()
274
+ result_df[new_column] = result_df[list(columns)].apply(
275
+ lambda row: ", ".join([str(x) for x in row if x]), axis=1
276
+ )
277
+ # Trim all trailing commas
278
+ result_df[new_column] = result_df[new_column].str.replace(
279
+ r"^[,\s]+|[,\s]+$", "", regex=True
280
+ )
281
+ # Replace multiple commas with a single comma
282
+ result_df[new_column] = result_df[new_column].str.replace(
283
+ r",\s*,", ", ", regex=True
284
+ )
285
+ return result_df
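Illustrative behaviour (the column names are made up):

df = pd.DataFrame({
    "a": ["TCH blocking exceeded threshold", None],
    "b": [None, "Availability issues"],
})
combine_comments(df, "a", "b", new_column="final")["final"].tolist()
# -> ['TCH blocking exceeded threshold', 'Availability issues']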
286
+
287
+
288
+ def summarize_fails_comments(comment):
289
+ if not comment or pd.isna(comment) or comment.strip() == "":
290
+ return ""
291
+
292
+ # Extract all `rrc_fail_xxx` fields
293
+ matches = re.findall(r"rrc_fail_([a-z_]+)", comment)
294
+ if not matches:
295
+ return ""
296
+
297
+ # Remove duplicates, sort alphabetically
298
+ unique_sorted = sorted(set(matches))
299
+
300
+ # Combine and add 'fails'
301
+ return ", ".join(unique_sorted) + " fails"
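For example, a combined fails comment reduces to the distinct failure causes:

summarize_fails_comments(
    "rrc_fail_ac exceeded threshold, rrc_fail_code exceeded threshold"
)  # -> "ac, code fails"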
302
+
303
+
304
+ def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
305
+ """
306
+ Clean KPI column names by replacing special characters and standardizing format.
307
+
308
+ Args:
309
+ df: DataFrame with KPI column names to clean
310
+
311
+ Returns:
312
+ DataFrame with cleaned column names
313
+ """
314
+ name_df: pd.DataFrame = df.copy()
315
+ name_df.columns = name_df.columns.str.replace("[ /(),-.']", "_", regex=True)
316
+ name_df.columns = name_df.columns.str.replace("___", "_")
317
+ name_df.columns = name_df.columns.str.replace("__", "_")
318
+ name_df.columns = name_df.columns.str.replace("%", "perc")
319
+ name_df.columns = name_df.columns.str.rstrip("_")
320
+ return name_df
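Note that the character class "[ /(),-.']" relies on ",-." being a contiguous codepoint range (44 to 46), so it matches comma, hyphen and period. An illustrative run:

df = pd.DataFrame(columns=["TCH availability, (%)"])
kpi_naming_cleaning(df).columns.tolist()  # -> ['TCH_availability_perc']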
321
+
322
+
323
+ def create_daily_date(df: pd.DataFrame) -> pd.DataFrame:
324
+ """
325
+ Create a daily date column from PERIOD_START_TIME and drop unnecessary columns.
326
+
327
+ Args:
328
+ df: DataFrame containing PERIOD_START_TIME column
329
+
330
+ Returns:
331
+ DataFrame with new date column and unnecessary columns removed
332
+ """
333
+ date_df: pd.DataFrame = df.copy()
334
+ date_df[["mois", "jour", "annee"]] = date_df["PERIOD_START_TIME"].str.split(
335
+ ".", expand=True
336
+ )
337
+ date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
338
+ # Remove unnecessary columns
339
+ date_df = date_df.drop(["annee", "mois", "jour", "PERIOD_START_TIME"], axis=1)
340
+ return date_df
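The PERIOD_START_TIME values are expected in MM.DD.YYYY form, and the resulting date stays a string:

raw = pd.DataFrame({"PERIOD_START_TIME": ["01.25.2024"], "kpi": [99.1]})
create_daily_date(raw)  # -> columns ['kpi', 'date'], with date == '2024-01-25'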
341
+
342
+
343
+ def create_hourly_date(df: pd.DataFrame) -> pd.DataFrame:
+ """Create hourly 'datetime' and daily 'date' columns from PERIOD_START_TIME."""
+ # Copy so the caller's DataFrame is not mutated in place
+ date_df: pd.DataFrame = df.copy()
345
+ date_df[["date_t", "hour"]] = date_df["PERIOD_START_TIME"].str.split(
346
+ " ", expand=True
347
+ )
348
+ date_df[["mois", "jour", "annee"]] = date_df["date_t"].str.split(".", expand=True)
349
+ date_df["datetime"] = (
350
+ date_df["annee"]
351
+ + "-"
352
+ + date_df["mois"]
353
+ + "-"
354
+ + date_df["jour"]
355
+ + " "
356
+ + date_df["hour"]
357
+ )
358
+
359
+ date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
360
+
361
+ # Remove columns 'année' and 'mois'
362
+ date_df = date_df.drop(
363
+ ["annee", "mois", "jour", "date_t", "PERIOD_START_TIME"], axis=1
364
+ )
365
+ return date_df
366
+
367
+
368
+ def create_dfs_per_kpi(
369
+ df: pd.DataFrame = None,
370
+ pivot_date_column: str = "date",
371
+ pivot_name_column: str = "BTS_name",
372
+ kpi_columns_from: int = None,
373
+ ) -> dict[str, pd.DataFrame]:
+ """
+ Create one pivoted DataFrame per KPI column.
+
+ Args:
+ df: Long-format DataFrame containing KPI data
+ pivot_date_column: Column used for the pivoted column axis
+ pivot_name_column: Column used for the pivoted row index
+ kpi_columns_from: Positional index of the first KPI column
+
+ Returns:
+ dict mapping each KPI name to its pivoted DataFrame
+ """
382
+ kpi_columns = df.columns[kpi_columns_from:]
383
+
384
+ pivoted_kpi_dfs = {}
385
+
386
+ # Loop through each KPI and create pivoted DataFrames
387
+ for kpi in kpi_columns:
388
+ temp_df = df[[pivot_date_column, pivot_name_column, kpi]].copy()
389
+ # remove duplicates
390
+ temp_df = temp_df.drop_duplicates(
391
+ subset=[pivot_name_column, pivot_date_column], keep="first"
392
+ )
393
+ temp_df = temp_df.reset_index()
394
+ # Pivot the dataframe
395
+ pivot_df = temp_df.pivot(
396
+ index=pivot_name_column, columns=pivot_date_column, values=kpi
397
+ )
398
+ pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
399
+ pivot_df.columns.names = ["KPI", "Date"]
400
+
401
+ # Store in dictionary with KPI name as key
402
+ pivoted_kpi_dfs[kpi] = pivot_df
403
+
404
+ return pivoted_kpi_dfs
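Illustrative input and output shape (values made up):

long_df = pd.DataFrame({
    "date": ["2024-01-01", "2024-01-02"],
    "BTS_name": ["CELL_A", "CELL_A"],
    "tch_blocking": [0.5, 1.2],
})
per_kpi = create_dfs_per_kpi(long_df, kpi_columns_from=2)
per_kpi["tch_blocking"]
# index: BTS_name; columns: MultiIndex (KPI='tch_blocking', Date=...)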
405
+
406
+
407
+ def cell_availability_analysis(
408
+ df: pd.DataFrame,
409
+ days: int = 7,
410
+ availability_threshold: int = 95,
411
+ analysis_type: str = "daily",
412
+ ) -> pd.DataFrame:
413
+ """
414
+ Analyze cell availability and categorize sites based on availability metrics.
415
+
416
+ Args:
417
+ df: DataFrame containing cell availability data
+ days: Number of days to analyze
+ availability_threshold: Percentage at or below which a day counts as degraded
+ analysis_type: Suffix used in the generated column names (e.g. "daily")
419
+
420
+ Returns:
421
+ DataFrame with availability analysis and site status comments
422
+ """
423
+ result_df: pd.DataFrame = df.copy().fillna(0)
424
+ last_days_df: pd.DataFrame = result_df.iloc[:, -days:]
425
+ result_df[f"Average_cell_availability_{analysis_type.lower()}"] = last_days_df.mean(
426
+ axis=1
427
+ ).round(2)
428
+
429
+ # Count the number of days above threshold
430
+ result_df[
431
+ f"number_of_days_exceeding_availability_threshold_{analysis_type.lower()}"
432
+ ] = last_days_df.apply(
433
+ lambda row: sum(1 for x in row if x <= availability_threshold), axis=1
434
+ )
435
+
436
+ # Categorize sites based on availability
437
+ def categorize_availability(x: float) -> str:
438
+ if x == 0 or pd.isnull(x):
439
+ return "Down Site"
440
+ elif 0 < x <= 70:
441
+ return "critical instability"
442
+ elif 70 < x <= availability_threshold:
443
+ return "instability"
444
+ else:
445
+ return "Availability OK"
446
+
447
+ result_df[f"availability_comment_{analysis_type.lower()}"] = result_df[
448
+ f"Average_cell_availability_{analysis_type.lower()}"
449
+ ].apply(categorize_availability)
450
+
451
+ return result_df
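For instance, with two daily columns and the default 95% threshold:

avail = pd.DataFrame({"2024-01-01": [99.0], "2024-01-02": [90.0]}, index=["CELL_A"])
out = cell_availability_analysis(avail, days=2)
# Average_cell_availability_daily == 94.5 -> comment 'instability',
# number_of_days_exceeding_availability_threshold_daily == 1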
452
+
453
+
454
+ def analyze_tch_abis_fails(
455
+ df: pd.DataFrame,
456
+ number_of_kpi_days: int,
457
+ analysis_type: str,
458
+ number_of_threshold_days: int,
459
+ tch_abis_fails_threshold: int,
460
+ ) -> pd.DataFrame:
461
+
462
+ result_df: pd.DataFrame = df.copy()
463
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
464
+ # last_days_df = last_days_df.fillna(0)
465
+
466
+ result_df[f"avg_tch_abis_fail_{analysis_type.lower()}"] = last_days_df.mean(
467
+ axis=1
468
+ ).round(2)
469
+ result_df[f"max_tch_abis_fail_{analysis_type.lower()}"] = last_days_df.max(axis=1)
470
+ # Count the number of days above threshold
471
+ result_df[f"number_of_days_with_tch_abis_fail_exceeded_{analysis_type.lower()}"] = (
472
+ last_days_df.apply(
473
+ lambda row: sum(1 for x in row if x >= tch_abis_fails_threshold), axis=1
474
+ )
475
+ )
476
+
477
+ # Flag "tch abis fail exceeded threshold" once the exceed-count reaches number_of_threshold_days
478
+ result_df[f"tch_abis_fail_{analysis_type.lower()}_comment"] = np.where(
479
+ result_df[f"number_of_days_with_tch_abis_fail_exceeded_{analysis_type.lower()}"]
480
+ >= number_of_threshold_days,
481
+ "tch abis fail exceeded threshold",
482
+ None,
483
+ )
484
+
485
+ return result_df
486
+
487
+
488
+ def analyze_tch_call_blocking(
489
+ df: pd.DataFrame,
490
+ number_of_kpi_days: int,
491
+ analysis_type: str,
492
+ number_of_threshold_days: int,
493
+ tch_blocking_threshold: int,
494
+ ) -> pd.DataFrame:
495
+
496
+ result_df = df.copy()
497
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
498
+ # last_days_df = last_days_df.fillna(0)
499
+
500
+ result_df[f"avg_tch_call_blocking_{analysis_type.lower()}"] = last_days_df.mean(
501
+ axis=1
502
+ ).round(2)
503
+ result_df[f"max_tch_call_blocking_{analysis_type.lower()}"] = last_days_df.max(
504
+ axis=1
505
+ )
506
+ # Count the number of days above threshold
507
+ result_df[f"number_of_days_with_tch_blocking_exceeded_{analysis_type.lower()}"] = (
508
+ last_days_df.apply(
509
+ lambda row: sum(1 for x in row if x >= tch_blocking_threshold), axis=1
510
+ )
511
+ )
512
+
513
+ # Flag "TCH blocking exceeded threshold" once the exceed-count reaches number_of_threshold_days
514
+ result_df[f"tch_call_blocking_{analysis_type.lower()}_comment"] = np.where(
515
+ result_df[f"number_of_days_with_tch_blocking_exceeded_{analysis_type.lower()}"]
516
+ >= number_of_threshold_days,
517
+ "TCH blocking exceeded threshold",
518
+ None,
519
+ )
520
+ return result_df
521
+
522
+
523
+ def analyze_sdcch_call_blocking(
524
+ df: pd.DataFrame,
525
+ number_of_kpi_days: int,
526
+ sdcch_blocking_threshold: int,
527
+ analysis_type: str,
528
+ number_of_threshold_days: int,
529
+ ) -> pd.DataFrame:
530
+
531
+ result_df = df.copy()
532
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
533
+ # last_days_df = last_days_df.fillna(0)
534
+
535
+ result_df[f"avg_sdcch_real_blocking_{analysis_type.lower()}"] = last_days_df.mean(
536
+ axis=1
537
+ ).round(2)
538
+ result_df[f"max_sdcch_real_blocking_{analysis_type.lower()}"] = last_days_df.max(
539
+ axis=1
540
+ )
541
+ # Count the number of days above threshold
542
+ result_df[
543
+ f"number_of_days_with_sdcch_blocking_exceeded_{analysis_type.lower()}"
544
+ ] = last_days_df.apply(
545
+ lambda row: sum(1 for x in row if x >= sdcch_blocking_threshold), axis=1
546
+ )
547
+
548
+ # Flag "SDCCH blocking exceeded threshold" once the exceed-count reaches number_of_threshold_days
549
+ result_df[f"sdcch_real_blocking_{analysis_type.lower()}_comment"] = np.where(
550
+ result_df[
551
+ f"number_of_days_with_sdcch_blocking_exceeded_{analysis_type.lower()}"
552
+ ]
553
+ >= number_of_threshold_days,
554
+ "SDCCH blocking exceeded threshold",
555
+ None,
556
+ )
557
+
558
+ return result_df
559
+
560
+
561
+ class LteCapacity:
562
+ final_results = None
563
+ # Next band mapping
564
+ next_band_mapping = {
565
+ "L1800": "L800",
566
+ "L800": "L1800",
567
+ "L1800/L800": "L2600",
568
+ "L1800/L2300/L800": "L2600",
569
+ "L2300/L800": "L2600",
570
+ "L1800/L2600/L800": "New site/Dual Beam",
571
+ "L1800/L2300/L2600/L800": "New site/Dual Beam",
572
+ "L2300": "FDD H// colocated site",
573
+ }
574
+
575
+
576
+ def analyze_prb_usage(
577
+ df: pd.DataFrame,
578
+ number_of_kpi_days: int,
579
+ prb_usage_threshold: int,
580
+ analysis_type: str,
581
+ number_of_threshold_days: int,
582
+ suffix: str = "",
583
+ ) -> pd.DataFrame:
584
+ result_df = df.copy()
585
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
586
+ # last_days_df = last_days_df.fillna(0)
587
+
588
+ result_df[f"avg_prb_usage_{analysis_type.lower()}{suffix}"] = last_days_df.mean(
589
+ axis=1
590
+ ).round(2)
591
+ result_df[f"max_prb_usage_{analysis_type.lower()}{suffix}"] = last_days_df.max(
592
+ axis=1
593
+ )
594
+ # Count the number of days above threshold
595
+ result_df[
596
+ f"number_of_days_with_prb_usage_exceeded_{analysis_type.lower()}{suffix}"
597
+ ] = last_days_df.apply(
598
+ lambda row: sum(1 for x in row if x >= prb_usage_threshold), axis=1
599
+ )
600
+
601
+ # Flag "PRB usage exceeded threshold" once the exceed-count reaches number_of_threshold_days
602
+ result_df[f"prb_usage_{analysis_type.lower()}{suffix}_comment"] = np.where(
603
+ result_df[
604
+ f"number_of_days_with_prb_usage_exceeded_{analysis_type.lower()}{suffix}"
605
+ ]
606
+ >= number_of_threshold_days,
607
+ "PRB usage exceeded threshold",
608
+ None,
609
+ )
610
+ return result_df
611
+
612
+
613
+ def analyze_fails_kpi(
614
+ df: pd.DataFrame,
615
+ number_of_kpi_days: int,
616
+ number_of_threshold_days: int,
617
+ kpi_threshold: int,
618
+ kpi_column_name: str,
619
+ ) -> pd.DataFrame:
620
+ result_df: pd.DataFrame = df.copy()
621
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
622
+ # last_days_df = last_days_df.fillna(0)
623
+
624
+ result_df[f"avg_{kpi_column_name}"] = last_days_df.mean(axis=1).round(2)
625
+ result_df[f"max_{kpi_column_name}"] = last_days_df.max(axis=1)
626
+ # Count the number of days above threshold
627
+ result_df[f"number_of_days_with_{kpi_column_name}_exceeded"] = last_days_df.apply(
628
+ lambda row: sum(1 for x in row if x >= kpi_threshold), axis=1
629
+ )
630
+
631
+ # Flag "{kpi_column_name} exceeded threshold" once the exceed-count reaches number_of_threshold_days
632
+ result_df[f"{kpi_column_name}_comment"] = np.where(
633
+ result_df[f"number_of_days_with_{kpi_column_name}_exceeded"]
634
+ >= number_of_threshold_days,
635
+ f"{kpi_column_name} exceeded threshold",
636
+ None,
637
+ )
638
+ return result_df
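This parameterised analyzer follows the same avg/max/exceed-count pattern as the TCH, SDCCH and PRB functions above. An illustrative run flagging cells whose rrc_fail_ac exceeded 50 on at least 2 of the last 7 days:

fails = pd.DataFrame([[10, 60, 70, 0, 80, 5, 90]],
                     index=["CELL_A"],
                     columns=[f"2024-01-0{i}" for i in range(1, 8)])
out = analyze_fails_kpi(fails, number_of_kpi_days=7, number_of_threshold_days=2,
                        kpi_threshold=50, kpi_column_name="rrc_fail_ac")
# -> avg_rrc_fail_ac == 45.0, max_rrc_fail_ac == 90, 4 exceed days,
#    comment "rrc_fail_ac exceeded threshold"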
639
+
640
+
641
+ def analyze_lcg_utilization(
642
+ df: pd.DataFrame,
643
+ number_of_kpi_days: int,
644
+ number_of_threshold_days: int,
645
+ kpi_threshold: int,
646
+ kpi_column_name: str,
647
+ ) -> pd.DataFrame:
648
+ result_df: pd.DataFrame = df.copy()
649
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
650
+ # last_days_df = last_days_df.fillna(0)
651
+
652
+ result_df[f"avg_{kpi_column_name}"] = last_days_df.mean(axis=1).round(2)
653
+ result_df[f"max_{kpi_column_name}"] = last_days_df.max(axis=1)
654
+ # Count the number of days above threshold
655
+ result_df[f"number_of_days_with_{kpi_column_name}_exceeded"] = last_days_df.apply(
656
+ lambda row: sum(1 for x in row if x >= kpi_threshold), axis=1
657
+ )
658
+
659
+ # Flag "{kpi_column_name} exceeded threshold" once the exceed-count reaches number_of_threshold_days
660
+ result_df[f"{kpi_column_name}_comment"] = np.where(
661
+ result_df[f"number_of_days_with_{kpi_column_name}_exceeded"]
662
+ >= number_of_threshold_days,
663
+ f"{kpi_column_name} exceeded threshold",
664
+ None,
665
+ )
666
+ return result_df
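analyze_lcg_utilization is line-for-line identical to analyze_fails_kpi. If the duplication is not intentional, a thin delegation would keep a single code path (a suggestion only, not applied here):

# Hypothetical consolidation: delegate instead of duplicating the body
def analyze_lcg_utilization(df, number_of_kpi_days, number_of_threshold_days,
                            kpi_threshold, kpi_column_name):
    return analyze_fails_kpi(df, number_of_kpi_days, number_of_threshold_days,
                             kpi_threshold, kpi_column_name)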
utils/rnc_bsc_lac_count_chart.py ADDED
@@ -0,0 +1,89 @@
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objects as go
4
+ from plotly.subplots import make_subplots
5
+
6
+
7
+ # Reusable function to create subplots
8
+ def create_lac_count_per_controller_subplots(
9
+ df: pd.DataFrame,
10
+ controller_column: str,
11
+ lac_column: str,
12
+ count_column: str,
13
+ fig_title: str,
14
+ ):
15
+ # Get unique controller_IDs
16
+ unique_controllers = df[controller_column].unique()
17
+
18
+ # Calculate the number of rows needed (4 subplots per row)
19
+ rows_needed = (len(unique_controllers) + 3) // 4 # Round up to ensure enough rows
20
+
21
+ # Create subplot structure with a dynamic number of rows and 4 columns per row
22
+ fig = make_subplots(
23
+ rows=rows_needed,
24
+ cols=4,
25
+ shared_xaxes=False,
26
+ subplot_titles=unique_controllers,
27
+ )
28
+
29
+ # Add a counter for positioning the subplots
30
+ subplot_position = 1
31
+
32
+ # Iterate over each controller_ID
33
+ for controller in unique_controllers:
34
+ # Filter data for each controller_ID (create a small dataframe per controller_ID)
35
+ controller_data = df[df[controller_column] == controller]
36
+
37
+ # Determine the row and column for the current subplot
38
+ row = (subplot_position - 1) // 4 + 1
39
+ col = (subplot_position - 1) % 4 + 1
40
+
41
+ # Add bar chart to the subplot
42
+ fig.add_trace(
43
+ go.Bar(
44
+ x=controller_data[lac_column],
45
+ y=controller_data[count_column],
46
+ name=controller,
47
+ text=controller_data[count_column],
48
+ ),
49
+ row=row,
50
+ col=col,
51
+ )
52
+
53
+ # Move to the next subplot position
54
+ subplot_position += 1
55
+
56
+ # Update layout to make it more readable and fit all subplots
57
+ fig.update_layout(
58
+ height=300 * rows_needed,
59
+ title_text=fig_title,
60
+ showlegend=False,
61
+ )
62
+
63
+ # Show the plot
64
+ # fig.show()
65
+
66
+ return fig
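The placement arithmetic packs four subplots per row. For example, with 6 controllers:

# rows_needed = (6 + 3) // 4 == 2
# positions 1-4 -> row 1, cols 1-4; positions 5-6 -> row 2, cols 1-2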
67
+
68
+
69
+ def create_bar_chart(df: pd.DataFrame, title: str = "Chart Title") -> px.bar:
70
+ """
71
+ Create a bar chart using Plotly Express with the first column as x and the second column as y.
72
+
73
+ Args:
74
+ df (pd.DataFrame): Input DataFrame
75
+
76
+ Returns:
77
+ fig (px.bar): Bar chart figure
78
+ """
79
+ fig = px.bar(
80
+ df,
81
+ x=df.columns[0],
82
+ y=df.columns[1],
83
+ text_auto=True,
84
+ title=title,
85
+ height=300,
86
+ width=600,
87
+ )
88
+ fig.update_xaxes(tickvals=df[df.columns[0]].unique())
89
+ return fig
utils/utils_functions.py ADDED
@@ -0,0 +1,126 @@
1
+ import warnings
2
+
3
+ import pandas as pd
4
+ from geopy.distance import geodesic
5
+
6
+ # Function to calculate distances while preserving all original columns
7
+ # def calculate_distances(
8
+ # df1: pd.DataFrame,
9
+ # df2: pd.DataFrame,
10
+ # code_col1,
11
+ # lat_col1,
12
+ # long_col1,
13
+ # code_col2,
14
+ # lat_col2,
15
+ # long_col2,
16
+ # min_distance: int = 1,
17
+ # ):
18
+ # distances = []
19
+
20
+ # for _, row1 in df1.iterrows():
21
+ # for _, row2 in df2.iterrows():
22
+ # coord1 = (row1[lat_col1], row1[long_col1])
23
+ # coord2 = (row2[lat_col2], row2[long_col2])
24
+ # distance_km = geodesic(coord1, coord2).kilometers # Compute distance
25
+
26
+ # # Combine all original columns + distance
27
+ # combined_row = {
28
+ # **row1.to_dict(), # Keep all columns from Dataset1
29
+ # **{
30
+ # f"{col}_Dataset2": row2[col] for col in df2.columns
31
+ # }, # Keep all columns from Dataset2
32
+ # "Distance_km": distance_km,
33
+ # }
34
+ # distances.append(combined_row)
35
+
36
+ # df_distances = pd.DataFrame(distances)
37
+
38
+ # # Find the closest point for each Point1
39
+ # df_closest: pd.DataFrame = df_distances.loc[
40
+ # df_distances.groupby(code_col1)["Distance_km"].idxmin()
41
+ # ]
42
+
43
+ # # Find the distances below min_distance
44
+ # df_closest_min_distance = df_distances[df_distances["Distance_km"] < min_distance]
45
+
46
+ # return df_distances, df_closest, df_closest_min_distance
47
+
48
+
49
+ def calculate_distances(
50
+ df1: pd.DataFrame,
51
+ df2: pd.DataFrame,
52
+ code_col1: str,
53
+ lat_col1: str,
54
+ long_col1: str,
55
+ code_col2: str,
56
+ lat_col2: str,
57
+ long_col2: str,
58
+ min_distance: float = 1.0,
59
+ ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
60
+ """
61
+ Calculate distances between points in two datasets and find closest matches.
62
+
63
+ Args:
64
+ df1: First DataFrame containing reference points
65
+ df2: Second DataFrame containing points to compare
66
+ code_col1: Column name in df1 containing point identifiers
67
+ lat_col1: Column name in df1 containing latitude
68
+ long_col1: Column name in df1 containing longitude
69
+ code_col2: Column name in df2 containing point identifiers
70
+ lat_col2: Column name in df2 containing latitude
71
+ long_col2: Column name in df2 containing longitude
72
+ min_distance: Minimum distance threshold in kilometers
73
+
74
+ Returns:
75
+ tuple: (all_distances, closest_matches, matches_below_threshold)
76
+ """
77
+ # Validate input columns
78
+ required_cols_1 = {code_col1, lat_col1, long_col1}
79
+ required_cols_2 = {code_col2, lat_col2, long_col2}
80
+
81
+ if not required_cols_1.issubset(df1.columns):
82
+ raise ValueError(
83
+ f"df1 is missing required columns: {required_cols_1 - set(df1.columns)}"
84
+ )
85
+ if not required_cols_2.issubset(df2.columns):
86
+ raise ValueError(
87
+ f"df2 is missing required columns: {required_cols_2 - set(df2.columns)}"
88
+ )
89
+
90
+ # Convert to list of tuples for vectorized operations
91
+ coords1 = df1[[lat_col1, long_col1]].apply(tuple, axis=1).tolist()
92
+ coords2 = df2[[lat_col2, long_col2]].apply(tuple, axis=1).tolist()
93
+
94
+ # Calculate all pairwise distances
95
+ distances = []
96
+ for i, coord1 in enumerate(coords1):
97
+ for j, coord2 in enumerate(coords2):
98
+ try:
99
+ distance_km = geodesic(coord1, coord2).kilometers
100
+ distances.append(
101
+ {
102
+ **df1.iloc[i].to_dict(),
103
+ **{f"{col}_Dataset2": df2.iloc[j][col] for col in df2.columns},
104
+ "Distance_km": distance_km,
105
+ }
106
+ )
107
+ except ValueError as e:
108
+ warnings.warn(
109
+ f"Skipping invalid coordinates: {coord1} or {coord2}: {e}"
110
+ )
111
+ continue
112
+
113
+ if not distances:
114
+ raise ValueError("No valid coordinate pairs were processed")
115
+
116
+ df_distances = pd.DataFrame(distances)
117
+
118
+ # Find closest matches
119
+ df_closest = df_distances.loc[
120
+ df_distances.groupby(code_col1)["Distance_km"].idxmin()
121
+ ]
122
+
123
+ # Filter by minimum distance
124
+ df_closest_min_distance = df_distances[df_distances["Distance_km"] < min_distance]
125
+
126
+ return df_distances, df_closest, df_closest_min_distance
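The pairwise loop costs O(len(df1) x len(df2)) geodesic calls, so it suits modest tables. A usage sketch with made-up coordinates and column names:

sites = pd.DataFrame({"code": ["S1"], "lat": [12.65], "lon": [-8.00]})
cands = pd.DataFrame({"code2": ["C1", "C2"], "lat2": [12.66, 12.70], "lon2": [-8.00, -8.05]})
all_d, closest, near = calculate_distances(
    sites, cands, "code", "lat", "lon", "code2", "lat2", "lon2", min_distance=2.0
)
# 'closest' keeps, per source code, the candidate with the smallest Distance_km;
# 'near' keeps every pair closer than 2 km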
utils/utils_vars.py ADDED
@@ -0,0 +1,243 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ # url = "https://raw.githubusercontent.com/DavMelchi/STORAGE/refs/heads/main/physical_db/physical_database.csv"
5
+ url = r"./physical_db/physical_database.csv"
6
+
7
+
8
+ def get_physical_db():
9
+ """
10
+ Reads the physical_database.csv file from the physical_db directory and
11
+ returns a pandas DataFrame containing only the columns 'Code_Sector',
12
+ 'Azimut', 'Longitude', 'Latitude', and 'Hauteur'.
13
+
14
+ Returns:
15
+ pd.DataFrame: A DataFrame containing the filtered columns.
16
+ """
17
+ physical = pd.read_csv(url)
18
+ physical = physical[
19
+ [
20
+ "Code_Sector",
21
+ "Azimut",
22
+ "Longitude",
23
+ "Latitude",
24
+ "Hauteur",
25
+ "City",
26
+ "Adresse",
27
+ "Commune",
28
+ "Cercle",
29
+ ]
30
+ ]
31
+ return physical
32
+
33
+
34
+ class UtilsVars:
35
+ sector_mapping = {
36
+ 4: 1,
37
+ 5: 2,
38
+ 6: 3,
39
+ 11: 1,
40
+ 12: 2,
41
+ 13: 3,
42
+ 71: 1,
43
+ 72: 2,
44
+ 73: 3,
45
+ 81: 1,
46
+ 82: 2,
47
+ 83: 3,
48
+ }
49
+ type_cellule = {1: "Macro Cell 1800", 0: "Macro Cell 900"}
50
+ oml_band_frequence = {1: "OML BAND GSM 1800", 0: "OML BAND GSM 900"}
51
+ gsm_band = {1: "G1800", 0: "G900"}
52
+ configuration_schema = {1: "EGPRS 1800", 0: "EGPRS 900"}
53
+ channeltype_mapping = {4: "BCCH", 3: "TRX_TCH"}
54
+ oml_lte_freq_band = {
55
+ "L1800": "OML E-UTRA Band 3 - 20MHz",
56
+ "L800": "OML E-UTRA Band 20 - 20MHz",
57
+ "L2300": "OML E-UTRA Band 43 - 20MHz",
58
+ "L2600": "OML E-UTRA Band 7 - 20MHz",
59
+ "L700": "OML E-UTRA Band 28 - 20MHz",
60
+ }
61
+ porteuse_mapping = {
62
+ 3004: "OML UTRA Band VIII",
63
+ 3006: "OML UTRA Band VIII",
64
+ 10812: "OML UTRA Band I",
65
+ 10787: "OML UTRA Band I",
66
+ 10837: "OML UTRA Band I",
67
+ }
68
+ color_mapping = {
69
+ "U900": "7fff0000",
70
+ "U2100": "7f00ff00",
71
+ "G900": "7fff0000",
72
+ "G1800": "7f00ff00",
73
+ "L800": "7fff0000",
74
+ "L1800": "7f00ff00",
75
+ "L2300": "7f00ffff",
76
+ "L2600": "7f0000ff",
77
+ "L700": "7fff00ff",
78
+ }
79
+ size_mapping = {
80
+ "U900": 100,
81
+ "U2100": 120,
82
+ "G900": 100,
83
+ "G1800": 120,
84
+ "L800": 120,
85
+ "L1800": 140,
86
+ "L2300": 100,
87
+ "L2600": 90,
88
+ "L700": 80,
89
+ }
90
+ lte_band = {
91
+ 1786: "L1800",
92
+ 6350: "L800",
93
+ 3050: "L2600",
94
+ 38750: "L2300",
95
+ 1761: "L1800",
96
+ 9260: "L700",
97
+ }
98
+ wcdma_band = {
99
+ 3004: "U900",
100
+ 3006: "U900",
101
+ 10787: "U2100",
102
+ 10837: "U2100",
103
+ 10812: "U2100",
104
+ }
105
+ bsc_name = {
106
+ 403698: "MBSCTST",
107
+ 403699: "MBSC01",
108
+ 403701: "MBSC04",
109
+ 403702: "MBSC03",
110
+ 403703: "MBSC02",
111
+ 406283: "MBSKTL01",
112
+ 406284: "MBSSEG01",
113
+ 406308: "MBSSK0S1",
114
+ 406309: "ASBSCMSC3",
115
+ }
116
+ final_lte_database = ""
117
+ final_gsm_database = ""
118
+ final_wcdma_database = ""
119
+ final_trx_database = ""
120
+ final_mrbts_database = ""
121
+ final_invunit_database = ""
122
+ final_mal_database = ""
123
+ gsm_dfs = []
124
+ wcdma_dfs = []
125
+ lte_dfs = []
126
+ all_db_dfs = []
127
+ all_db_dfs_names = []
128
+ final_all_database = None
129
+ atoll_dfs = []
130
+ final_atoll_database = None
131
+ final_nice_database = None
132
+ neighbors_database = ""
133
+ file_path = ""
134
+ gsm_kml_file = None
135
+ wcdma_kml_file = None
136
+ lte_kml_file = None
137
+ adjl_database = None
138
+ # physisal_db = get_physical_db()
139
+
140
+
141
+ def get_band(text):
142
+ """
143
+ Extract the band from the given string.
144
+
145
+ Parameters
146
+ ----------
147
+ text : str
148
+ The string to extract the band from.
149
+
150
+ Returns
151
+ -------
152
+ str or np.nan
153
+ The extracted band, or NaN if the text was not a string or did not contain
154
+ any of the recognized bands (L1800, L2300, L800, L2600, L700).
155
+ """
156
+ if isinstance(text, str): # Check if text is a string
157
+ if "L1800" in text:
158
+ return "L1800"
159
+ elif "L2300" in text:
160
+ return "L2300"
161
+ elif "L800" in text:
162
+ return "L800"
163
+ elif "L2600" in text:
164
+ return "L2600"
165
+ elif "L700" in text:
166
+ return "L700"
167
+ return np.nan # or return None
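Illustrative behaviour; numeric carrier codes are handled separately via UtilsVars.lte_band:

get_band("SITE1_L1800_SECT2")  # -> "L1800"
get_band(1786)                 # -> nan (non-string input)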
168
+
169
+
170
+ def clean_bands(bands):
+ """Drop 'nan' fragments from a '/'-separated band string (e.g. 'L1800/nan' -> 'L1800')."""
171
+ if pd.isna(bands):
172
+ return None
173
+ parts = [p for p in bands.split("/") if p != "nan"]
174
+ return "/".join(parts) if parts else None
175
+
176
+
177
+ class GsmAnalysisData:
178
+ total_number_of_bsc = 0
179
+ total_number_of_cell = 0
180
+ number_of_site = 0
181
+ number_of_cell_per_bsc = pd.DataFrame()
182
+ number_of_site_per_bsc = pd.DataFrame()
183
+ number_of_bts_name_empty = 0
184
+ number_of_bcf_name_empty = 0
185
+ number_of_bcch_empty = 0
186
+ bts_administate_distribution = pd.DataFrame()
187
+ trx_administate_distribution = pd.DataFrame()
188
+ number_of_trx_per_bsc = pd.DataFrame()
189
+ number_of_cell_per_lac = pd.DataFrame()
190
+ number_of_site_per_lac = pd.DataFrame()
191
+ trx_frequency_distribution = pd.DataFrame()
192
+
193
+
194
+ class WcdmaAnalysisData:
195
+ total_number_of_rnc = 0
196
+ total_number_of_wcel = 0
197
+ number_of_site = 0
198
+ number_of_site_per_rnc = 0
199
+ number_of_cell_per_rnc = pd.DataFrame()
200
+ number_of_empty_wbts_name = 0
201
+ number_of_empty_wcel_name = 0
202
+ wcel_administate_distribution = pd.DataFrame()
203
+ psc_distribution = pd.DataFrame()
204
+ number_of_cell_per_lac = pd.DataFrame()
205
+ number_of_site_per_lac = pd.DataFrame()
206
+
207
+
208
+ class LteFddAnalysisData:
209
+ total_number_of_lncel = 0
210
+ total_number_of_site = 0
211
+ number_of_empty_lncel_name = 0
212
+ number_of_empty_lncel_cellname = 0
213
+ number_of_empty_lnbts_name = 0
214
+ number_of_cell_per_band = pd.DataFrame()
215
+ phycellid_distribution = pd.DataFrame()
216
+ rootsequenceindex_distribution = pd.DataFrame()
217
+ lncel_administate_distribution = pd.DataFrame()
218
+ number_of_cell_per_tac = pd.DataFrame()
219
+
220
+
221
+ class LteTddAnalysisData:
222
+ total_number_of_lncel = 0
223
+ total_number_of_site = 0
224
+ number_of_empty_lncel_name = 0
225
+ number_of_empty_lncel_cellname = 0
226
+ number_of_empty_lnbts_name = 0
227
+ number_of_cell_per_band = pd.DataFrame()
228
+ phycellid_distribution = pd.DataFrame()
229
+ rootsequenceindex_distribution = pd.DataFrame()
230
+ lncel_administate_distribution = pd.DataFrame()
231
+ number_of_cell_per_tac = pd.DataFrame()
232
+
233
+
234
+ class SiteAnalysisData:
235
+ total_number_of_site = 0
236
+ total_munber_of_gsm_site = 0
237
+ total_number_of_wcdma_site = 0
238
+ total_number_of_lte_site = 0
239
+ gsm_bands_distribution = pd.DataFrame()
240
+ wcdma_bands_distribution = pd.DataFrame()
241
+ lte_bands_distribution = pd.DataFrame()
242
+ all_bands_distribution = pd.DataFrame()
243
+ number_of_trx_per_site_distribution = pd.DataFrame()